From cb86359645d77688676f6e7d3806a3d052b51220 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 25 Jun 2025 14:39:58 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?= =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 [skip ci] --- bolt/include/bolt/Core/BinarySection.h | 5 - bolt/lib/Passes/PAuthGadgetScanner.cpp | 133 ++- .../AArch64/gs-pacret-autiasp.s | 31 +- .../AArch64/gs-pauth-authentication-oracles.s | 20 - .../binary-analysis/AArch64/gs-pauth-calls.s | 84 ++ .../AArch64/gs-pauth-debug-output.s | 32 +- .../AArch64/gs-pauth-signing-oracles.s | 27 - .../bugprone/SizeofExpressionCheck.cpp | 38 +- .../bugprone/SizeofExpressionCheck.h | 1 + clang-tools-extra/docs/ReleaseNotes.rst | 5 + .../checks/bugprone/sizeof-expression.rst | 9 + .../checkers/bugprone/sizeof-expression.cpp | 63 ++ .../bindings/python/tests/cindex/test_file.py | 1 + clang/docs/ReleaseNotes.rst | 5 +- clang/include/clang-c/Index.h | 125 ++- .../Analysis/FlowSensitive/StorageLocation.h | 2 - clang/include/clang/Basic/BuiltinsAArch64.def | 7 + clang/include/clang/Basic/BuiltinsAMDGPU.def | 3 + clang/include/clang/Basic/BuiltinsPPC.def | 6 + .../clang/Basic/DiagnosticSemaKinds.td | 14 +- clang/include/clang/CIR/Dialect/IR/CIROps.td | 88 +- clang/include/clang/CIR/MissingFeatures.h | 154 +-- clang/include/clang/Lex/Preprocessor.h | 6 +- clang/include/clang/Lex/Token.h | 9 +- clang/include/clang/Serialization/ASTReader.h | 20 +- .../clang/Serialization/ASTRecordReader.h | 11 +- .../clang/Serialization/ASTRecordWriter.h | 11 +- clang/include/clang/Serialization/ASTWriter.h | 10 +- .../Serialization/SourceLocationEncoding.h | 120 +-- .../Core/PathSensitive/CallEvent.h | 2 + clang/lib/Basic/Targets/PPC.cpp | 6 + clang/lib/CIR/CodeGen/CIRGenBuilder.h | 10 + clang/lib/CIR/CodeGen/CIRGenCXX.cpp | 2 +- 
clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 53 +- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 82 +- clang/lib/CIR/CodeGen/CIRGenModule.h | 12 +- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 139 ++- .../Dialect/Transforms/CIRCanonicalize.cpp | 5 +- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 49 +- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 20 + clang/lib/CodeGen/CGBuiltin.cpp | 2 +- clang/lib/CodeGen/CGDebugInfo.cpp | 4 +- clang/lib/CodeGen/CGVTables.cpp | 4 +- clang/lib/CodeGen/CodeGenFunction.h | 3 + clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 32 +- clang/lib/CodeGen/Targets/SPIR.cpp | 4 + clang/lib/Driver/Driver.cpp | 123 ++- clang/lib/Format/FormatTokenLexer.cpp | 30 +- clang/lib/Format/QualifierAlignmentFixer.cpp | 25 +- clang/lib/Format/TokenAnnotator.cpp | 5 +- clang/lib/Headers/intrin.h | 10 +- clang/lib/Lex/PPDirectives.cpp | 4 +- clang/lib/Lex/Preprocessor.cpp | 4 +- clang/lib/Sema/SemaPPC.cpp | 4 + clang/lib/Sema/SemaTypeTraits.cpp | 271 +++++- clang/lib/Serialization/ASTReader.cpp | 37 +- clang/lib/Serialization/ASTWriter.cpp | 40 +- clang/lib/Serialization/ASTWriterStmt.cpp | 2 +- .../Serialization/TemplateArgumentHasher.cpp | 2 + clang/lib/StaticAnalyzer/Core/CallEvent.cpp | 12 + .../Analysis/lambda-convert-to-func-ptr.cpp | 21 + clang/test/CIR/CodeGen/align-load.c | 6 +- clang/test/CIR/CodeGen/align-store.c | 2 +- clang/test/CIR/CodeGen/array.cpp | 26 +- clang/test/CIR/CodeGen/basic.c | 48 +- clang/test/CIR/CodeGen/basic.cpp | 18 +- clang/test/CIR/CodeGen/binassign.c | 2 +- clang/test/CIR/CodeGen/binop.cpp | 56 +- clang/test/CIR/CodeGen/builtin_call.cpp | 24 +- clang/test/CIR/CodeGen/builtin_printf.cpp | 8 +- clang/test/CIR/CodeGen/call.c | 36 +- clang/test/CIR/CodeGen/call.cpp | 32 +- clang/test/CIR/CodeGen/cast.cpp | 14 +- clang/test/CIR/CodeGen/class.cpp | 6 +- clang/test/CIR/CodeGen/cmp.cpp | 30 +- clang/test/CIR/CodeGen/comma.c | 2 +- clang/test/CIR/CodeGen/complex.cpp | 113 +++ clang/test/CIR/CodeGen/compound_assign.cpp | 2 +- 
clang/test/CIR/CodeGen/ctor.cpp | 38 +- clang/test/CIR/CodeGen/dso-local.c | 32 +- clang/test/CIR/CodeGen/forrange.cpp | 10 +- clang/test/CIR/CodeGen/if.cpp | 24 +- clang/test/CIR/CodeGen/inline-cxx-func.cpp | 4 +- clang/test/CIR/CodeGen/int-to-bool.cpp | 16 +- clang/test/CIR/CodeGen/linkage-spec.cpp | 28 +- clang/test/CIR/CodeGen/local-vars.cpp | 2 +- clang/test/CIR/CodeGen/loop.cpp | 40 +- clang/test/CIR/CodeGen/member-functions.cpp | 6 +- clang/test/CIR/CodeGen/namespace.cpp | 14 +- clang/test/CIR/CodeGen/nullptr-init.cpp | 2 +- clang/test/CIR/CodeGen/string-literals.c | 16 +- clang/test/CIR/CodeGen/struct.c | 20 +- clang/test/CIR/CodeGen/struct.cpp | 10 +- clang/test/CIR/CodeGen/switch.cpp | 94 +- clang/test/CIR/CodeGen/switch_flat_op.cpp | 4 +- clang/test/CIR/CodeGen/ternary.cpp | 12 +- clang/test/CIR/CodeGen/typedef.c | 4 +- clang/test/CIR/CodeGen/unary.cpp | 74 +- clang/test/CIR/CodeGen/union.c | 28 +- clang/test/CIR/CodeGen/vector-ext.cpp | 15 + clang/test/CIR/CodeGen/vector.cpp | 15 + clang/test/CIR/CodeGenOpenACC/combined-copy.c | 6 +- clang/test/CIR/CodeGenOpenACC/combined.cpp | 4 +- clang/test/CIR/CodeGenOpenACC/compute-copy.c | 4 +- clang/test/CIR/CodeGenOpenACC/data.c | 2 +- clang/test/CIR/CodeGenOpenACC/host_data.c | 2 +- clang/test/CIR/CodeGenOpenACC/init.c | 2 +- clang/test/CIR/CodeGenOpenACC/kernels.c | 4 +- clang/test/CIR/CodeGenOpenACC/loop.cpp | 2 +- clang/test/CIR/CodeGenOpenACC/parallel.c | 4 +- clang/test/CIR/CodeGenOpenACC/serial.c | 4 +- clang/test/CIR/CodeGenOpenACC/set.c | 2 +- clang/test/CIR/CodeGenOpenACC/shutdown.c | 2 +- clang/test/CIR/CodeGenOpenACC/wait.c | 2 +- clang/test/CIR/IR/array.cir | 6 +- clang/test/CIR/IR/binassign.cir | 2 +- clang/test/CIR/IR/call.cir | 14 +- clang/test/CIR/IR/cast.cir | 4 +- clang/test/CIR/IR/cmp.cir | 10 +- clang/test/CIR/IR/func.cir | 14 +- clang/test/CIR/IR/invalid-call.cir | 12 +- clang/test/CIR/IR/invalid-complex.cir | 24 + clang/test/CIR/IR/ternary.cir | 2 +- clang/test/CIR/IR/unary.cir | 4 +- 
clang/test/CIR/IR/vector.cir | 16 +- clang/test/CIR/Lowering/array.cpp | 18 +- clang/test/CIR/Transforms/canonicalize.cir | 12 +- .../CIR/Transforms/complex-create-fold.cir | 2 +- .../test/CIR/Transforms/complex-imag-fold.cir | 23 + .../test/CIR/Transforms/complex-real-fold.cir | 23 + clang/test/CIR/Transforms/hoist-allocas.cir | 6 +- clang/test/CIR/Transforms/if.cir | 4 +- clang/test/CIR/Transforms/loop.cir | 6 +- clang/test/CIR/Transforms/scope.cir | 6 +- clang/test/CIR/Transforms/select.cir | 10 +- clang/test/CIR/Transforms/switch.cir | 20 +- clang/test/CIR/Transforms/ternary-fold.cir | 8 +- clang/test/CIR/Transforms/ternary.cir | 4 +- clang/test/CIR/Transforms/vector-cmp-fold.cir | 24 +- .../CIR/Transforms/vector-create-fold.cir | 2 +- .../vector-shuffle-dynamic-fold.cir | 4 +- .../CIR/Transforms/vector-shuffle-fold.cir | 6 +- .../CIR/Transforms/vector-ternary-fold.cir | 2 +- clang/test/CIR/func-linkage.cpp | 51 + clang/test/CIR/func-simple.cpp | 20 +- clang/test/CIR/mlprint.c | 2 +- .../CodeGen/PowerPC/builtins-bcd-transform.c | 79 ++ .../CodeGen/X86/ms-secure-hotpatch-bad-file.c | 2 +- .../CodeGen/X86/ms-secure-hotpatch-cpp.cpp | 2 +- .../CodeGen/X86/ms-secure-hotpatch-eh.cpp | 2 +- .../CodeGen/X86/ms-secure-hotpatch-globals.c | 2 +- .../test/CodeGen/X86/ms-secure-hotpatch-lto.c | 2 +- clang/test/CodeGen/X86/ms-secure-hotpatch.c | 2 +- .../test/CodeGen/arm64-microsoft-intrinsics.c | 60 ++ clang/test/CodeGen/builtins-overflow.c | 12 + clang/test/CodeGen/logb_scalbn.c | 873 ++++++++++++++++++ clang/test/CodeGen/pragma-comment.c | 2 +- clang/test/CodeGenCUDA/bf16.cu | 6 +- .../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl | 44 + clang/test/Index/inline-assembly.c | 46 + clang/test/Modules/pr118137.cppm | 24 + clang/test/Modules/template-declare.cppm | 39 + clang/test/OpenMP/declare_mapper_messages.c | 63 +- clang/test/OpenMP/declare_target_messages.cpp | 106 ++- .../declare_variant_clauses_messages.cpp | 2 +- clang/test/OpenMP/target_data_ast_print.cpp | 8 +- 
clang/test/OpenMP/target_map_messages.cpp | 59 +- clang/test/Sema/builtins-bcd-transform.c | 30 + .../SemaCXX/cxx2c-trivially-relocatable.cpp | 36 + .../type-traits-unsatisfied-diags-std.cpp | 158 ++++ .../SemaCXX/type-traits-unsatisfied-diags.cpp | 279 ++++++ clang/tools/c-index-test/c-index-test.c | 50 + clang/tools/libclang/CIndex.cpp | 94 ++ clang/tools/libclang/libclang.map | 9 + clang/unittests/Format/FormatTest.cpp | 15 + clang/unittests/Format/QualifierFixerTest.cpp | 7 +- .../SourceLocationEncodingTest.cpp | 58 -- compiler-rt/cmake/Modules/AddCompilerRT.cmake | 18 + compiler-rt/lib/asan/CMakeLists.txt | 13 + .../lib/asan/asan.link_with_main_exec.txt | 115 +++ .../lib/asan/asan_cxx.link_with_main_exec.txt | 21 + compiler-rt/lib/asan/asan_descriptions.cpp | 22 +- compiler-rt/test/tsan/java_heap_init2.cpp | 34 + compiler-rt/test/tsan/munmap_clear_shadow.c | 59 ++ flang/docs/ReleaseNotes.md | 5 + .../flang/Optimizer/Dialect/FIRTypes.td | 3 +- flang/include/flang/Support/OpenMP-features.h | 14 +- flang/lib/Frontend/CompilerInvocation.cpp | 42 +- flang/lib/Lower/Bridge.cpp | 2 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- flang/lib/Optimizer/Dialect/FIRType.cpp | 6 +- .../HLFIR/Transforms/ConvertToFIR.cpp | 5 +- .../test/Driver/bbc-openmp-version-macro.f90 | 11 +- .../Driver/flang-openmp-version-macro.f90 | 8 - flang/test/Driver/fopenmp-version.F90 | 25 + flang/test/Fir/convert-to-llvm.fir | 4 +- flang/test/HLFIR/declare-codegen.fir | 18 + .../OpenMP/target-data-skip-mapper-calls.f90 | 30 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/include/wchar.yaml | 7 + .../__support/wchar/character_converter.cpp | 18 +- .../src/__support/wchar/character_converter.h | 1 + libc/src/wchar/CMakeLists.txt | 13 + libc/src/wchar/wctomb.cpp | 35 + libc/src/wchar/wctomb.h | 22 + .../src/__support/wchar/utf32_to_8_test.cpp | 42 + libc/test/src/wchar/CMakeLists.txt | 11 + libc/test/src/wchar/wctomb_test.cpp | 73 ++ libclc/clc/include/clc/clcmacro.h | 105 --- 
libclc/clc/include/clc/math/clc_pown.h | 2 +- libclc/clc/include/clc/math/clc_rootn.h | 2 +- .../binary_decl_with_int_second_arg.inc | 0 .../binary_def_with_int_second_arg.inc | 0 libclc/clc/lib/generic/math/clc_copysign.cl | 28 +- libclc/clc/lib/generic/math/clc_pow.inc | 13 +- libclc/clc/lib/generic/math/clc_pown.inc | 13 +- libclc/clc/lib/generic/math/clc_powr.inc | 13 +- libclc/clc/lib/generic/math/clc_rootn.inc | 13 +- libclc/opencl/include/clc/opencl/math/ldexp.h | 5 + .../opencl/include/clc/opencl/math/ldexp.inc | 4 +- libclc/opencl/include/clc/opencl/math/pown.h | 2 +- libclc/opencl/include/clc/opencl/math/rootn.h | 2 +- libclc/opencl/lib/clspv/math/fma.cl | 8 +- libclc/opencl/lib/generic/common/degrees.cl | 18 +- libclc/opencl/lib/generic/common/radians.cl | 18 +- libclc/opencl/lib/generic/math/fma.cl | 18 +- libclc/opencl/lib/generic/math/ldexp.cl | 21 +- libclc/opencl/lib/generic/math/mad.cl | 18 +- libclc/opencl/lib/generic/math/nextafter.cl | 24 +- libclc/opencl/lib/generic/math/pown.cl | 2 +- libclc/opencl/lib/generic/math/rootn.cl | 2 +- libclc/opencl/lib/spirv/math/fma.cl | 8 +- libcxx/docs/Status/Cxx2cPapers.csv | 2 +- libcxx/include/variant | 31 + .../variant/variant.relops/relops.pass.cpp | 49 + .../relops_bool_conv.verify.cpp | 11 +- libcxx/test/support/test_comparisons.h | 41 + libcxxabi/src/demangle/ItaniumDemangle.h | 4 +- libcxxabi/test/DemangleTestCases.inc | 1 + lldb/include/lldb/Core/Debugger.h | 6 - lldb/include/lldb/Core/ProtocolServer.h | 5 +- lldb/include/lldb/Target/MemoryTagManager.h | 12 +- lldb/include/lldb/Utility/XcodeSDK.h | 13 - lldb/include/lldb/lldb-forward.h | 2 +- lldb/include/lldb/lldb-private-interfaces.h | 3 +- .../Python/lldbsuite/test/gdbclientutils.py | 10 + .../Commands/CommandObjectProtocolServer.cpp | 51 +- lldb/source/Core/Debugger.cpp | 23 - lldb/source/Core/ProtocolServer.cpp | 34 +- .../Clang/ClangExpressionParser.cpp | 51 +- .../Utility/MemoryTagManagerAArch64MTE.cpp | 10 +- 
.../Utility/MemoryTagManagerAArch64MTE.h | 2 +- .../gdb-remote/GDBRemoteClientBase.cpp | 9 +- .../Process/gdb-remote/GDBRemoteClientBase.h | 6 +- .../gdb-remote/GDBRemoteCommunication.cpp | 172 ++-- .../gdb-remote/GDBRemoteCommunication.h | 10 +- .../GDBRemoteCommunicationClient.cpp | 6 +- .../GDBRemoteCommunicationServerPlatform.cpp | 30 +- .../Process/gdb-remote/ProcessGDBRemote.cpp | 6 +- lldb/source/Plugins/Protocol/MCP/Protocol.h | 2 + .../Protocol/MCP/ProtocolServerMCP.cpp | 30 +- .../Plugins/Protocol/MCP/ProtocolServerMCP.h | 6 +- lldb/source/Plugins/Protocol/MCP/Tool.cpp | 109 ++- lldb/source/Plugins/Protocol/MCP/Tool.h | 24 +- lldb/source/Utility/XcodeSDK.cpp | 21 - .../script_alias/TestCommandScriptAlias.py | 1 + .../gdb_remote_client/TestGDBRemoteClient.py | 72 ++ .../TestAArch64LinuxMTEMemoryTagCoreFile.py | 23 + .../aarch64/mte_core_file/core.mte.notags | Bin 0 -> 32768 bytes .../API/linux/aarch64/mte_core_file/main.c | 7 +- .../restart/TestDAP_restart_runInTerminal.py | 4 +- .../runInTerminal/TestDAP_runInTerminal.py | 5 +- lldb/unittests/Host/CMakeLists.txt | 4 +- .../MemoryTagManagerAArch64MTETest.cpp | 51 +- .../Protocol/ProtocolMCPServerTest.cpp | 21 +- llvm/docs/CommandGuide/llvm-objdump.rst | 2 +- llvm/docs/HowToReleaseLLVM.rst | 14 +- llvm/docs/ReleaseNotes.md | 1 + llvm/docs/SourceLevelDebugging.rst | 57 +- llvm/include/llvm/ADT/ArrayRef.h | 51 +- llvm/include/llvm/Analysis/DXILResource.h | 4 +- llvm/include/llvm/Analysis/IR2Vec.h | 52 +- llvm/include/llvm/Analysis/ValueTracking.h | 4 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 9 +- .../llvm/CodeGen/GlobalISel/CallLowering.h | 9 + .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 5 + llvm/include/llvm/CodeGenTypes/LowLevelType.h | 5 +- .../llvm/CodeGenTypes/MachineValueType.h | 9 +- llvm/include/llvm/Demangle/ItaniumDemangle.h | 4 +- llvm/include/llvm/ExecutionEngine/Orc/COFF.h | 5 +- .../Orc/ExecutorProcessControl.h | 42 - .../llvm/Frontend/Directive/Spelling.h | 4 +- 
.../llvm/Frontend/Driver/CodeGenOptions.h | 2 +- .../Frontend/HLSL/HLSLRootSignatureUtils.h | 9 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 43 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 8 +- llvm/include/llvm/IR/DIBuilder.h | 56 ++ llvm/include/llvm/IR/DebugInfoMetadata.h | 497 +++++++--- llvm/include/llvm/IR/Intrinsics.td | 4 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 8 + llvm/include/llvm/IR/IntrinsicsPowerPC.td | 8 + llvm/include/llvm/IR/RuntimeLibcalls.h | 2 + llvm/include/llvm/MC/MCSchedule.h | 12 +- llvm/include/llvm/TargetParser/TargetParser.h | 2 +- llvm/include/llvm/TargetParser/Triple.h | 9 +- .../Testing/Demangle/DemangleTestCases.inc | 1 + llvm/lib/Analysis/BasicAliasAnalysis.cpp | 4 + llvm/lib/Analysis/ConstantFolding.cpp | 6 + llvm/lib/AsmParser/LLParser.cpp | 109 ++- llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 61 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 35 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 24 +- .../AsmPrinter/DbgEntityHistoryCalculator.cpp | 18 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 199 ++-- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 60 ++ llvm/lib/CodeGen/GlobalMergeFunctions.cpp | 1 + llvm/lib/CodeGen/IfConversion.cpp | 35 +- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 199 +--- llvm/lib/CodeGen/MachineDomTreeUpdater.cpp | 9 +- llvm/lib/CodeGen/MachineDominators.cpp | 30 +- llvm/lib/CodeGen/MachineLoopInfo.cpp | 7 +- llvm/lib/CodeGen/MachinePassManager.cpp | 15 +- llvm/lib/CodeGen/MachinePostDominators.cpp | 29 +- llvm/lib/CodeGen/RegAllocScore.cpp | 17 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 13 + .../Orc/ExecutorProcessControl.cpp | 1 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 + llvm/lib/IR/DIBuilder.cpp | 86 +- llvm/lib/IR/DebugInfoMetadata.cpp | 101 +- llvm/lib/IR/Instructions.cpp | 4 + llvm/lib/IR/LLVMContextImpl.h | 76 +- llvm/lib/IR/RuntimeLibcalls.cpp | 63 +- llvm/lib/IR/Verifier.cpp | 25 +- llvm/lib/MC/MCSchedule.cpp | 1 + llvm/lib/MC/MCStreamer.cpp | 8 +- llvm/lib/MCA/InstrBuilder.cpp | 
3 +- llvm/lib/Option/Arg.cpp | 17 +- .../Target/AArch64/AArch64ISelLowering.cpp | 35 +- .../Target/AArch64/AArch64PerfectShuffle.h | 27 + .../AArch64/AArch64TargetTransformInfo.cpp | 17 + .../MCTargetDesc/AArch64InstPrinter.cpp | 10 + llvm/lib/Target/AMDGPU/AMDGPU.td | 2 +- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 9 +- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 6 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 + llvm/lib/Target/AMDGPU/DSDIRInstructions.td | 4 +- .../Disassembler/AMDGPUDisassembler.cpp | 14 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 30 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 37 +- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 45 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 1 + llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- .../Target/DirectX/DXILDataScalarization.cpp | 5 +- llvm/lib/Target/DirectX/DXILPrepare.cpp | 11 + .../LoongArch/LoongArchFloat64InstrInfo.td | 16 + .../LoongArch/LoongArchISelLowering.cpp | 309 ++++++- .../Target/LoongArch/LoongArchISelLowering.h | 4 + .../Target/LoongArch/LoongArchInstrInfo.td | 12 + .../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 22 + .../NVPTX/MCTargetDesc/NVPTXInstPrinter.h | 2 + llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 72 +- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 - llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 121 +-- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 32 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 20 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.h | 3 - llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 245 ++--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 + llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 15 + llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 12 +- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 2 +- llvm/lib/Target/RISCV/RISCVFeatures.td | 7 - llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 45 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 10 +- 
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 12 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 91 +- .../X86/GISel/X86InstructionSelector.cpp | 33 + .../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 12 +- llvm/lib/Target/X86/X86.td | 2 +- .../lib/Target/X86/X86TargetTransformInfo.cpp | 3 +- llvm/lib/TargetParser/Triple.cpp | 14 +- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 8 +- .../InstCombine/InstructionCombining.cpp | 16 +- .../Instrumentation/MemorySanitizer.cpp | 43 +- .../lib/Transforms/Scalar/LoopInterchange.cpp | 4 +- llvm/lib/Transforms/Scalar/Reassociate.cpp | 10 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 13 +- llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 +- .../Transforms/Vectorize/VectorCombine.cpp | 6 +- llvm/runtimes/CMakeLists.txt | 7 + .../segmented-shufflevector-patterns.ll | 53 ++ .../AArch64/arm64-indexed-vector-ldst.ll | 117 +++ .../CodeGen/AArch64/exp10-libcall-names.ll | 16 +- .../CodeGen/AArch64/streaming-func-no-sme.ll | 2 +- .../CodeGen/AArch64/sve2p1-vector-shuffles.ll | 63 +- .../CodeGen/AMDGPU/convergence-laneops.ll | 2 +- .../CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll | 107 +++ .../AMDGPU/shufflevector.v3bf16.v2bf16.ll | 16 +- .../AMDGPU/shufflevector.v3bf16.v3bf16.ll | 15 +- .../AMDGPU/shufflevector.v3f16.v2f16.ll | 16 +- .../AMDGPU/shufflevector.v3f16.v3f16.ll | 15 +- .../AMDGPU/unsupported-image-sample.ll | 19 +- .../ARM/ifcvt_unanalyzable_fallthrough.mir | 114 +++ llvm/test/CodeGen/ARM/special-reg.ll | 12 +- .../DirectX/issue-145408-gep-struct-fix.ll | 17 + llvm/test/CodeGen/DirectX/llc-pipeline.ll | 2 +- .../CodeGen/DirectX/strip-rootsignatures.ll | 18 + .../CodeGen/LoongArch/calling-conv-ilp32d.ll | 193 ++++ .../LoongArch/inline-asm-constraint-f.ll | 14 +- .../ir-instruction/double-convert.ll | 14 +- .../ir-instruction/load-store-atomic.ll | 40 +- llvm/test/CodeGen/Mips/msa/compare_float.ll | 624 ++++++------- llvm/test/CodeGen/NVPTX/alias.ll | 3 +- .../test/CodeGen/NVPTX/bf16x2-instructions.ll | 7 +- 
llvm/test/CodeGen/NVPTX/byval-const-global.ll | 6 +- .../CodeGen/NVPTX/call-with-alloca-buffer.ll | 3 +- llvm/test/CodeGen/NVPTX/combine-mad.ll | 7 +- .../test/CodeGen/NVPTX/convergent-mir-call.ll | 10 +- .../CodeGen/NVPTX/convert-call-to-indirect.ll | 43 +- llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll | 12 +- llvm/test/CodeGen/NVPTX/f16-instructions.ll | 24 +- llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 21 +- llvm/test/CodeGen/NVPTX/fma.ll | 14 +- llvm/test/CodeGen/NVPTX/forward-ld-param.ll | 12 +- llvm/test/CodeGen/NVPTX/fp128-storage-type.ll | 6 +- llvm/test/CodeGen/NVPTX/i16x2-instructions.ll | 21 +- llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 21 +- llvm/test/CodeGen/NVPTX/indirect_byval.ll | 16 +- llvm/test/CodeGen/NVPTX/ldparam-v4.ll | 5 +- llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 36 +- .../CodeGen/NVPTX/lower-args-gridconstant.ll | 36 +- llvm/test/CodeGen/NVPTX/lower-args.ll | 14 +- llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 24 +- llvm/test/CodeGen/NVPTX/misched_func_call.ll | 6 +- .../NVPTX/naked-fn-with-frame-pointer.ll | 20 +- llvm/test/CodeGen/NVPTX/param-add.ll | 6 +- llvm/test/CodeGen/NVPTX/param-load-store.ll | 168 ++-- llvm/test/CodeGen/NVPTX/param-overalign.ll | 89 +- .../CodeGen/NVPTX/param-vectorize-device.ll | 72 +- llvm/test/CodeGen/NVPTX/shift-opt.ll | 12 +- llvm/test/CodeGen/NVPTX/st-param-imm.ll | 504 ++-------- llvm/test/CodeGen/NVPTX/store-undef.ll | 12 +- llvm/test/CodeGen/NVPTX/tex-read-cuda.ll | 6 +- .../NVPTX/unaligned-param-load-store.ll | 42 +- llvm/test/CodeGen/NVPTX/unreachable.ll | 20 +- llvm/test/CodeGen/NVPTX/variadics-backend.ll | 28 +- .../CodeGen/PowerPC/PR35812-neg-cmpxchg.ll | 2 - llvm/test/CodeGen/PowerPC/all-atomics.ll | 32 +- .../CodeGen/PowerPC/atomics-regression.ll | 68 -- .../CodeGen/PowerPC/builtins-bcd-transform.ll | 91 ++ llvm/test/CodeGen/PowerPC/loop-comment.ll | 1 - llvm/test/CodeGen/RISCV/features-info.ll | 1 - .../rvv/fixed-vectors-deinterleave-load.ll | 67 -- 
.../rvv/fixed-vectors-interleave-store.ll | 34 - .../rvv/fixed-vectors-interleaved-access.ll | 14 +- ...t.ll => fixed-vectors-vp-reverse-float.ll} | 33 +- ...int.ll => fixed-vectors-vp-reverse-int.ll} | 0 .../RISCV/rvv/fixed-vectors-vp-splice.ll | 106 ++- .../RISCV/rvv/vector-deinterleave-load.ll | 67 -- .../RISCV/rvv/vector-interleave-store.ll | 34 - llvm/test/CodeGen/RISCV/rvv/vp-splice.ll | 78 +- .../RISCV/rvv/vp-vector-interleaved-access.ll | 308 ++---- .../CodeGen/SystemZ/vec-max-min-zerosplat.ll | 70 +- .../CodeGen/X86/GlobalISel/llvm.sincos.mir | 189 ++++ llvm/test/CodeGen/X86/isel-fabs-x87.ll | 46 +- llvm/test/CodeGen/X86/isel-fabs.ll | 66 +- llvm/test/CodeGen/X86/llvm.sincos.ll | 92 +- llvm/test/CodeGen/X86/shift-i512.ll | 194 +--- llvm/test/CodeGen/X86/var-permute-256.ll | 216 +++-- llvm/test/CodeGen/X86/vec_int_to_fp.ll | 24 +- .../CodeGen/X86/x86-64-double-shifts-var.ll | 64 +- llvm/test/DebugInfo/dynamic-bitfield.ll | 62 ++ .../BoundsChecking/runtimes.ll | 2 +- .../MemorySanitizer/count-zeroes.ll | 88 +- llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s | 94 ++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s | 65 ++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s | 68 ++ .../MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s | 67 ++ llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s | 79 ++ .../MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s | 23 + llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s | 35 + llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s | 36 + .../gfx1250_asm_vop3_from_vop1-fake16.s | 101 ++ .../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s | 104 +++ .../gfx1250_asm_vop3_from_vop1_dpp16-fake16.s | 63 ++ .../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s | 67 ++ .../gfx1250_asm_vop3_from_vop1_dpp8-fake16.s | 23 + .../AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s | 27 + .../Disassembler/AMDGPU/gfx1250_dasm_vop1.txt | 71 ++ .../AMDGPU/gfx1250_dasm_vop1_dpp16.txt | 64 ++ .../AMDGPU/gfx1250_dasm_vop1_dpp8.txt | 31 + .../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt | 79 ++ 
.../gfx1250_dasm_vop3_from_vop1_dpp16.txt | 67 ++ .../gfx1250_dasm_vop3_from_vop1_dpp8.txt | 27 + .../MC/Disassembler/RISCV/c_lui_disasm.txt | 68 -- llvm/test/MC/RISCV/rv32c-invalid.s | 11 +- llvm/test/MC/X86/gotpcrel-non-globals.ll | 36 + .../TableGen/CompressWriteLatencyEntry.td | 8 +- llvm/test/TableGen/InvalidMCSchedClassDesc.td | 22 +- .../Transforms/FunctionAttrs/initializes.ll | 6 +- llvm/test/Transforms/FunctionAttrs/nosync.ll | 2 +- .../assumption-cache-invalidation.ll | 2 +- .../InstSimplify/ConstProp/atan-intrinsic.ll | 1 - .../InstSimplify/ConstProp/calls.ll | 12 + .../AArch64/sve-deinterleave4.ll | 90 +- .../AArch64/sve-interleave4.ll | 17 +- .../RISCV/interleaved-accesses.ll | 196 ---- .../RISCV/riscv-vector-reverse.ll | 80 +- ...able-info-from-assumption-variable-size.ll | 407 -------- .../early_exit_store_legality.ll | 29 - .../SLPVectorizer/X86/pr47629-inseltpoison.ll | 26 +- .../Transforms/SLPVectorizer/X86/pr47629.ll | 26 +- ...masked-loads-consecutive-loads-same-ptr.ll | 10 +- .../SimplifyCFG/preserve-branchweights.ll | 44 +- llvm/test/Verifier/assume-bundles.ll | 3 +- llvm/test/Verifier/branch-weight.ll | 39 + .../Inputs/nvptx-basic.ll.expected | 6 +- .../symbolize-operands-executable.yaml | 67 ++ .../AArch64/symbolize-operands-relocatable.s | 79 ++ llvm/tools/llvm-exegesis/lib/Analysis.cpp | 9 +- llvm/tools/llvm-objdump/llvm-objdump.cpp | 3 +- llvm/unittests/ADT/ArrayRefTest.cpp | 19 +- .../Analysis/MemoryProfileInfoTest.cpp | 3 +- .../Orc/ObjectLinkingLayerTest.cpp | 2 + .../ExecutionEngine/Orc/OrcTestCommon.h | 41 + llvm/unittests/Frontend/CMakeLists.txt | 1 + .../Frontend/OpenMPDirectiveNameTest.cpp | 96 ++ llvm/unittests/IR/DebugInfoTest.cpp | 29 + .../unittests/IR/DebugTypeODRUniquingTest.cpp | 6 +- llvm/unittests/TargetParser/TripleTest.cpp | 38 + llvm/utils/TableGen/SubtargetEmitter.cpp | 22 +- .../include/mlir/Dialect/ArmNeon/Transforms.h | 2 +- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 14 +- 
mlir/include/mlir/Dialect/Linalg/IR/Linalg.h | 14 + .../mlir/Dialect/Linalg/IR/LinalgEnums.td | 15 + .../mlir/Dialect/Linalg/IR/LinalgOps.td | 18 +- .../Linalg/TransformOps/LinalgTransformOps.td | 4 +- .../Dialect/Linalg/Transforms/Transforms.h | 9 +- .../SCF/Transforms/TileUsingInterface.h | 22 +- .../Dialect/Tensor/Transforms/Transforms.h | 14 +- .../DebugExtension/DebugExtensionOps.td | 4 +- .../mlir/Dialect/Vector/IR/VectorOps.td | 7 +- mlir/include/mlir/IR/EnumAttr.td | 7 +- mlir/include/mlir/IR/OpDefinition.h | 2 +- mlir/include/mlir/IR/Visitors.h | 36 +- .../mlir/Interfaces/TilingInterface.td | 55 +- .../include/mlir/{IR => Support}/StateStack.h | 8 +- mlir/include/mlir/Support/WalkResult.h | 59 ++ .../mlir/Target/LLVMIR/ModuleTranslation.h | 2 +- .../PDLToPDLInterp/PDLToPDLInterp.cpp | 2 +- .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 16 +- .../VectorToLLVM/ConvertVectorToLLVMPass.cpp | 2 +- .../ArmNeonVectorTransformOps.cpp | 2 +- .../Dialect/ArmNeon/Transforms/CMakeLists.txt | 2 +- ... 
=> LowerContractionToNeonI8MMPattern.cpp} | 178 +++- .../ArmSME/Transforms/VectorLegalization.cpp | 171 ++-- .../Transforms/LegalizeVectorStorage.cpp | 152 ++- .../LowerContractionToSVEI8MMPattern.cpp | 7 +- .../GPU/Transforms/DecomposeMemRefs.cpp | 3 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 55 +- .../TransformOps/LinalgTransformOps.cpp | 2 +- .../Linalg/Transforms/TilingInterfaceImpl.cpp | 191 ++-- .../Linalg/Transforms/WinogradConv2D.cpp | 152 ++- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 22 +- .../SCF/Transforms/TileUsingInterface.cpp | 216 +++-- .../SwapExtractSliceWithProducerPatterns.cpp | 57 +- .../DebugExtension/DebugExtensionOps.cpp | 13 +- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 13 +- mlir/lib/IR/CMakeLists.txt | 1 - mlir/lib/Support/CMakeLists.txt | 1 + mlir/lib/{IR => Support}/StateStack.cpp | 2 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 14 +- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 6 +- mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp | 5 +- mlir/python/CMakeLists.txt | 9 + .../dialects/TransformDebugExtensionOps.td | 19 + mlir/python/mlir/dialects/transform/debug.py | 81 ++ .../Dialect/ArmNeon/lower-to-arm-neon.mlir | 80 +- .../Dialect/ArmSME/vector-legalization.mlir | 101 +- .../ArmSVE/legalize-transfer-read.mlir | 257 ++++++ mlir/test/Dialect/Linalg/hoisting.mlir | 266 ++++-- mlir/test/Dialect/Linalg/invalid.mlir | 30 +- mlir/test/Dialect/Linalg/roundtrip.mlir | 24 +- .../transform-op-fuse-into-containing.mlir | 1 + .../transform-tile-and-winograd-rewrite.mlir | 24 +- .../Linalg/transform-tile-winograd.mlir | 36 +- .../Linalg/transform-winograd-conv2d.mlir | 24 +- .../Linalg/winograd-conv2d-rewrite.mlir | 6 +- mlir/test/Dialect/Linalg/winograd-conv2d.mlir | 42 +- mlir/test/Dialect/MemRef/invalid.mlir | 10 + mlir/test/Dialect/MemRef/ops.mlir | 3 + .../Vector/canonicalize/vector-transpose.mlir | 64 +- mlir/test/Dialect/Vector/invalid.mlir | 9 + mlir/test/Dialect/Vector/ops.mlir | 17 +- mlir/test/IR/attribute.mlir | 4 + 
.../transfer-read-scalable-non-trailing.mlir | 79 ++ .../tile-and-fuse-consumer.mlir | 293 +++++- .../Target/LLVMIR/Import/import-failure.ll | 16 - mlir/test/Target/LLVMIR/nvvm/elect.mlir | 20 + mlir/test/Target/LLVMIR/nvvmir.mlir | 9 - mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 348 +++---- .../LLVMIR/omptargetdata-nowait-llvm.mlir | 42 +- .../LLVMIR/openmp-data-target-device.mlir | 2 +- .../openmp-nested-task-target-parallel.mlir | 62 ++ .../Dialect/Linalg/TestLinalgTransforms.cpp | 4 +- mlir/test/lib/Dialect/Test/TestEnumDefs.td | 8 +- mlir/test/lib/Dialect/Test/TestPatterns.cpp | 4 +- .../TestTilingInterfaceTransformOps.cpp | 50 +- .../TestTilingInterfaceTransformOps.td | 7 +- .../python/dialects/transform_debug_ext.py | 45 + mlir/tools/mlir-tblgen/EnumsGen.cpp | 6 +- offload/liboffload/API/Program.td | 4 +- offload/liboffload/src/OffloadImpl.cpp | 8 + offload/plugins-nextgen/amdgpu/src/rtl.cpp | 20 +- .../common/include/PluginInterface.h | 12 +- .../common/src/PluginInterface.cpp | 72 +- offload/plugins-nextgen/cuda/src/rtl.cpp | 27 +- offload/plugins-nextgen/host/src/rtl.cpp | 6 + openmp/runtime/src/kmp_alloc.cpp | 8 +- openmp/runtime/src/kmp_csupport.cpp | 2 +- openmp/runtime/src/kmp_lock.cpp | 2 + openmp/runtime/src/kmp_tasking.cpp | 2 +- .../test/ompt/misc/lock_double_destroy.cpp | 40 + .../llvm-project-overlay/clang/BUILD.bazel | 1 + .../llvm-project-overlay/llvm/config.bzl | 1 + .../llvm/include/llvm/Config/llvm-config.h | 3 + .../llvm-project-overlay/mlir/BUILD.bazel | 1 + utils/bazel/llvm_configs/llvm-config.h.cmake | 3 + 630 files changed, 15064 insertions(+), 7696 deletions(-) create mode 100644 clang/test/Analysis/lambda-convert-to-func-ptr.cpp create mode 100644 clang/test/CIR/Transforms/complex-imag-fold.cir create mode 100644 clang/test/CIR/Transforms/complex-real-fold.cir create mode 100644 clang/test/CIR/func-linkage.cpp create mode 100644 clang/test/CodeGen/PowerPC/builtins-bcd-transform.c create mode 100644 
clang/test/Index/inline-assembly.c create mode 100644 clang/test/Modules/pr118137.cppm create mode 100644 clang/test/Modules/template-declare.cppm create mode 100644 clang/test/Sema/builtins-bcd-transform.c create mode 100644 compiler-rt/lib/asan/asan.link_with_main_exec.txt create mode 100644 compiler-rt/lib/asan/asan_cxx.link_with_main_exec.txt create mode 100644 compiler-rt/test/tsan/java_heap_init2.cpp create mode 100644 compiler-rt/test/tsan/munmap_clear_shadow.c create mode 100644 flang/test/Driver/fopenmp-version.F90 create mode 100644 flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 create mode 100644 libc/src/wchar/wctomb.cpp create mode 100644 libc/src/wchar/wctomb.h create mode 100644 libc/test/src/wchar/wctomb_test.cpp rename libclc/clc/include/clc/{math => shared}/binary_decl_with_int_second_arg.inc (100%) rename libclc/clc/include/clc/{math => shared}/binary_def_with_int_second_arg.inc (100%) create mode 100644 lldb/test/API/linux/aarch64/mte_core_file/core.mte.notags create mode 100644 llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll create mode 100644 llvm/test/CodeGen/ARM/ifcvt_unanalyzable_fallthrough.mir create mode 100644 llvm/test/CodeGen/DirectX/issue-145408-gep-struct-fix.ll create mode 100644 llvm/test/CodeGen/DirectX/strip-rootsignatures.ll create mode 100644 llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll create mode 100644 llvm/test/CodeGen/PowerPC/builtins-bcd-transform.ll rename llvm/test/CodeGen/RISCV/rvv/{fixed-vectors-vp-reverser-float.ll => fixed-vectors-vp-reverse-float.ll} (71%) rename llvm/test/CodeGen/RISCV/rvv/{fixed-vectors-vp-reverser-int.ll => fixed-vectors-vp-reverse-int.ll} (100%) create mode 100644 llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir create mode 100644 llvm/test/DebugInfo/dynamic-bitfield.ll create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s create mode 100644 
llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt create mode 100644 llvm/test/MC/X86/gotpcrel-non-globals.ll delete mode 100644 llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll create mode 100644 llvm/test/Verifier/branch-weight.ll create mode 100644 llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml create mode 100644 llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-relocatable.s create mode 100644 llvm/unittests/Frontend/OpenMPDirectiveNameTest.cpp rename mlir/include/mlir/{IR => Support}/StateStack.h (96%) create mode 100644 mlir/include/mlir/Support/WalkResult.h rename mlir/lib/Dialect/ArmNeon/Transforms/{LowerContractionToSMMLAPattern.cpp => 
LowerContractionToNeonI8MMPattern.cpp} (59%) rename mlir/lib/{IR => Support}/StateStack.cpp (92%) create mode 100644 mlir/python/mlir/dialects/TransformDebugExtensionOps.td create mode 100644 mlir/python/mlir/dialects/transform/debug.py create mode 100644 mlir/test/Dialect/ArmSVE/legalize-transfer-read.mlir create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/transfer-read-scalable-non-trailing.mlir create mode 100644 mlir/test/Target/LLVMIR/nvvm/elect.mlir create mode 100644 mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir create mode 100644 mlir/test/python/dialects/transform_debug_ext.py create mode 100644 openmp/runtime/test/ompt/misc/lock_double_destroy.cpp diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index ad2fed2cf27eb..154a8d12de5ce 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -523,11 +523,6 @@ inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) { return Array; } -inline uint8_t *copyByteArray(StringRef Buffer) { - return copyByteArray(reinterpret_cast(Buffer.data()), - Buffer.size()); -} - inline uint8_t *copyByteArray(ArrayRef Buffer) { return copyByteArray(reinterpret_cast(Buffer.data()), Buffer.size()); diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index 95e831fe9c8ca..2eadaf15d3a65 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -82,6 +82,22 @@ namespace PAuthGadgetScanner { dbgs() << "\n"; } +// Iterates over BinaryFunction's instructions like a range-based for loop: +// +// iterateOverInstrs(BF, [&](MCInstReference Inst) { +// // loop body +// }); +template static void iterateOverInstrs(BinaryFunction &BF, T Fn) { + if (BF.hasCFG()) { + for (BinaryBasicBlock &BB : BF) + for (int64_t I = 0, E = BB.size(); I < E; ++I) + Fn(MCInstInBBReference(&BB, I)); + } else { + for (auto I : BF.instrs()) + 
Fn(MCInstInBFReference(&BF, I.first)); + } +} + // This class represents mapping from a set of arbitrary physical registers to // consecutive array indexes. class TrackedRegisters { @@ -342,6 +358,29 @@ class SrcSafetyAnalysis { return S; } + /// Computes a reasonably pessimistic estimation of the register state when + /// the previous instruction is not known for sure. Takes the set of registers + /// which are trusted at function entry and removes all registers that can be + /// clobbered inside this function. + SrcState computePessimisticState(BinaryFunction &BF) { + BitVector ClobberedRegs(NumRegs); + iterateOverInstrs(BF, [&](MCInstReference Inst) { + BC.MIB->getClobberedRegs(Inst, ClobberedRegs); + + // If this is a call instruction, no register is safe anymore, unless + // it is a tail call. Ignore tail calls for the purpose of estimating the + // worst-case scenario, assuming no instructions are executed in the + // caller after this point anyway. + if (BC.MIB->isCall(Inst) && !BC.MIB->isTailCall(Inst)) + ClobberedRegs.set(); + }); + + SrcState S = createEntryState(); + S.SafeToDerefRegs.reset(ClobberedRegs); + S.TrustedRegs.reset(ClobberedRegs); + return S; + } + BitVector getClobberedRegs(const MCInst &Point) const { BitVector Clobbered(NumRegs); // Assume a call can clobber all registers, including callee-saved @@ -545,6 +584,10 @@ class DataflowSrcSafetyAnalysis using SrcSafetyAnalysis::BC; using SrcSafetyAnalysis::computeNext; + // Pessimistic initial state for basic blocks without any predecessors + // (not needed for most functions, thus initialized lazily). + SrcState PessimisticState; + public: DataflowSrcSafetyAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, @@ -585,6 +628,18 @@ class DataflowSrcSafetyAnalysis if (BB.isEntryPoint()) return createEntryState(); + // If a basic block without any predecessors is found in an optimized code, + // this likely means that some CFG edges were not detected. 
Pessimistically + // assume any register that can ever be clobbered in this function to be + // unsafe before this basic block. + // Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely + // means imprecise CFG information. + if (BB.pred_empty()) { + if (PessimisticState.empty()) + PessimisticState = computePessimisticState(*BB.getParent()); + return PessimisticState; + } + return SrcState(); } @@ -682,19 +737,14 @@ template class CFGUnawareAnalysis { // // Then, a function can be split into a number of disjoint contiguous sequences // of instructions without labels in between. These sequences can be processed -// the same way basic blocks are processed by data-flow analysis, assuming -// pessimistically that all registers are unsafe at the start of each sequence. +// the same way basic blocks are processed by data-flow analysis, with the same +// pessimistic estimation of the initial state at the start of each sequence +// (except the first instruction of the function). class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis, public CFGUnawareAnalysis { using SrcSafetyAnalysis::BC; BinaryFunction &BF; - /// Creates a state with all registers marked unsafe (not to be confused - /// with empty state). - SrcState createUnsafeState() const { - return SrcState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); - } - public: CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, @@ -704,6 +754,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis, } void run() override { + const SrcState DefaultState = computePessimisticState(BF); SrcState S = createEntryState(); for (auto &I : BF.instrs()) { MCInst &Inst = I.second; @@ -718,7 +769,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis, LLVM_DEBUG({ traceInst(BC, "Due to label, resetting the state before", Inst); }); - S = createUnsafeState(); + S = DefaultState; } // Attach the state *before* this instruction executes. 
@@ -1344,17 +1395,6 @@ shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst, return make_gadget_report(AuthOracleKind, Inst, *AuthReg); } -template static void iterateOverInstrs(BinaryFunction &BF, T Fn) { - if (BF.hasCFG()) { - for (BinaryBasicBlock &BB : BF) - for (int64_t I = 0, E = BB.size(); I < E; ++I) - Fn(MCInstInBBReference(&BB, I)); - } else { - for (auto I : BF.instrs()) - Fn(MCInstInBFReference(&BF, I.first)); - } -} - static SmallVector collectRegsToTrack(ArrayRef> Reports) { SmallSet RegsToTrack; @@ -1375,17 +1415,60 @@ void FunctionAnalysisContext::findUnsafeUses( BF.dump(); }); + bool UnreachableBBReported = false; + if (BF.hasCFG()) { + // Warn on basic blocks being unreachable according to BOLT (at most once + // per BinaryFunction), as this likely means the CFG reconstructed by BOLT + // is imprecise. A basic block can be + // * reachable from an entry basic block - a hopefully correct non-empty + // state is propagated to that basic block sooner or later. All basic + // blocks are expected to belong to this category under normal conditions. + // * reachable from a "directly unreachable" BB (a basic block that has no + // direct predecessors and this is not because it is an entry BB) - *some* + // non-empty state is propagated to this basic block sooner or later, as + // the initial state of directly unreachable basic blocks is + // pessimistically initialized to "all registers are unsafe" + // - a warning can be printed for the "directly unreachable" basic block + // * neither reachable from an entry nor from a "directly unreachable" BB + // (such as if this BB is in an isolated loop of basic blocks) - the final + // state is computed to be empty for this basic block + // - a warning can be printed for this basic block + for (BinaryBasicBlock &BB : BF) { + MCInst *FirstInst = BB.getFirstNonPseudoInstr(); + // Skip empty basic block early for simplicity. 
+ if (!FirstInst) + continue; + + bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint(); + bool HasNoStateComputed = Analysis->getStateBefore(*FirstInst).empty(); + if (!IsDirectlyUnreachable && !HasNoStateComputed) + continue; + + // Arbitrarily attach the report to the first instruction of BB. + // This is printed as "[message] in function [name], basic block ..., + // at address ..." when the issue is reported to the user. + Reports.push_back(make_generic_report( + MCInstReference::get(FirstInst, BF), + "Warning: possibly imprecise CFG, the analysis quality may be " + "degraded in this function. According to BOLT, unreachable code is " + "found" /* in function [name]... */)); + UnreachableBBReported = true; + break; // One warning per function. + } + } + // FIXME: Warn the user about imprecise analysis when the function has no CFG + // information at all. + iterateOverInstrs(BF, [&](MCInstReference Inst) { if (BC.MIB->isCFI(Inst)) return; const SrcState &S = Analysis->getStateBefore(Inst); - - // If non-empty state was never propagated from the entry basic block - // to Inst, assume it to be unreachable and report a warning. 
if (S.empty()) { - Reports.push_back( - make_generic_report(Inst, "Warning: unreachable instruction found")); + LLVM_DEBUG( + { traceInst(BC, "Instruction has no state, skipping", Inst); }); + assert(UnreachableBBReported && "Should be reported at least once"); + (void)UnreachableBBReported; return; } diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s index 284f0bea607a5..2dadcef095863 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s +++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s @@ -215,7 +215,7 @@ f_callclobbered_calleesaved: .globl f_unreachable_instruction .type f_unreachable_instruction,@function f_unreachable_instruction: -// CHECK-LABEL: GS-PAUTH: Warning: unreachable instruction found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address +// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function f_unreachable_instruction, basic block {{[0-9a-zA-Z.]+}}, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: add x0, x1, x2 // CHECK-NOT: instructions that write to the affected registers after any authentication are: b 1f @@ -224,20 +224,33 @@ f_unreachable_instruction: ret .size f_unreachable_instruction, .-f_unreachable_instruction -// Expected false positive: without CFG, the state is reset to all-unsafe -// after an unconditional branch. +// Without CFG, the state is reset at labels, assuming every register that can +// be clobbered in the function was actually clobbered. 
- .globl state_is_reset_after_indirect_branch_nocfg - .type state_is_reset_after_indirect_branch_nocfg,@function -state_is_reset_after_indirect_branch_nocfg: -// CHECK-LABEL: GS-PAUTH: non-protected ret found in function state_is_reset_after_indirect_branch_nocfg, at address -// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: ret + .globl lr_untouched_nocfg + .type lr_untouched_nocfg,@function +lr_untouched_nocfg: +// CHECK-NOT: lr_untouched_nocfg + adr x2, 1f + br x2 +1: + ret + .size lr_untouched_nocfg, .-lr_untouched_nocfg + + .globl lr_clobbered_nocfg + .type lr_clobbered_nocfg,@function +lr_clobbered_nocfg: +// CHECK-LABEL: GS-PAUTH: non-protected ret found in function lr_clobbered_nocfg, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: ret // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: adr x2, 1f br x2 1: + b 2f + bl g // never executed, but affects the expected worst-case scenario +2: ret - .size state_is_reset_after_indirect_branch_nocfg, .-state_is_reset_after_indirect_branch_nocfg + .size lr_clobbered_nocfg, .-lr_clobbered_nocfg /// Now do a basic sanity check on every different Authentication instruction: diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s index 717bf40df3d02..c314bc7cfe5a3 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s @@ -491,10 +491,6 @@ good_address_arith_multi_bb: ret .size good_address_arith_multi_bb, .-good_address_arith_multi_bb -// FIXME: Most *_nocfg test cases contain paciasp+autiasp instructions even if -// LR is not spilled - this is a workaround for RET instructions being -// reported as non-protected, because LR state is reset at every label. 
- .globl good_ret_nocfg .type good_ret_nocfg,@function good_ret_nocfg: @@ -541,14 +537,12 @@ good_branch_nocfg: .type good_load_other_reg_nocfg,@function good_load_other_reg_nocfg: // CHECK-NOT: good_load_other_reg_nocfg - paciasp adr x2, 1f br x2 1: autia x0, x1 ldr x2, [x0] - autiasp ret .size good_load_other_reg_nocfg, .-good_load_other_reg_nocfg @@ -556,14 +550,12 @@ good_load_other_reg_nocfg: .type good_load_same_reg_nocfg,@function good_load_same_reg_nocfg: // CHECK-NOT: good_load_same_reg_nocfg - paciasp adr x2, 1f br x2 1: autia x0, x1 ldr x0, [x0] - autiasp ret .size good_load_same_reg_nocfg, .-good_load_same_reg_nocfg @@ -575,13 +567,11 @@ bad_unchecked_nocfg: // CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unchecked_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 // CHECK-NEXT: The 0 instructions that leak the affected registers are: - paciasp adr x2, 1f br x2 1: autia x0, x1 - autiasp ret .size bad_unchecked_nocfg, .-bad_unchecked_nocfg @@ -615,7 +605,6 @@ bad_unknown_usage_read_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 // CHECK-NEXT: The 1 instructions that leak the affected registers are: // CHECK-NEXT: 1. {{[0-9a-f]+}}: mul x3, x0, x1 - paciasp adr x2, 1f br x2 1: @@ -623,7 +612,6 @@ bad_unknown_usage_read_nocfg: mul x3, x0, x1 ldr x2, [x0] - autiasp ret .size bad_unknown_usage_read_nocfg, .-bad_unknown_usage_read_nocfg @@ -634,7 +622,6 @@ bad_unknown_usage_subreg_read_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 // CHECK-NEXT: The 1 instructions that leak the affected registers are: // CHECK-NEXT: 1. 
{{[0-9a-f]+}}: mul w3, w0, w1 - paciasp adr x2, 1f br x2 1: @@ -642,7 +629,6 @@ bad_unknown_usage_subreg_read_nocfg: mul w3, w0, w1 ldr x2, [x0] - autiasp ret .size bad_unknown_usage_subreg_read_nocfg, .-bad_unknown_usage_subreg_read_nocfg @@ -653,7 +639,6 @@ bad_unknown_usage_update_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 // CHECK-NEXT: The 1 instructions that leak the affected registers are: // CHECK-NEXT: 1. {{[0-9a-f]+}}: movk x0, #0x2a, lsl #16 - paciasp adr x2, 1f br x2 1: @@ -661,7 +646,6 @@ bad_unknown_usage_update_nocfg: movk x0, #42, lsl #16 // does not overwrite x0 completely ldr x2, [x0] - autiasp ret .size bad_unknown_usage_update_nocfg, .-bad_unknown_usage_update_nocfg @@ -669,14 +653,12 @@ bad_unknown_usage_update_nocfg: .type good_overwrite_with_constant_nocfg,@function good_overwrite_with_constant_nocfg: // CHECK-NOT: good_overwrite_with_constant_nocfg - paciasp adr x2, 1f br x2 1: autia x0, x1 mov x0, #42 - autiasp ret .size good_overwrite_with_constant_nocfg, .-good_overwrite_with_constant_nocfg @@ -684,7 +666,6 @@ good_overwrite_with_constant_nocfg: .type good_address_arith_nocfg,@function good_address_arith_nocfg: // CHECK-NOT: good_address_arith_nocfg - paciasp adr x2, 1f br x2 1: @@ -698,7 +679,6 @@ good_address_arith_nocfg: mov x1, #0 mov x2, #0 - autiasp ret .size good_address_arith_nocfg, .-good_address_arith_nocfg diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s index c79c5926a05cd..fb0bc7cff2377 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-calls.s @@ -1428,6 +1428,90 @@ printed_instrs_nocfg: br x0 .size printed_instrs_nocfg, .-printed_instrs_nocfg +// Test handling of unreachable basic blocks. +// +// Basic blocks without any predecessors were observed in real-world optimized +// code. 
At least sometimes they were actually reachable via jump table, which +// was not detected, but the function was processed as if its CFG was +// reconstructed successfully. +// +// As a more predictable model example, let's use really unreachable code +// for testing. + + .globl bad_unreachable_call + .type bad_unreachable_call,@function +bad_unreachable_call: +// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function bad_unreachable_call, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0 +// CHECK-NOT: instructions that write to the affected registers after any authentication are: +// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_unreachable_call, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0 +// CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + + b 1f + // unreachable basic block: + blr x0 + +1: // reachable basic block: + ldp x29, x30, [sp], #16 + autiasp + ret + .size bad_unreachable_call, .-bad_unreachable_call + + .globl good_unreachable_call + .type good_unreachable_call,@function +good_unreachable_call: +// CHECK-NOT: non-protected call{{.*}}good_unreachable_call +// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function good_unreachable_call, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NOT: instructions that write to the affected registers after any authentication are: +// CHECK-NOT: non-protected call{{.*}}good_unreachable_call + paciasp + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + b 1f + // unreachable basic block: + autia x0, x1 + blr x0 // <-- this call is definitely protected provided at least + // basic block boundaries are detected correctly + +1: // reachable basic block: + ldp x29, x30, [sp], #16 + autiasp + ret + .size good_unreachable_call, .-good_unreachable_call + + .globl unreachable_loop_of_bbs + .type unreachable_loop_of_bbs,@function +unreachable_loop_of_bbs: +// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs +// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs +// CHECK-LABEL: GS-PAUTH: Warning: possibly imprecise CFG, the analysis quality may be degraded in this function. According to BOLT, unreachable code is found in function unreachable_loop_of_bbs, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: blr x0 +// CHECK-NOT: unreachable basic blocks{{.*}}unreachable_loop_of_bbs +// CHECK-NOT: non-protected call{{.*}}unreachable_loop_of_bbs + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + b .Lreachable_epilogue_bb + +.Lfirst_unreachable_bb: + blr x0 // <-- this call is not analyzed + b .Lsecond_unreachable_bb +.Lsecond_unreachable_bb: + blr x1 // <-- this call is not analyzed + b .Lfirst_unreachable_bb + +.Lreachable_epilogue_bb: + ldp x29, x30, [sp], #16 + autiasp + ret + .size unreachable_loop_of_bbs, .-unreachable_loop_of_bbs + .globl main .type main,@function main: diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s index fbb96a63d41ed..b1cec7f92ad05 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s @@ -199,8 +199,8 @@ nocfg: // CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( br x0, src-state) // CHECK-NEXT: .. 
result: (src-state) // CHECK-NEXT: Due to label, resetting the state before: 00000000: ret # Offset: 8 -// CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( ret x30, src-state) -// CHECK-NEXT: .. result: (src-state) +// CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( ret x30, src-state) +// CHECK-NEXT: .. result: (src-state) // CHECK-NEXT: After src register safety analysis: // CHECK-NEXT: Binary Function "nocfg" { // CHECK-NEXT: Number : 3 @@ -223,33 +223,7 @@ nocfg: // PAUTH-NEXT: SafeToDerefRegs: LR W0 W30 X0 W0_HI W30_HI{{[ \t]*$}} // CHECK-NEXT: Found RET inst: 00000000: ret # Offset: 8 # CFGUnawareSrcSafetyAnalysis: src-state // CHECK-NEXT: RetReg: LR -// CHECK-NEXT: SafeToDerefRegs:{{[ \t]*$}} -// CHECK-EMPTY: -// CHECK-NEXT: Running detailed src register safety analysis... -// CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( adr x0, __ENTRY_nocfg@0x[[ENTRY_ADDR]], src-state) -// CHECK-NEXT: .. result: (src-state) -// CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( br x0, src-state) -// CHECK-NEXT: .. result: (src-state) -// CHECK-NEXT: Due to label, resetting the state before: 00000000: ret # Offset: 8 -// CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( ret x30, src-state) -// CHECK-NEXT: .. result: (src-state) -// CHECK-NEXT: After detailed src register safety analysis: -// CHECK-NEXT: Binary Function "nocfg" { -// CHECK-NEXT: Number : 3 -// ... 
-// CHECK: Secondary Entry Points : __ENTRY_nocfg@0x[[ENTRY_ADDR]] -// CHECK-NEXT: } -// CHECK-NEXT: .{{[A-Za-z0-9]+}}: -// CHECK-NEXT: 00000000: adr x0, __ENTRY_nocfg@0x[[ENTRY_ADDR]] # CFGUnawareSrcSafetyAnalysis: src-state -// CHECK-NEXT: 00000004: br x0 # UNKNOWN CONTROL FLOW # Offset: 4 # CFGUnawareSrcSafetyAnalysis: src-state -// CHECK-NEXT: __ENTRY_nocfg@0x[[ENTRY_ADDR]] (Entry Point): -// CHECK-NEXT: .{{[A-Za-z0-9]+}}: -// CHECK-NEXT: 00000008: ret # Offset: 8 # CFGUnawareSrcSafetyAnalysis: src-state -// CHECK-NEXT: DWARF CFI Instructions: -// CHECK-NEXT: -// CHECK-NEXT: End of Function "nocfg" -// CHECK-EMPTY: -// CHECK-NEXT: Attaching clobbering info to: 00000000: ret # Offset: 8 # CFGUnawareSrcSafetyAnalysis: src-state +// CHECK-NEXT: SafeToDerefRegs: LR W30 W30_HI{{[ \t]*$}} .globl auth_oracle .type auth_oracle,@function diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s index 334a4108d8ab8..3a4d383ec5bc6 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s @@ -505,21 +505,16 @@ bad_one_auted_one_checked_multi_bb: // * untrusted: not even s-t-d - from arg and from memory // * untrusted: subreg clobbered - between address materialization and use, between auth and check, between check and use // * untrusted: first checked then auted, auted then auted, checked then checked -// -// Note that it is important to sign and authenticate LR, as it is not kept -// safe-to-dereference across unconditional branches. 
.globl good_sign_addr_mat_nocfg .type good_sign_addr_mat_nocfg,@function good_sign_addr_mat_nocfg: // CHECK-NOT: good_sign_addr_mat_nocfg - paciasp adr x3, 1f br x3 1: adr x0, sym pacda x0, x1 - autiasp ret .size good_sign_addr_mat_nocfg, .-good_sign_addr_mat_nocfg @@ -527,14 +522,12 @@ good_sign_addr_mat_nocfg: .type good_sign_auted_checked_ldr_nocfg,@function good_sign_auted_checked_ldr_nocfg: // CHECK-NOT: good_sign_auted_checked_ldr_nocfg - paciasp adr x3, 1f br x3 1: autda x0, x2 ldr x2, [x0] pacda x0, x1 - autiasp ret .size good_sign_auted_checked_ldr_nocfg, .-good_sign_auted_checked_ldr_nocfg @@ -544,13 +537,11 @@ bad_sign_authed_unchecked_nocfg: // CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_sign_authed_unchecked_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: - paciasp adr x3, 1f br x3 1: autda x0, x2 pacda x0, x1 - autiasp ret .size bad_sign_authed_unchecked_nocfg, .-bad_sign_authed_unchecked_nocfg @@ -560,13 +551,11 @@ bad_sign_checked_not_auted_nocfg: // CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_sign_checked_not_auted_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: - paciasp adr x3, 1f br x3 1: ldr x2, [x0] pacda x0, x1 - autiasp ret .size bad_sign_checked_not_auted_nocfg, .-bad_sign_checked_not_auted_nocfg @@ -576,12 +565,10 @@ bad_sign_plain_arg_nocfg: // CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_sign_plain_arg_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: - paciasp adr x3, 1f br x3 1: pacda x0, x1 - autiasp ret .size bad_sign_plain_arg_nocfg, .-bad_sign_plain_arg_nocfg @@ -592,13 +579,11 @@ 
bad_sign_plain_mem_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: // CHECK-NEXT: 1. {{[0-9a-f]+}}: ldr x0, [x1] - paciasp adr x3, 1f br x3 1: ldr x0, [x1] pacda x0, x1 - autiasp ret .size bad_sign_plain_mem_nocfg, .-bad_sign_plain_mem_nocfg @@ -609,14 +594,12 @@ bad_clobber_between_addr_mat_and_use_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: // CHECK-NEXT: 1. {{[0-9a-f]+}}: mov w0, w4 - paciasp adr x3, 1f br x3 1: adr x0, sym mov w0, w4 pacda x0, x1 - autiasp ret .size bad_clobber_between_addr_mat_and_use_nocfg, .-bad_clobber_between_addr_mat_and_use_nocfg @@ -627,7 +610,6 @@ bad_clobber_between_auted_and_checked_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: // CHECK-NEXT: 1. {{[0-9a-f]+}}: mov w0, w4 - paciasp adr x3, 1f br x3 1: @@ -635,7 +617,6 @@ bad_clobber_between_auted_and_checked_nocfg: mov w0, w4 ldr x2, [x0] pacda x0, x1 - autiasp ret .size bad_clobber_between_auted_and_checked_nocfg, .-bad_clobber_between_auted_and_checked_nocfg @@ -646,7 +627,6 @@ bad_clobber_between_checked_and_used_nocfg: // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are: // CHECK-NEXT: 1. 
{{[0-9a-f]+}}: mov w0, w4 - paciasp adr x3, 1f br x3 1: @@ -654,7 +634,6 @@ bad_clobber_between_checked_and_used_nocfg: ldr x2, [x0] mov w0, w4 pacda x0, x1 - autiasp ret .size bad_clobber_between_checked_and_used_nocfg, .-bad_clobber_between_checked_and_used_nocfg @@ -664,14 +643,12 @@ bad_transition_check_then_auth_nocfg: // CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_check_then_auth_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: - paciasp adr x3, 1f br x3 1: ldr x2, [x0] autda x0, x2 pacda x0, x1 - autiasp ret .size bad_transition_check_then_auth_nocfg, .-bad_transition_check_then_auth_nocfg @@ -681,14 +658,12 @@ bad_transition_auth_then_auth_nocfg: // CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_auth_then_auth_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: - paciasp adr x3, 1f br x3 1: autda x0, x2 autda x0, x2 pacda x0, x1 - autiasp ret .size bad_transition_auth_then_auth_nocfg, .-bad_transition_auth_then_auth_nocfg @@ -698,14 +673,12 @@ bad_transition_check_then_check_nocfg: // CHECK-LABEL: GS-PAUTH: signing oracle found in function bad_transition_check_then_check_nocfg, at address // CHECK-NEXT: The instruction is {{[0-9a-f]+}}: pacda x0, x1 // CHECK-NEXT: The 0 instructions that write to the affected registers after any authentication are: - paciasp adr x3, 1f br x3 1: ldr x2, [x0] ldr x2, [x0] pacda x0, x1 - autiasp ret .size bad_transition_check_then_check_nocfg, .-bad_transition_check_then_check_nocfg diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp index 9eeba867f5211..88d2f2c388d07 100644 --- 
a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp @@ -72,7 +72,9 @@ SizeofExpressionCheck::SizeofExpressionCheck(StringRef Name, Options.get("WarnOnSizeOfPointerToAggregate", true)), WarnOnSizeOfPointer(Options.get("WarnOnSizeOfPointer", false)), WarnOnOffsetDividedBySizeOf( - Options.get("WarnOnOffsetDividedBySizeOf", true)) {} + Options.get("WarnOnOffsetDividedBySizeOf", true)), + WarnOnSizeOfInLoopTermination( + Options.get("WarnOnSizeOfInLoopTermination", true)) {} void SizeofExpressionCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "WarnOnSizeOfConstant", WarnOnSizeOfConstant); @@ -86,6 +88,8 @@ void SizeofExpressionCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "WarnOnSizeOfPointer", WarnOnSizeOfPointer); Options.store(Opts, "WarnOnOffsetDividedBySizeOf", WarnOnOffsetDividedBySizeOf); + Options.store(Opts, "WarnOnSizeOfInLoopTermination", + WarnOnSizeOfInLoopTermination); } void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) { @@ -93,6 +97,13 @@ void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) { // Some of the checks should not match in template code to avoid false // positives if sizeof is applied on template argument. 
+ auto LoopCondExpr = + [](const ast_matchers::internal::Matcher &InnerMatcher) { + return stmt(anyOf(forStmt(hasCondition(InnerMatcher)), + whileStmt(hasCondition(InnerMatcher)), + doStmt(hasCondition(InnerMatcher)))); + }; + const auto IntegerExpr = ignoringParenImpCasts(integerLiteral()); const auto ConstantExpr = ignoringParenImpCasts( anyOf(integerLiteral(), unaryOperator(hasUnaryOperand(IntegerExpr)), @@ -130,6 +141,14 @@ void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) { this); } + if (WarnOnSizeOfInLoopTermination) { + auto CondExpr = binaryOperator( + allOf(has(SizeOfExpr.bind("sizeof-expr")), isComparisonOperator())); + Finder->addMatcher(LoopCondExpr(anyOf(CondExpr, hasDescendant(CondExpr))) + .bind("loop-expr"), + this); + } + // Detect sizeof(kPtr) where kPtr is 'const char* kPtr = "abc"'; const auto CharPtrType = pointerType(pointee(isAnyCharacter())); const auto ConstStrLiteralDecl = @@ -349,6 +368,23 @@ void SizeofExpressionCheck::check(const MatchFinder::MatchResult &Result) { diag(E->getBeginLoc(), "suspicious usage of 'sizeof(char*)'; do you mean 'strlen'?") << E->getSourceRange(); + } else if (Result.Nodes.getNodeAs("loop-expr")) { + auto *SizeofArgTy = Result.Nodes.getNodeAs("sizeof-arg-type"); + if (const auto member = dyn_cast(SizeofArgTy)) + SizeofArgTy = member->getPointeeType().getTypePtr(); + + const auto *SzOfExpr = Result.Nodes.getNodeAs("sizeof-expr"); + + if (const auto type = dyn_cast(SizeofArgTy)) { + // check if the array element size is larger than one. 
If true, + // the size of the array is higher than the number of elements + CharUnits sSize = Ctx.getTypeSizeInChars(type->getElementType()); + if (!sSize.isOne()) { + diag(SzOfExpr->getBeginLoc(), + "suspicious usage of 'sizeof' in the loop") + << SzOfExpr->getSourceRange(); + } + } } else if (const auto *E = Result.Nodes.getNodeAs("sizeof-pointer")) { diag(E->getBeginLoc(), "suspicious usage of 'sizeof()' on an expression " "of pointer type") diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h index fbd62cb80fb2d..e979b4723cf2e 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h @@ -32,6 +32,7 @@ class SizeofExpressionCheck : public ClangTidyCheck { const bool WarnOnSizeOfPointerToAggregate; const bool WarnOnSizeOfPointer; const bool WarnOnOffsetDividedBySizeOf; + const bool WarnOnSizeOfInLoopTermination; }; } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index fc51f3c9329ad..934d52086b3b9 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -173,6 +173,11 @@ Changes in existing checks ` check to detect conversion in argument of ``std::make_optional``. +- Improved :doc: `bugprone-sizeof-expression + ` check by adding + `WarnOnSizeOfInLoopTermination` option to detect misuses of ``sizeof`` + expression in loop conditions. 
+ - Improved :doc:`bugprone-string-constructor ` check to find suspicious calls of ``std::string`` constructor with char pointer, start position and diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst index 29edb26ed7aa2..04824cc1fe0e4 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst @@ -316,3 +316,12 @@ Options When `true`, the check will warn on pointer arithmetic where the element count is obtained from a division with ``sizeof(...)``, e.g., ``Ptr + Bytes / sizeof(*T)``. Default is `true`. + +.. option:: WarnOnSizeOfInLoopTermination + + When `true`, the check will warn about incorrect use of sizeof expression + in loop termination condition. The warning triggers if the ``sizeof`` + expression appears to be incorrectly used to determine the number of + array/buffer elements. + e.g, ``long arr[10]; for(int i = 0; i < sizeof(arr); i++) { ... }``. Default + is `true`. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp index 5e6f394152e9d..33cf1cbea8377 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/sizeof-expression.cpp @@ -164,6 +164,69 @@ int Test2(MyConstChar* A) { return sum; } +struct A { + int array[10]; +}; + +struct B { + struct A a; +}; + +void loop_access_elements(int num, struct B b) { + struct A arr[10]; + char buf[20]; + + // CHECK-MESSAGES: :[[@LINE+1]]:22: warning: suspicious usage of 'sizeof' in the loop [bugprone-sizeof-expression] + for(int i = 0; i < sizeof(arr); i++) { + struct A a = arr[i]; + } + + // Loop warning should not trigger here, even though this code is incorrect + // CHECK-MESSAGES: :[[@LINE+2]]:22: warning: suspicious usage of 'sizeof(K)'; did you mean 'K'? [bugprone-sizeof-expression] + // CHECK-MESSAGES: :[[@LINE+1]]:32: warning: suspicious usage of 'sizeof(...)/sizeof(...)'; numerator is not a multiple of denominator [bugprone-sizeof-expression] + for(int i = 0; i < sizeof(10)/sizeof(A); i++) { + struct A a = arr[i]; + } + + // Should not warn here + for(int i = 0; i < sizeof(arr)/sizeof(A); i++) {} + + // Should not warn here + for (int i = 0; i < 10; i++) { + if (sizeof(arr) != 0) { + + } + } + + for (int i = 0; i < 10; i++) { + // CHECK-MESSAGES: :[[@LINE+1]]:25: warning: suspicious usage of 'sizeof' in the loop [bugprone-sizeof-expression] + for (int j = 0; j < sizeof(arr); j++) { + } + } + + // CHECK-MESSAGES: :[[@LINE+1]]:22: warning: suspicious usage of 'sizeof' in the loop [bugprone-sizeof-expression] + for(int j = 0; j < sizeof(b.a.array); j++) {} + + // Should not warn here + for(int i = 0; i < sizeof(buf); i++) {} + + // Should not warn here + for(int i = 0; i < (sizeof(arr) << 3); i++) {} + + int i = 0; + // CHECK-MESSAGES: :[[@LINE+1]]:14: warning: suspicious usage of 
'sizeof' in the loop [bugprone-sizeof-expression] + while(i <= sizeof(arr)) {i++;} + + i = 0; + do { + i++; + // CHECK-MESSAGES: :[[@LINE+1]]:16: warning: suspicious usage of 'sizeof' in the loop [bugprone-sizeof-expression] + } while(i <= sizeof(arr)); + + // CHECK-MESSAGES: :[[@LINE+1]]:29: warning: suspicious usage of 'sizeof' in the loop [bugprone-sizeof-expression] + for(int i = 0, j = 0; i < sizeof(arr) && j < sizeof(buf); i++, j++) {} +} + template int Foo() { int A[T]; return sizeof(T); } // CHECK-MESSAGES: :[[@LINE-1]]:30: warning: suspicious usage of 'sizeof(K)' diff --git a/clang/bindings/python/tests/cindex/test_file.py b/clang/bindings/python/tests/cindex/test_file.py index a8c1dbf558543..2be9b9e332611 100644 --- a/clang/bindings/python/tests/cindex/test_file.py +++ b/clang/bindings/python/tests/cindex/test_file.py @@ -9,6 +9,7 @@ inputs_dir = os.path.join(os.path.dirname(__file__), "INPUTS") + class TestFile(unittest.TestCase): def test_file(self): index = Index.create() diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e1fe22393eebb..ec1e1e7334d90 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -329,6 +329,9 @@ Non-comprehensive list of changes in this release ``__reference_constructs_from_temporary`` should be used instead. (#GH44056) - Added `__builtin_get_vtable_pointer` to directly load the primary vtable pointer from a polymorphic object. +- ``libclang`` receives a family of new bindings to query basic facts about + GCC-style inline assembly blocks, including whether the block is ``volatile`` + and its template string following the LLVM IR ``asm`` format. (#GH143424) - Clang no longer rejects reinterpret_cast conversions between indirect ARC-managed pointers and other pointer types. The prior behavior was overly strict and inconsistent with the ARC specification. 
@@ -644,7 +647,7 @@ Improvements to Clang's diagnostics #GH69470, #GH59391, #GH58172, #GH46215, #GH45915, #GH45891, #GH44490, #GH36703, #GH32903, #GH23312, #GH69874. - + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index e4cb4327fbaac..c35311c886413 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -36,7 +36,7 @@ #define CINDEX_VERSION_MAJOR 0 #define CINDEX_VERSION_MINOR 64 -#define CINDEX_VERSION_ENCODE(major, minor) (((major)*10000) + ((minor)*1)) +#define CINDEX_VERSION_ENCODE(major, minor) (((major) * 10000) + ((minor) * 1)) #define CINDEX_VERSION \ CINDEX_VERSION_ENCODE(CINDEX_VERSION_MAJOR, CINDEX_VERSION_MINOR) @@ -4495,6 +4495,129 @@ CINDEX_LINKAGE CXStringSet *clang_Cursor_getCXXManglings(CXCursor); */ CINDEX_LINKAGE CXStringSet *clang_Cursor_getObjCManglings(CXCursor); +/** + * @} + */ + +/** + * \defgroup CINDEX_MODULE Inline Assembly introspection + * + * The functions in this group provide access to information about GCC-style + * inline assembly statements. + * + * @{ + */ + +/** + * Given a CXCursor_GCCAsmStmt cursor, return the assembly template string. + * As per LLVM IR Assembly Template language, template placeholders for + * inputs and outputs are either of the form $N where N is a decimal number + * as an index into the input-output specification, + * or ${N:M} where N is a decimal number also as an index into the + * input-output specification and M is the template argument modifier. + * The index N in both cases points into the the total inputs and outputs, + * or more specifically, into the list of outputs followed by the inputs, + * starting from index 0 as the first available template argument. + * + * This function also returns a valid empty string if the cursor does not point + * at a GCC inline assembly block. 
+ * + * Users are responsible for releasing the allocation of returned string via + * \c clang_disposeString. + */ + +CINDEX_LINKAGE CXString clang_Cursor_getGCCAssemblyTemplate(CXCursor); + +/** + * Given a CXCursor_GCCAsmStmt cursor, check if the assembly block has goto + * labels. + * This function also returns 0 if the cursor does not point at a GCC inline + * assembly block. + */ + +CINDEX_LINKAGE unsigned clang_Cursor_isGCCAssemblyHasGoto(CXCursor); + +/** + * Given a CXCursor_GCCAsmStmt cursor, count the number of outputs. + * This function also returns 0 if the cursor does not point at a GCC inline + * assembly block. + */ + +CINDEX_LINKAGE unsigned clang_Cursor_getGCCAssemblyNumOutputs(CXCursor); + +/** + * Given a CXCursor_GCCAsmStmt cursor, count the number of inputs. + * This function also returns 0 if the cursor does not point at a GCC inline + * assembly block. + */ + +CINDEX_LINKAGE unsigned clang_Cursor_getGCCAssemblyNumInputs(CXCursor); + +/** + * Given a CXCursor_GCCAsmStmt cursor, get the constraint and expression cursor + * to the Index-th input. + * This function returns 1 when the cursor points at a GCC inline assembly + * statement, `Index` is within bounds and both the `Constraint` and `Expr` are + * not NULL. + * Otherwise, this function returns 0 but leaves `Constraint` and `Expr` + * intact. + * + * Users are responsible for releasing the allocation of `Constraint` via + * \c clang_disposeString. + */ + +CINDEX_LINKAGE unsigned clang_Cursor_getGCCAssemblyInput(CXCursor Cursor, + unsigned Index, + CXString *Constraint, + CXCursor *Expr); + +/** + * Given a CXCursor_GCCAsmStmt cursor, get the constraint and expression cursor + * to the Index-th output. + * This function returns 1 when the cursor points at a GCC inline assembly + * statement, `Index` is within bounds and both the `Constraint` and `Expr` are + * not NULL. + * Otherwise, this function returns 0 but leaves `Constraint` and `Expr` + * intact. 
+ * + * Users are responsible for releasing the allocation of `Constraint` via + * \c clang_disposeString. + */ + +CINDEX_LINKAGE unsigned clang_Cursor_getGCCAssemblyOutput(CXCursor Cursor, + unsigned Index, + CXString *Constraint, + CXCursor *Expr); + +/** + * Given a CXCursor_GCCAsmStmt cursor, count the clobbers in it. + * This function also returns 0 if the cursor does not point at a GCC inline + * assembly block. + */ + +CINDEX_LINKAGE unsigned clang_Cursor_getGCCAssemblyNumClobbers(CXCursor Cursor); + +/** + * Given a CXCursor_GCCAsmStmt cursor, get the Index-th clobber of it. + * This function returns a valid empty string if the cursor does not point + * at a GCC inline assembly block or `Index` is out of bounds. + * + * Users are responsible for releasing the allocation of returned string via + * \c clang_disposeString. + */ + +CINDEX_LINKAGE CXString clang_Cursor_getGCCAssemblyClobber(CXCursor Cursor, + unsigned Index); + +/** + * Given a CXCursor_GCCAsmStmt cursor, check if the inline assembly is + * `volatile`. + * This function returns 0 if the cursor does not point at a GCC inline + * assembly block. 
+ */ + +CINDEX_LINKAGE unsigned clang_Cursor_isGCCAssemblyVolatile(CXCursor Cursor); + /** * @} */ diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h index 8b263b16d5b1e..8fcc6a44027a0 100644 --- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h +++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h @@ -168,8 +168,6 @@ class RecordStorageLocation final : public StorageLocation { return {Children.begin(), Children.end()}; } - bool hasChild(const ValueDecl &D) const { return Children.contains(&D); } - private: FieldToLoc Children; SyntheticFieldMap SyntheticFields; diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 8867a9fe09fb9..909e35792b461 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -155,6 +155,13 @@ TARGET_HEADER_BUILTIN(_InterlockedIncrement64, "LLiLLiD*", "nh", INTRIN_H, TARGET_HEADER_BUILTIN(_InterlockedOr64, "LLiLLiD*LLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64, "LLiLLiD*LLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd_acq, "NiNiD*Ni", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd_rel, "NiNiD*Ni", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd_nf, "NiNiD*Ni", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd64_acq, "LLiLLiD*LLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd64_rel, "LLiLLiD*LLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd64_nf, "LLiLLiD*LLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") + TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_acq, "NiNiD*Ni", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_rel, "NiNiD*Ni", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") 
TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_nf, "NiNiD*Ni", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index edb3a17ac07c6..1d1f5a4ee3f9f 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -642,5 +642,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16 TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 099500754a0e0..7c278d6841c74 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -535,6 +535,12 @@ TARGET_BUILTIN(__builtin_ppc_bcdadd_p, "iiV16UcV16Uc", "", TARGET_BUILTIN(__builtin_ppc_bcdsub_p, "iiV16UcV16Uc", "", "isa-v207-instructions") +// P9 Binary-coded decimal (BCD) builtins. 
+TARGET_BUILTIN(__builtin_ppc_national2packed, "V16UcV16UcUc", "t", "power9-vector") +TARGET_BUILTIN(__builtin_ppc_packed2national, "V16UcV16Uc", "", "power9-vector") +TARGET_BUILTIN(__builtin_ppc_packed2zoned, "V16UcV16UcUc", "t", "power9-vector") +TARGET_BUILTIN(__builtin_ppc_zoned2packed, "V16UcV16UcUc", "t", "power9-vector") + TARGET_BUILTIN(__builtin_altivec_vclzlsbb, "SiV16Uc", "", "power9-vector") TARGET_BUILTIN(__builtin_altivec_vctzlsbb, "SiV16Uc", "", "power9-vector") TARGET_BUILTIN(__builtin_altivec_vprtybw, "V4UiV4Ui", "", "power9-vector") diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9392cbb39c021..5062505cf3c01 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1767,7 +1767,9 @@ def note_unsatisfied_trait : Note<"%0 is not %enum_select{" "%TriviallyRelocatable{trivially relocatable}|" "%Replaceable{replaceable}|" - "%TriviallyCopyable{trivially copyable}" + "%TriviallyCopyable{trivially copyable}|" + "%Empty{empty}|" + "%StandardLayout{standard-layout}" "}1">; def note_unsatisfied_trait_reason @@ -1787,6 +1789,16 @@ def note_unsatisfied_trait_reason "%NonReplaceableField{has a non-replaceable member %1 of type %2}|" "%NTCBase{has a non-trivially-copyable base %1}|" "%NTCField{has a non-trivially-copyable member %1 of type %2}|" + "%NonEmptyMember{has a non-static data member %1 of type %2}|" + "%VirtualFunction{has a virtual function %1}|" + "%NonEmptyBase{has a base class %1 that is not empty}|" + "%NonZeroLengthField{field %1 is a non-zero-length bit-field}|" + "%NonStandardLayoutBase{has a non-standard-layout base %1}|" + "%MixedAccess{has mixed access specifiers}|" + "%MixedAccessField{field %1 has a different access specifier than field %2}|" + "%MultipleDataBase{has multiple base classes with data members}|" + "%NonStandardLayoutMember{has a non-standard-layout member %1 of type %2}|" + 
"%IndirectBaseWithFields{has an indirect base %1 with data members}|" "%DeletedDtr{has a %select{deleted|user-provided}1 destructor}|" "%UserProvidedCtr{has a user provided %select{copy|move}1 " "constructor}|" diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index decba83251df2..ef77c46b011f7 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1737,16 +1737,40 @@ def GetMemberOp : CIR_Op<"get_member"> { def FuncOp : CIR_Op<"func", [ AutomaticAllocationScope, CallableOpInterface, FunctionOpInterface, + DeclareOpInterfaceMethods, IsolatedFromAbove ]> { let summary = "Declare or define a function"; let description = [{ The `cir.func` operation defines a function, similar to the `mlir::FuncOp` built-in. + + The function linkage information is specified by `linkage`, as defined by + `GlobalLinkageKind` attribute. + + Example: + + ```mlir + // External function definitions. + cir.func @abort() + + // A function with internal linkage. + cir.func internal @count(%x: i64) -> (i64) + return %x : i64 + + // Linkage information + cir.func linkonce_odr @some_method(...) + ``` }]; let arguments = (ins SymbolNameAttr:$sym_name, + CIR_VisibilityAttr:$global_visibility, TypeAttrOf:$function_type, + UnitAttr:$dso_local, + DefaultValuedAttr:$linkage, + OptionalAttr:$sym_visibility, + UnitAttr:$comdat, OptionalAttr:$arg_attrs, OptionalAttr:$res_attrs); @@ -1754,8 +1778,10 @@ def FuncOp : CIR_Op<"func", [ let skipDefaultBuilders = 1; - let builders = [OpBuilder<(ins "llvm::StringRef":$sym_name, - "FuncType":$type)>]; + let builders = [OpBuilder<(ins + "llvm::StringRef":$sym_name, "FuncType":$type, + CArg<"cir::GlobalLinkageKind", "cir::GlobalLinkageKind::ExternalLinkage">:$linkage) + >]; let extraClassDeclaration = [{ /// Returns the region on the current operation that is callable. 
This may @@ -2371,6 +2397,64 @@ def ComplexCreateOp : CIR_Op<"complex.create", [Pure, SameTypeOperands]> { let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// ComplexRealOp +//===----------------------------------------------------------------------===// + +def ComplexRealOp : CIR_Op<"complex.real", [Pure]> { + let summary = "Extract the real part of a complex value"; + let description = [{ + `cir.complex.real` operation takes an operand of `!cir.complex` type and + yields the real part of it. + + Example: + + ```mlir + %1 = cir.complex.real %0 : !cir.complex -> !cir.float + ``` + }]; + + let results = (outs CIR_AnyIntOrFloatType:$result); + let arguments = (ins CIR_ComplexType:$operand); + + let assemblyFormat = [{ + $operand `:` qualified(type($operand)) `->` qualified(type($result)) + attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// ComplexImagOp +//===----------------------------------------------------------------------===// + +def ComplexImagOp : CIR_Op<"complex.imag", [Pure]> { + let summary = "Extract the imaginary part of a complex value"; + let description = [{ + `cir.complex.imag` operation takes an operand of `!cir.complex` type and + yields the imaginary part of it. 
+ + Example: + + ```mlir + %1 = cir.complex.imag %0 : !cir.complex -> !cir.float + ``` + }]; + + let results = (outs CIR_AnyIntOrFloatType:$result); + let arguments = (ins CIR_ComplexType:$operand); + + let assemblyFormat = [{ + $operand `:` qualified(type($operand)) `->` qualified(type($result)) + attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // Assume Operations //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index fb5014a877151..9e8944d1114b8 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -72,16 +72,18 @@ struct MissingFeatures { // FuncOp handling static bool opFuncOpenCLKernelMetadata() { return false; } + static bool opFuncAstDeclAttr() { return false; } static bool opFuncCallingConv() { return false; } static bool opFuncExtraAttrs() { return false; } - static bool opFuncDsoLocal() { return false; } - static bool opFuncLinkage() { return false; } - static bool opFuncVisibility() { return false; } static bool opFuncNoProto() { return false; } static bool opFuncCPUAndFeaturesAttributes() { return false; } static bool opFuncSection() { return false; } - static bool opFuncSetComdat() { return false; } + static bool opFuncMultipleReturnVals() { return false; } static bool opFuncAttributesForDefinition() { return false; } + static bool opFuncMaybeHandleStaticInExternC() { return false; } + static bool opFuncGlobalAliases() { return false; } + static bool setLLVMFunctionFEnvAttributes() { return false; } + static bool setFunctionAttributes() { return false; } // CallOp handling static bool opCallPseudoDtor() { return false; } @@ -157,92 +159,92 @@ struct MissingFeatures { static bool addressPointerAuthInfo() { return false; } // Misc - static bool cirgenABIInfo() { return false; 
} static bool abiArgInfo() { return false; } - static bool tryEmitAsConstant() { return false; } - static bool constructABIArgDirectExtend() { return false; } - static bool opGlobalViewAttr() { return false; } - static bool lowerModeOptLevel() { return false; } - static bool opTBAA() { return false; } - static bool objCLifetime() { return false; } - static bool objCBlocks() { return false; } - static bool emitNullabilityCheck() { return false; } - static bool emitLValueAlignmentAssumption() { return false; } - static bool emitLifetimeMarkers() { return false; } - static bool astVarDeclInterface() { return false; } - static bool stackSaveOp() { return false; } + static bool addHeapAllocSiteMetadata() { return false; } static bool aggValueSlot() { return false; } - static bool aggValueSlotMayOverlap() { return false; } - static bool aggValueSlotVolatile() { return false; } - static bool aggValueSlotDestructedFlag() { return false; } static bool aggValueSlotAlias() { return false; } + static bool aggValueSlotDestructedFlag() { return false; } static bool aggValueSlotGC() { return false; } - static bool generateDebugInfo() { return false; } - static bool pointerOverflowSanitizer() { return false; } - static bool fpConstraints() { return false; } - static bool sanitizers() { return false; } - static bool addHeapAllocSiteMetadata() { return false; } - static bool targetCIRGenInfoArch() { return false; } - static bool targetCIRGenInfoOS() { return false; } - static bool targetCodeGenInfoGetNullPointer() { return false; } - static bool loopInfoStack() { return false; } - static bool requiresCleanups() { return false; } - static bool createProfileWeightsForLoop() { return false; } - static bool emitCondLikelihoodViaExpectIntrinsic() { return false; } - static bool pgoUse() { return false; } - static bool cgFPOptionsRAII() { return false; } - static bool metaDataNode() { return false; } - static bool fastMathFlags() { return false; } + static bool aggValueSlotMayOverlap() { 
return false; } + static bool aggValueSlotVolatile() { return false; } static bool alignCXXRecordDecl() { return false; } - static bool setNonGC() { return false; } - static bool incrementProfileCounter() { return false; } - static bool insertBuiltinUnpredictable() { return false; } - static bool objCGC() { return false; } - static bool weakRefReference() { return false; } - static bool hip() { return false; } - static bool setObjCGCLValueClass() { return false; } - static bool setDLLStorageClass() { return false; } - static bool openMP() { return false; } - static bool emitCheckedInBoundsGEP() { return false; } - static bool preservedAccessIndexRegion() { return false; } + static bool armComputeVolatileBitfields() { return false; } + static bool asmLabelAttr() { return false; } + static bool astVarDeclInterface() { return false; } + static bool attributeNoBuiltin() { return false; } static bool bitfields() { return false; } - static bool msabi() { return false; } - static bool typeChecks() { return false; } - static bool lambdaFieldToName() { return false; } - static bool moduleNameHash() { return false; } - static bool constantFoldSwitchStatement() { return false; } - static bool cudaSupport() { return false; } - static bool maybeHandleStaticInExternC() { return false; } + static bool builtinCall() { return false; } + static bool builtinCallF128() { return false; } + static bool builtinCallMathErrno() { return false; } + static bool cgFPOptionsRAII() { return false; } + static bool cirgenABIInfo() { return false; } + static bool cleanupAfterErrorDiags() { return false; } + static bool cleanupsToDeactivate() { return false; } static bool constEmitterArrayILE() { return false; } static bool constEmitterVectorILE() { return false; } - static bool needsGlobalCtorDtor() { return false; } - static bool emitTypeCheck() { return false; } - static bool writebacks() { return false; } - static bool cleanupsToDeactivate() { return false; } - static bool stackBase() { return 
false; } - static bool deferredCXXGlobalInit() { return false; } - static bool setTargetAttributes() { return false; } + static bool constantFoldSwitchStatement() { return false; } + static bool constructABIArgDirectExtend() { return false; } static bool coverageMapping() { return false; } - static bool peepholeProtection() { return false; } - static bool instrumentation() { return false; } - static bool cleanupAfterErrorDiags() { return false; } + static bool createProfileWeightsForLoop() { return false; } + static bool ctorMemcpyizer() { return false; } + static bool cudaSupport() { return false; } static bool cxxRecordStaticMembers() { return false; } - static bool isMemcpyEquivalentSpecialMember() { return false; } - static bool isTrivialCtorOrDtor() { return false; } + static bool dataLayoutTypeAllocSize() { return false; } + static bool deferredCXXGlobalInit() { return false; } + static bool emitCheckedInBoundsGEP() { return false; } + static bool emitCondLikelihoodViaExpectIntrinsic() { return false; } + static bool emitLifetimeMarkers() { return false; } + static bool emitLValueAlignmentAssumption() { return false; } + static bool emitNullabilityCheck() { return false; } + static bool emitTypeCheck() { return false; } + static bool fastMathFlags() { return false; } + static bool fpConstraints() { return false; } + static bool generateDebugInfo() { return false; } + static bool hip() { return false; } static bool implicitConstructorArgs() { return false; } + static bool incrementProfileCounter() { return false; } + static bool insertBuiltinUnpredictable() { return false; } + static bool instrumentation() { return false; } static bool intrinsics() { return false; } - static bool attributeNoBuiltin() { return false; } - static bool thunks() { return false; } - static bool runCleanupsScope() { return false; } + static bool isMemcpyEquivalentSpecialMember() { return false; } + static bool isTrivialCtorOrDtor() { return false; } + static bool lambdaFieldToName() 
{ return false; } + static bool loopInfoStack() { return false; } static bool lowerAggregateLoadStore() { return false; } - static bool dataLayoutTypeAllocSize() { return false; } - static bool asmLabelAttr() { return false; } - static bool builtinCall() { return false; } - static bool builtinCallF128() { return false; } - static bool builtinCallMathErrno() { return false; } + static bool lowerModeOptLevel() { return false; } + static bool maybeHandleStaticInExternC() { return false; } + static bool metaDataNode() { return false; } + static bool moduleNameHash() { return false; } + static bool msabi() { return false; } + static bool needsGlobalCtorDtor() { return false; } static bool nonFineGrainedBitfields() { return false; } - static bool armComputeVolatileBitfields() { return false; } - static bool ctorMemcpyizer() { return false; } + static bool objCBlocks() { return false; } + static bool objCGC() { return false; } + static bool objCLifetime() { return false; } + static bool openMP() { return false; } + static bool opGlobalViewAttr() { return false; } + static bool opTBAA() { return false; } + static bool peepholeProtection() { return false; } + static bool pgoUse() { return false; } + static bool pointerOverflowSanitizer() { return false; } + static bool preservedAccessIndexRegion() { return false; } + static bool requiresCleanups() { return false; } + static bool runCleanupsScope() { return false; } + static bool sanitizers() { return false; } + static bool setDLLStorageClass() { return false; } + static bool setNonGC() { return false; } + static bool setObjCGCLValueClass() { return false; } + static bool setTargetAttributes() { return false; } + static bool stackBase() { return false; } + static bool stackSaveOp() { return false; } + static bool targetCIRGenInfoArch() { return false; } + static bool targetCIRGenInfoOS() { return false; } + static bool targetCodeGenInfoGetNullPointer() { return false; } + static bool thunks() { return false; } + static bool 
tryEmitAsConstant() { return false; } + static bool typeChecks() { return false; } + static bool weakRefReference() { return false; } + static bool writebacks() { return false; } // Missing types static bool dataMemberType() { return false; } diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 0ec1cb4d0c5d8..dae12a6015439 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2304,7 +2304,9 @@ class Preprocessor { /// Check whether the next pp-token is one of the specificed token kind. this /// method should have no observable side-effect on the lexed tokens. - template bool isNextPPTokenOneOf() { + template bool isNextPPTokenOneOf(Ts... Ks) { + static_assert(sizeof...(Ts) > 0, + "requires at least one tok::TokenKind specified"); // Do some quick tests for rejection cases. std::optional Val; if (CurLexer) @@ -2335,7 +2337,7 @@ class Preprocessor { // Okay, we found the token and return. Otherwise we found the end of the // translation unit. - return Val->is(K) || (... || Val->is(Ks)); + return Val->isOneOf(Ks...); } private: diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index d4dfd7b44d9af..fc43e72593b94 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -101,11 +101,10 @@ class Token { /// "if (Tok.is(tok::l_brace)) {...}". bool is(tok::TokenKind K) const { return Kind == K; } bool isNot(tok::TokenKind K) const { return Kind != K; } - bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { - return is(K1) || is(K2); - } - template bool isOneOf(tok::TokenKind K1, Ts... Ks) const { - return is(K1) || isOneOf(Ks...); + template bool isOneOf(Ts... 
Ks) const { + static_assert(sizeof...(Ts) > 0, + "requires at least one tok::TokenKind specified"); + return (is(Ks) || ...); } /// Return true if this is a raw identifier (when lexing diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 7a4b7d21bb20e..7d4b4467eb97d 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -464,8 +464,6 @@ class ASTReader using ModuleReverseIterator = ModuleManager::ModuleReverseIterator; private: - using LocSeq = SourceLocationSequence; - /// The receiver of some callbacks invoked by ASTReader. std::unique_ptr Listener; @@ -2445,18 +2443,16 @@ class ASTReader /// Read a source location from raw form and return it in its /// originating module file's source location space. std::pair - ReadUntranslatedSourceLocation(RawLocEncoding Raw, - LocSeq *Seq = nullptr) const { - return SourceLocationEncoding::decode(Raw, Seq); + ReadUntranslatedSourceLocation(RawLocEncoding Raw) const { + return SourceLocationEncoding::decode(Raw); } /// Read a source location from raw form. - SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw, - LocSeq *Seq = nullptr) const { + SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw) const { if (!MF.ModuleOffsetMap.empty()) ReadModuleOffsetMap(MF); - auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq); + auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw); ModuleFile *OwningModuleFile = ModuleFileIndex == 0 ? &MF : MF.TransitiveImports[ModuleFileIndex - 1]; @@ -2484,9 +2480,9 @@ class ASTReader /// Read a source location. 
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, - const RecordDataImpl &Record, unsigned &Idx, - LocSeq *Seq = nullptr) { - return ReadSourceLocation(ModuleFile, Record[Idx++], Seq); + const RecordDataImpl &Record, + unsigned &Idx) { + return ReadSourceLocation(ModuleFile, Record[Idx++]); } /// Read a FileID. @@ -2505,7 +2501,7 @@ class ASTReader /// Read a source range. SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record, - unsigned &Idx, LocSeq *Seq = nullptr); + unsigned &Idx); static llvm::BitVector ReadBitVector(const RecordData &Record, const StringRef Blob); diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h index da3f504ff27df..1472497ff5e7e 100644 --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -32,7 +32,6 @@ class OMPChildren; class ASTRecordReader : public serialization::DataStreamBasicReader { using ModuleFile = serialization::ModuleFile; - using LocSeq = SourceLocationSequence; ASTReader *Reader; ModuleFile *F; @@ -160,7 +159,7 @@ class ASTRecordReader TypeSourceInfo *readTypeSourceInfo(); /// Reads the location information for a type. - void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); + void readTypeLoc(TypeLoc TL); /// Map a local type ID within a given AST file to a global type ID. serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const { @@ -287,13 +286,13 @@ class ASTRecordReader void readOpenACCRoutineDeclAttr(OpenACCRoutineDeclAttr *A); /// Read a source location, advancing Idx. - SourceLocation readSourceLocation(LocSeq *Seq = nullptr) { - return Reader->ReadSourceLocation(*F, Record, Idx, Seq); + SourceLocation readSourceLocation() { + return Reader->ReadSourceLocation(*F, Record, Idx); } /// Read a source range, advancing Idx. 
- SourceRange readSourceRange(LocSeq *Seq = nullptr) { - return Reader->ReadSourceRange(*F, Record, Idx, Seq); + SourceRange readSourceRange() { + return Reader->ReadSourceRange(*F, Record, Idx); } /// Read an arbitrary constant value, advancing Idx. diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index 964c9e6ea8a25..ee005ec287708 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -29,7 +29,6 @@ class TypeLoc; /// An object for streaming information to a record. class ASTRecordWriter : public serialization::DataStreamBasicWriter { - using LocSeq = SourceLocationSequence; ASTWriter *Writer; ASTWriter::RecordDataImpl *Record; @@ -147,8 +146,8 @@ class ASTRecordWriter void AddFunctionDefinition(const FunctionDecl *FD); /// Emit a source location. - void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) { - return Writer->AddSourceLocation(Loc, *Record, Seq); + void AddSourceLocation(SourceLocation Loc) { + return Writer->AddSourceLocation(Loc, *Record); } void writeSourceLocation(SourceLocation Loc) { AddSourceLocation(Loc); @@ -174,8 +173,8 @@ class ASTRecordWriter } /// Emit a source range. - void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) { - return Writer->AddSourceRange(Range, *Record, Seq); + void AddSourceRange(SourceRange Range) { + return Writer->AddSourceRange(Range, *Record); } void writeBool(bool Value) { @@ -245,7 +244,7 @@ class ASTRecordWriter void AddTypeSourceInfo(TypeSourceInfo *TInfo); /// Emits source location information for a type. Does not emit the type. - void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); + void AddTypeLoc(TypeLoc TL); /// Emits a template argument location info. 
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 97679ace8b610..162be84bbda19 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -115,8 +115,6 @@ class ASTWriter : public ASTDeserializationListener, using TypeIdxMap = llvm::DenseMap; - using LocSeq = SourceLocationSequence; - /// The bitstream writer used to emit this precompiled header. llvm::BitstreamWriter &Stream; @@ -733,16 +731,14 @@ class ASTWriter : public ASTDeserializationListener, void AddFileID(FileID FID, RecordDataImpl &Record); /// Emit a source location. - void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, - LocSeq *Seq = nullptr); + void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record); /// Return the raw encodings for source locations. SourceLocationEncoding::RawLocEncoding - getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr); + getRawSourceLocationEncoding(SourceLocation Loc); /// Emit a source range. - void AddSourceRange(SourceRange Range, RecordDataImpl &Record, - LocSeq *Seq = nullptr); + void AddSourceRange(SourceRange Range, RecordDataImpl &Record); /// Emit a reference to an identifier. void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h index 33ca1728fa479..5b2485dbc719f 100644 --- a/clang/include/clang/Serialization/SourceLocationEncoding.h +++ b/clang/include/clang/Serialization/SourceLocationEncoding.h @@ -25,20 +25,17 @@ // * C: The macro bit. We rotate it to the lowest bit so that we can save some // space in case the index of the module file is 0. 
// -// Specially, if the index of the module file is 0, we allow to encode a -// sequence of locations we store only differences between successive elements. // //===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H +#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H + #include "clang/Basic/SourceLocation.h" #include "llvm/Support/MathExtras.h" #include -#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H -#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H - namespace clang { -class SourceLocationSequence; /// Serialized encoding of SourceLocations without context. /// Optimized to have small unsigned values (=> small after VBR encoding). @@ -54,119 +51,22 @@ class SourceLocationEncoding { static UIntTy decodeRaw(UIntTy Raw) { return (Raw >> 1) | (Raw << (UIntBits - 1)); } - friend SourceLocationSequence; public: using RawLocEncoding = uint64_t; static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset, - unsigned BaseModuleFileIndex, - SourceLocationSequence * = nullptr); - static std::pair - decode(RawLocEncoding, SourceLocationSequence * = nullptr); -}; - -/// Serialized encoding of a sequence of SourceLocations. -/// -/// Optimized to produce small values when locations with the sequence are -/// similar. Each element can be delta-encoded against the last nonzero element. -/// -/// Sequences should be started by creating a SourceLocationSequence::State, -/// and then passed around as SourceLocationSequence*. 
Example: -/// -/// // establishes a sequence -/// void EmitTopLevelThing() { -/// SourceLocationSequence::State Seq; -/// EmitContainedThing(Seq); -/// EmitRecursiveThing(Seq); -/// } -/// -/// // optionally part of a sequence -/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { -/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); -/// } -/// -/// // establishes a sequence if there isn't one already -/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { -/// SourceLocationSequence::State Seq(ParentSeq); -/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); -/// EmitRecursiveThing(Seq); -/// } -/// -class SourceLocationSequence { - using UIntTy = SourceLocation::UIntTy; - using EncodedTy = uint64_t; - constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; - static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); - - // Prev stores the rotated last nonzero location. - UIntTy &Prev; - - // Zig-zag encoding turns small signed integers into small unsigned integers. - // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... - static UIntTy zigZag(UIntTy V) { - UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); - return Sign ^ (V << 1); - } - static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } - - SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} - - EncodedTy encodeRaw(UIntTy Raw) { - if (Raw == 0) - return 0; - UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); - if (Prev == 0) - return Prev = Rotated; - UIntTy Delta = Rotated - Prev; - Prev = Rotated; - // Exactly one 33 bit value is possible! (1 << 32). - // This is because we have two representations of zero: trivial & relative. 
- return 1 + EncodedTy{zigZag(Delta)}; - } - UIntTy decodeRaw(EncodedTy Encoded) { - if (Encoded == 0) - return 0; - if (Prev == 0) - return SourceLocationEncoding::decodeRaw(Prev = Encoded); - return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); - } - -public: - SourceLocation decode(EncodedTy Encoded) { - return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); - } - EncodedTy encode(SourceLocation Loc) { - return encodeRaw(Loc.getRawEncoding()); - } - - class State; -}; - -/// This object establishes a SourceLocationSequence. -class SourceLocationSequence::State { - UIntTy Prev = 0; - SourceLocationSequence Seq; - -public: - // If Parent is provided and non-null, then this root becomes part of that - // enclosing sequence instead of establishing a new one. - State(SourceLocationSequence *Parent = nullptr) - : Seq(Parent ? Parent->Prev : Prev) {} - - // Implicit conversion for uniform use of roots vs propagated sequences. - operator SourceLocationSequence *() { return &Seq; } + unsigned BaseModuleFileIndex); + static std::pair decode(RawLocEncoding); }; inline SourceLocationEncoding::RawLocEncoding SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, - unsigned BaseModuleFileIndex, - SourceLocationSequence *Seq) { + unsigned BaseModuleFileIndex) { // If the source location is a local source location, we can try to optimize // the similar sequences to only record the differences. if (!BaseOffset) - return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); - + return encodeRaw(Loc.getRawEncoding()); if (Loc.isInvalid()) return 0; @@ -183,13 +83,11 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, return Encoded; } inline std::pair -SourceLocationEncoding::decode(RawLocEncoding Encoded, - SourceLocationSequence *Seq) { +SourceLocationEncoding::decode(RawLocEncoding Encoded) { unsigned ModuleFileIndex = Encoded >> 32; if (!ModuleFileIndex) - return {Seq ? 
Seq->decode(Encoded) - : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)), + return {SourceLocation::getFromRawEncoding(decodeRaw(Encoded)), ModuleFileIndex}; Encoded &= llvm::maskTrailingOnes(32); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h index f6a43bf5f493b..5dcf03f7a4648 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h @@ -554,6 +554,8 @@ class SimpleFunctionCall : public AnyFunctionCall { const FunctionDecl *getDecl() const override; + RuntimeDefinition getRuntimeDefinition() const override; + unsigned getNumArgs() const override { return getOriginExpr()->getNumArgs(); } const Expr *getArgExpr(unsigned Index) const override { diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 77145e2891a8a..05a5dc2d94256 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -89,6 +89,12 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, } static void defineXLCompatMacros(MacroBuilder &Builder) { + Builder.defineMacro("__builtin_national2packed", + "__builtin_ppc_national2packed"); + Builder.defineMacro("__builtin_packed2national", + "__builtin_ppc_packed2national"); + Builder.defineMacro("__builtin_packed2zoned", "__builtin_ppc_packed2zoned"); + Builder.defineMacro("__builtin_zoned2packed", "__builtin_ppc_zoned2packed"); Builder.defineMacro("__cdtbcd", "__builtin_ppc_cdtbcd"); Builder.defineMacro("__cbcdtd", "__builtin_ppc_cbcdtd"); Builder.defineMacro("__addg6s", "__builtin_ppc_addg6s"); diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index e38faba83b80c..ac62ea7c6aa16 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -366,6 +366,16 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return 
create(loc, resultComplexTy, real, imag); } + mlir::Value createComplexReal(mlir::Location loc, mlir::Value operand) { + auto operandTy = mlir::cast(operand.getType()); + return create(loc, operandTy.getElementType(), operand); + } + + mlir::Value createComplexImag(mlir::Location loc, mlir::Value operand) { + auto operandTy = mlir::cast(operand.getType()); + return create(loc, operandTy.getElementType(), operand); + } + /// Create a cir.ptr_stride operation to get access to an array element. /// \p idx is the index of the element to access, \p shouldDecay is true if /// the result should decay to a pointer to the element type. diff --git a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp index 51751483d34e9..da507d6f28335 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp @@ -25,7 +25,7 @@ cir::FuncOp CIRGenModule::codegenCXXStructor(GlobalDecl gd) { cir::FuncType funcType = getTypes().getFunctionType(fnInfo); cir::FuncOp fn = getAddrOfCXXStructor(gd, &fnInfo, /*FnType=*/nullptr, /*DontDefer=*/true, ForDefinition); - assert(!cir::MissingFeatures::opFuncLinkage()); + setFunctionLinkage(gd, fn); CIRGenFunction cgf{*this, builder}; curCGF = &cgf; { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 8d0db5cd0a1e5..7f8dcd96a6bff 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -603,6 +603,10 @@ class ScalarExprEmitter : public StmtVisitor { mlir::Value VisitUnaryLNot(const UnaryOperator *e); + mlir::Value VisitUnaryReal(const UnaryOperator *e); + + mlir::Value VisitUnaryImag(const UnaryOperator *e); + mlir::Value VisitCXXThisExpr(CXXThisExpr *te) { return cgf.loadCXXThis(); } /// Emit a conversion from the specified type to the specified destination @@ -1891,6 +1895,48 @@ mlir::Value ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *e) { return maybePromoteBoolResult(boolVal, 
cgf.convertType(e->getType())); } +mlir::Value ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *e) { + // TODO(cir): handle scalar promotion. + Expr *op = e->getSubExpr(); + if (op->getType()->isAnyComplexType()) { + // If it's an l-value, load through the appropriate subobject l-value. + // Note that we have to ask `e` because `op` might be an l-value that + // this won't work for, e.g. an Obj-C property. + if (e->isGLValue()) { + mlir::Location loc = cgf.getLoc(e->getExprLoc()); + mlir::Value complex = cgf.emitComplexExpr(op); + return cgf.builder.createComplexReal(loc, complex); + } + + // Otherwise, calculate and project. + cgf.cgm.errorNYI(e->getSourceRange(), + "VisitUnaryReal calculate and project"); + } + + return Visit(op); +} + +mlir::Value ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *e) { + // TODO(cir): handle scalar promotion. + Expr *op = e->getSubExpr(); + if (op->getType()->isAnyComplexType()) { + // If it's an l-value, load through the appropriate subobject l-value. + // Note that we have to ask `e` because `op` might be an l-value that + // this won't work for, e.g. an Obj-C property. + if (e->isGLValue()) { + mlir::Location loc = cgf.getLoc(e->getExprLoc()); + mlir::Value complex = cgf.emitComplexExpr(op); + return cgf.builder.createComplexImag(loc, complex); + } + + // Otherwise, calculate and project. + cgf.cgm.errorNYI(e->getSourceRange(), + "VisitUnaryImag calculate and project"); + } + + return Visit(op); +} + /// Return the size or alignment of the type of argument of the sizeof /// expression as an integer. 
mlir::Value ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( @@ -1914,13 +1960,6 @@ mlir::Value ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( return builder.getConstant( loc, builder.getAttr( cgf.cgm.UInt64Ty, llvm::APSInt(llvm::APInt(64, 1), true))); - } else if (e->getKind() == UETT_VectorElements) { - cgf.getCIRGenModule().errorNYI(e->getSourceRange(), - "sizeof operator for VectorElements", - e->getStmtClassName()); - return builder.getConstant( - loc, builder.getAttr( - cgf.cgm.UInt64Ty, llvm::APSInt(llvm::APInt(64, 1), true))); } return builder.getConstant( diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 68ab81ed53af9..f24bee44f26a7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -406,6 +406,16 @@ void CIRGenModule::emitGlobalFunctionDefinition(clang::GlobalDecl gd, /*DontDefer=*/true, ForDefinition); } + // Already emitted. + if (!funcOp.isDeclaration()) + return; + + setFunctionLinkage(gd, funcOp); + setGVProperties(funcOp, funcDecl); + assert(!cir::MissingFeatures::opFuncMaybeHandleStaticInExternC()); + maybeSetTrivialComdat(*funcDecl, funcOp); + assert(!cir::MissingFeatures::setLLVMFunctionFEnvAttributes()); + CIRGenFunction cgf(*this, builder); curCGF = &cgf; { @@ -413,7 +423,17 @@ void CIRGenModule::emitGlobalFunctionDefinition(clang::GlobalDecl gd, cgf.generateCode(gd, funcOp, funcType); } curCGF = nullptr; + + setNonAliasAttributes(gd, funcOp); assert(!cir::MissingFeatures::opFuncAttributesForDefinition()); + + if (const ConstructorAttr *ca = funcDecl->getAttr()) + errorNYI(funcDecl->getSourceRange(), "constructor attribute"); + if (const DestructorAttr *da = funcDecl->getAttr()) + errorNYI(funcDecl->getSourceRange(), "destructor attribute"); + + if (funcDecl->getAttr()) + errorNYI(funcDecl->getSourceRange(), "deferredAnnotations"); } mlir::Operation *CIRGenModule::getGlobalValue(StringRef name) { @@ -855,10 +875,12 @@ static bool 
shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) { void CIRGenModule::maybeSetTrivialComdat(const Decl &d, mlir::Operation *op) { if (!shouldBeInCOMDAT(*this, d)) return; - if (auto globalOp = dyn_cast_or_null(op)) + if (auto globalOp = dyn_cast_or_null(op)) { globalOp.setComdat(true); - - assert(!cir::MissingFeatures::opFuncSetComdat()); + } else { + auto funcOp = cast(op); + funcOp.setComdat(true); + } } void CIRGenModule::updateCompletedType(const TagDecl *td) { @@ -1028,6 +1050,17 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) { return getCIRLinkageForDeclarator(vd, linkage, isConstant); } +cir::GlobalLinkageKind CIRGenModule::getFunctionLinkage(GlobalDecl gd) { + const auto *fd = cast(gd.getDecl()); + + GVALinkage linkage = astContext.GetGVALinkageForFunction(fd); + + if (const auto *dtor = dyn_cast(fd)) + errorNYI(fd->getSourceRange(), "getFunctionLinkage: CXXDestructorDecl"); + + return getCIRLinkageForDeclarator(fd, linkage, /*IsConstantVariable=*/false); +} + static cir::GlobalOp generateStringLiteral(mlir::Location loc, mlir::TypedAttr c, cir::GlobalLinkageKind lt, CIRGenModule &cgm, @@ -1534,6 +1567,27 @@ void CIRGenModule::setGVPropertiesAux(mlir::Operation *op, assert(!cir::MissingFeatures::opGlobalPartition()); } +void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, + cir::FuncOp func, + bool isIncompleteFunction, + bool isThunk) { + // NOTE(cir): Original CodeGen checks if this is an intrinsic. In CIR we + // represent them in dedicated ops. The correct attributes are ensured during + // translation to LLVM. Thus, we don't need to check for them here. + + assert(!cir::MissingFeatures::setFunctionAttributes()); + assert(!cir::MissingFeatures::setTargetAttributes()); + + // TODO(cir): This needs a lot of work to better match CodeGen. 
That + // ultimately ends up in setGlobalVisibility, which already has the linkage of + // the LLVM GV (corresponding to our FuncOp) computed, so it doesn't have to + // recompute it here. This is a minimal fix for now. + if (!isLocalLinkage(getFunctionLinkage(globalDecl))) { + const Decl *decl = globalDecl.getDecl(); + func.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(decl)); + } +} + cir::FuncOp CIRGenModule::getOrCreateCIRFunction( StringRef mangledName, mlir::Type funcType, GlobalDecl gd, bool forVTable, bool dontDefer, bool isThunk, ForDefinition_t isForDefinition, @@ -1576,8 +1630,9 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction( // If there are two attempts to define the same mangled name, issue an // error. auto fn = cast(entry); - assert((!isForDefinition || !fn || !fn.isDeclaration()) && - "Duplicate function definition"); + if (isForDefinition && fn && !fn.isDeclaration()) { + errorNYI(d->getSourceRange(), "Duplicate function definition"); + } if (fn && fn.getFunctionType() == funcType) { return fn; } @@ -1598,6 +1653,9 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction( invalidLoc ? theModule->getLoc() : getLoc(funcDecl->getSourceRange()), mangledName, mlir::cast(funcType), funcDecl); + if (d) + setFunctionAttributes(gd, funcOp, /*isIncompleteFunction=*/false, isThunk); + // 'dontDefer' actually means don't move this to the deferredDeclsToEmit list. if (dontDefer) { // TODO(cir): This assertion will need an additional condition when we @@ -1668,6 +1726,20 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name, func = builder.create(loc, name, funcType); + assert(!cir::MissingFeatures::opFuncAstDeclAttr()); + assert(!cir::MissingFeatures::opFuncNoProto()); + + assert(func.isDeclaration() && "expected empty body"); + + // A declaration gets private visibility by default, but external linkage + // as the default linkage. 
+ func.setLinkageAttr(cir::GlobalLinkageKindAttr::get( + &getMLIRContext(), cir::GlobalLinkageKind::ExternalLinkage)); + mlir::SymbolTable::setSymbolVisibility( + func, mlir::SymbolTable::Visibility::Private); + + assert(!cir::MissingFeatures::opFuncExtraAttrs()); + if (!cgf) theModule.push_back(func); } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 71806e3c5de21..9f6a57c31d291 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -268,6 +268,10 @@ class CIRGenModule : public CIRGenTypeCache { void setGVProperties(mlir::Operation *op, const NamedDecl *d) const; void setGVPropertiesAux(mlir::Operation *op, const NamedDecl *d) const; + /// Set function attributes for a function declaration. + void setFunctionAttributes(GlobalDecl gd, cir::FuncOp f, + bool isIncompleteFunction, bool isThunk); + void emitGlobalDefinition(clang::GlobalDecl gd, mlir::Operation *op = nullptr); void emitGlobalFunctionDefinition(clang::GlobalDecl gd, mlir::Operation *op); @@ -340,10 +344,16 @@ class CIRGenModule : public CIRGenTypeCache { clang::VisibilityAttr::VisibilityType visibility); cir::VisibilityAttr getGlobalVisibilityAttrFromDecl(const Decl *decl); static mlir::SymbolTable::Visibility getMLIRVisibility(cir::GlobalOp op); - + cir::GlobalLinkageKind getFunctionLinkage(GlobalDecl gd); cir::GlobalLinkageKind getCIRLinkageForDeclarator(const DeclaratorDecl *dd, GVALinkage linkage, bool isConstantVariable); + void setFunctionLinkage(GlobalDecl gd, cir::FuncOp f) { + cir::GlobalLinkageKind l = getFunctionLinkage(gd); + f.setLinkageAttr(cir::GlobalLinkageKindAttr::get(&getMLIRContext(), l)); + mlir::SymbolTable::setSymbolVisibility(f, + getMLIRVisibilityFromCIRLinkage(l)); + } cir::GlobalLinkageKind getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 27f4ecb5ab85d..17157561357f9 100644 
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -115,9 +115,26 @@ template struct EnumTraits {}; static unsigned getMaxEnumVal() { return cir::getMaxEnumValFor##Ty(); } \ } +REGISTER_ENUM_TYPE(GlobalLinkageKind); +REGISTER_ENUM_TYPE(VisibilityKind); REGISTER_ENUM_TYPE(SideEffect); } // namespace +/// Parse an enum from the keyword, or default to the provided default value. +/// The return type is the enum type by default, unless overriden with the +/// second template argument. +template +static RetTy parseOptionalCIRKeyword(AsmParser &parser, EnumTy defaultValue) { + llvm::SmallVector names; + for (unsigned i = 0, e = EnumTraits::getMaxEnumVal(); i <= e; ++i) + names.push_back(EnumTraits::stringify(static_cast(i))); + + int index = parseOptionalKeywordAlternative(parser, names); + if (index == -1) + return static_cast(defaultValue); + return static_cast(index); +} + /// Parse an enum from the keyword, return failure if the keyword is not found. 
template static ParseResult parseCIRKeyword(AsmParser &parser, RetTy &result) { @@ -170,6 +187,26 @@ static bool omitRegionTerm(mlir::Region &r) { return singleNonEmptyBlock && yieldsNothing(); } +void printVisibilityAttr(OpAsmPrinter &printer, + cir::VisibilityAttr &visibility) { + switch (visibility.getValue()) { + case cir::VisibilityKind::Hidden: + printer << "hidden"; + break; + case cir::VisibilityKind::Protected: + printer << "protected"; + break; + case cir::VisibilityKind::Default: + break; + } +} + +void parseVisibilityAttr(OpAsmParser &parser, cir::VisibilityAttr &visibility) { + cir::VisibilityKind visibilityKind = + parseOptionalCIRKeyword(parser, cir::VisibilityKind::Default); + visibility = cir::VisibilityAttr::get(parser.getContext(), visibilityKind); +} + //===----------------------------------------------------------------------===// // CIR Custom Parsers/Printers //===----------------------------------------------------------------------===// @@ -1287,19 +1324,54 @@ cir::GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) { // FuncOp //===----------------------------------------------------------------------===// +/// Returns the name used for the linkage attribute. This *must* correspond to +/// the name of the attribute in ODS. 
+static llvm::StringRef getLinkageAttrNameString() { return "linkage"; } + void cir::FuncOp::build(OpBuilder &builder, OperationState &result, - StringRef name, FuncType type) { + StringRef name, FuncType type, + GlobalLinkageKind linkage) { result.addRegion(); result.addAttribute(SymbolTable::getSymbolAttrName(), builder.getStringAttr(name)); result.addAttribute(getFunctionTypeAttrName(result.name), TypeAttr::get(type)); + result.addAttribute( + getLinkageAttrNameString(), + GlobalLinkageKindAttr::get(builder.getContext(), linkage)); + result.addAttribute(getGlobalVisibilityAttrName(result.name), + cir::VisibilityAttr::get(builder.getContext())); } ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { llvm::SMLoc loc = parser.getCurrentLocation(); mlir::Builder &builder = parser.getBuilder(); + mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name); + mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name); + mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name); + + // Default to external linkage if no keyword is provided. 
+ state.addAttribute(getLinkageAttrNameString(), + GlobalLinkageKindAttr::get( + parser.getContext(), + parseOptionalCIRKeyword( + parser, GlobalLinkageKind::ExternalLinkage))); + + ::llvm::StringRef visAttrStr; + if (parser.parseOptionalKeyword(&visAttrStr, {"private", "public", "nested"}) + .succeeded()) { + state.addAttribute(visNameAttr, + parser.getBuilder().getStringAttr(visAttrStr)); + } + + cir::VisibilityAttr cirVisibilityAttr; + parseVisibilityAttr(parser, cirVisibilityAttr); + state.addAttribute(visibilityNameAttr, cirVisibilityAttr); + + if (parser.parseOptionalKeyword(dsoLocalNameAttr).succeeded()) + state.addAttribute(dsoLocalNameAttr, parser.getBuilder().getUnitAttr()); + StringAttr nameAttr; if (parser.parseSymbolName(nameAttr, SymbolTable::getSymbolAttrName(), state.attributes)) @@ -1346,10 +1418,14 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) { return success(); } +// This function corresponds to `llvm::GlobalValue::isDeclaration` and should +// have a similar implementation. We don't currently support aliases, ifuncs, +// or materializable functions, but those should be handled here as they are +// implemented. bool cir::FuncOp::isDeclaration() { - // TODO(CIR): This function will actually do something once external - // function declarations and aliases are upstreamed. 
- return false; + assert(!cir::MissingFeatures::opFuncGlobalAliases()); + assert(!cir::MissingFeatures::supportIFuncAttr()); + return getFunctionBody().empty(); } mlir::Region *cir::FuncOp::getCallableRegion() { @@ -1359,6 +1435,25 @@ mlir::Region *cir::FuncOp::getCallableRegion() { } void cir::FuncOp::print(OpAsmPrinter &p) { + if (getComdat()) + p << " comdat"; + + if (getLinkage() != GlobalLinkageKind::ExternalLinkage) + p << ' ' << stringifyGlobalLinkageKind(getLinkage()); + + mlir::SymbolTable::Visibility vis = getVisibility(); + if (vis != mlir::SymbolTable::Visibility::Public) + p << ' ' << vis; + + cir::VisibilityAttr cirVisibilityAttr = getGlobalVisibilityAttr(); + if (!cirVisibilityAttr.isDefault()) { + p << ' '; + printVisibilityAttr(p, cirVisibilityAttr); + } + + if (getDsoLocal()) + p << " dso_local"; + p << ' '; p.printSymbolName(getSymName()); cir::FuncType fnType = getFunctionType(); @@ -1914,6 +2009,42 @@ OpFoldResult cir::ComplexCreateOp::fold(FoldAdaptor adaptor) { return cir::ConstComplexAttr::get(realAttr, imagAttr); } +//===----------------------------------------------------------------------===// +// ComplexRealOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::ComplexRealOp::verify() { + if (getType() != getOperand().getType().getElementType()) { + emitOpError() << ": result type does not match operand type"; + return failure(); + } + return success(); +} + +OpFoldResult cir::ComplexRealOp::fold(FoldAdaptor adaptor) { + auto complex = + mlir::cast_if_present(adaptor.getOperand()); + return complex ? 
complex.getReal() : nullptr; +} + +//===----------------------------------------------------------------------===// +// ComplexImagOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::ComplexImagOp::verify() { + if (getType() != getOperand().getType().getElementType()) { + emitOpError() << ": result type does not match operand type"; + return failure(); + } + return success(); +} + +OpFoldResult cir::ComplexImagOp::fold(FoldAdaptor adaptor) { + auto complex = + mlir::cast_if_present(adaptor.getOperand()); + return complex ? complex.getImag() : nullptr; +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp index f07e234e5e84c..e505db50d3609 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp @@ -141,8 +141,9 @@ void CIRCanonicalizePass::runOnOperation() { // Many operations are here to perform a manual `fold` in // applyOpPatternsGreedily. 
if (isa(op)) + ComplexCreateOp, ComplexImagOp, ComplexRealOp, VecCmpOp, + VecCreateOp, VecExtractOp, VecShuffleOp, VecShuffleDynamicOp, + VecTernaryOp>(op)) ops.push_back(op); }); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index a870e6c45b69d..1c13c88902d9a 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -97,6 +97,18 @@ static mlir::Value createIntCast(mlir::OpBuilder &bld, mlir::Value src, return bld.create(loc, dstTy, src); } +static mlir::LLVM::Visibility +lowerCIRVisibilityToLLVMVisibility(cir::VisibilityKind visibilityKind) { + switch (visibilityKind) { + case cir::VisibilityKind::Default: + return ::mlir::LLVM::Visibility::Default; + case cir::VisibilityKind::Hidden: + return ::mlir::LLVM::Visibility::Hidden; + case cir::VisibilityKind::Protected: + return ::mlir::LLVM::Visibility::Protected; + } +} + /// Emits the value from memory as expected by its users. Should be called when /// the memory represetnation of a CIR type is not equal to its scalar /// representation. 
@@ -1014,9 +1026,12 @@ void CIRToLLVMFuncOpLowering::lowerFuncAttributes( SmallVectorImpl &result) const { assert(!cir::MissingFeatures::opFuncCallingConv()); for (mlir::NamedAttribute attr : func->getAttrs()) { + assert(!cir::MissingFeatures::opFuncCallingConv()); if (attr.getName() == mlir::SymbolTable::getSymbolAttrName() || attr.getName() == func.getFunctionTypeAttrName() || attr.getName() == getLinkageAttrNameString() || + attr.getName() == func.getGlobalVisibilityAttrName() || + attr.getName() == func.getDsoLocalAttrName() || (filterArgAndResAttrs && (attr.getName() == func.getArgAttrsAttrName() || attr.getName() == func.getResAttrsAttrName()))) @@ -1032,8 +1047,7 @@ mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite( mlir::ConversionPatternRewriter &rewriter) const { cir::FuncType fnType = op.getFunctionType(); - assert(!cir::MissingFeatures::opFuncDsoLocal()); - bool isDsoLocal = false; + bool isDsoLocal = op.getDsoLocal(); mlir::TypeConverter::SignatureConversion signatureConversion( fnType.getNumInputs()); @@ -1061,8 +1075,7 @@ mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite( mlir::isa(loc)) && "expected single location or unknown location here"); - assert(!cir::MissingFeatures::opFuncLinkage()); - mlir::LLVM::Linkage linkage = mlir::LLVM::Linkage::External; + mlir::LLVM::Linkage linkage = convertLinkage(op.getLinkage()); assert(!cir::MissingFeatures::opFuncCallingConv()); mlir::LLVM::CConv cconv = mlir::LLVM::CConv::C; SmallVector attributes; @@ -1072,7 +1085,11 @@ mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite( loc, op.getName(), llvmFnTy, linkage, isDsoLocal, cconv, mlir::SymbolRefAttr(), attributes); - assert(!cir::MissingFeatures::opFuncVisibility()); + assert(!cir::MissingFeatures::opFuncMultipleReturnVals()); + + fn.setVisibility_Attr(mlir::LLVM::VisibilityAttr::get( + getContext(), lowerCIRVisibilityToLLVMVisibility( + op.getGlobalVisibilityAttr().getValue()))); rewriter.inlineRegionBefore(op.getBody(), 
fn.getBody(), fn.end()); if (failed(rewriter.convertRegionTypes(&fn.getBody(), *typeConverter, @@ -1903,7 +1920,9 @@ void ConvertCIRToLLVMPass::runOnOperation() { CIRToLLVMVecShuffleOpLowering, CIRToLLVMVecShuffleDynamicOpLowering, CIRToLLVMVecTernaryOpLowering, - CIRToLLVMComplexCreateOpLowering + CIRToLLVMComplexCreateOpLowering, + CIRToLLVMComplexRealOpLowering, + CIRToLLVMComplexImagOpLowering // clang-format on >(converter, patterns.getContext()); @@ -2207,6 +2226,24 @@ mlir::LogicalResult CIRToLLVMComplexCreateOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMComplexRealOpLowering::matchAndRewrite( + cir::ComplexRealOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resultLLVMTy = getTypeConverter()->convertType(op.getType()); + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, adaptor.getOperand(), llvm::ArrayRef{0}); + return mlir::success(); +} + +mlir::LogicalResult CIRToLLVMComplexImagOpLowering::matchAndRewrite( + cir::ComplexImagOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resultLLVMTy = getTypeConverter()->convertType(op.getType()); + rewriter.replaceOpWithNewOp( + op, resultLLVMTy, adaptor.getOperand(), llvm::ArrayRef{1}); + return mlir::success(); +} + std::unique_ptr createConvertCIRToLLVMPass() { return std::make_unique(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index 52959d61355b0..8502cb1ae5d9f 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -443,6 +443,26 @@ class CIRToLLVMComplexCreateOpLowering mlir::ConversionPatternRewriter &) const override; }; +class CIRToLLVMComplexRealOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ComplexRealOp op, OpAdaptor, + 
mlir::ConversionPatternRewriter &) const override; +}; + +class CIRToLLVMComplexImagOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::ComplexImagOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + } // namespace direct } // namespace cir diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2c011a9519860..2a8722221f24b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2356,7 +2356,7 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, llvm::Type *OpTy = Signed->getType(); llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); - llvm::Type *ResTy = ResultPtr.getElementType(); + llvm::Type *ResTy = CGF.getTypes().ConvertType(ResultQTy); unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); // Take the absolute value of the signed operand. 
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ee5e3d68a5ffa..7ab0e2fdaa731 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -841,8 +841,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { auto *ISATy = DBuilder.createPointerType(ClassTy, Size); ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0, - 0, 0, llvm::DINode::FlagZero, nullptr, - llvm::DINodeArray()); + (uint64_t)0, 0, llvm::DINode::FlagZero, + nullptr, llvm::DINodeArray()); DBuilder.replaceArrays( ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType( diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 2897ccdf88660..0b6e830e0d557 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1138,7 +1138,9 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { llvm::Function::InternalLinkage; case TSK_ExplicitInstantiationDeclaration: - llvm_unreachable("Should not have been asked to emit this"); + return IsExternalDefinition + ? 
llvm::GlobalVariable::AvailableExternallyLinkage + : llvm::GlobalVariable::ExternalLinkage; } } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 59f14b3e35fd0..6c32c98cec011 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -234,6 +234,9 @@ class ApplyAtomGroup { uint64_t OriginalAtom = 0; CGDebugInfo *DI = nullptr; + ApplyAtomGroup(const ApplyAtomGroup &) = delete; + void operator=(const ApplyAtomGroup &) = delete; + public: ApplyAtomGroup(CGDebugInfo *DI); ~ApplyAtomGroup(); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 6738d4be6dd21..e30a8c6133055 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -6499,12 +6499,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case clang::AArch64::BI_InterlockedAdd: - case clang::AArch64::BI_InterlockedAdd64: { + case clang::AArch64::BI_InterlockedAdd_acq: + case clang::AArch64::BI_InterlockedAdd_rel: + case clang::AArch64::BI_InterlockedAdd_nf: + case clang::AArch64::BI_InterlockedAdd64: + case clang::AArch64::BI_InterlockedAdd64_acq: + case clang::AArch64::BI_InterlockedAdd64_rel: + case clang::AArch64::BI_InterlockedAdd64_nf: { Address DestAddr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); + llvm::AtomicOrdering Ordering; + switch (BuiltinID) { + case clang::AArch64::BI_InterlockedAdd: + case clang::AArch64::BI_InterlockedAdd64: + Ordering = llvm::AtomicOrdering::SequentiallyConsistent; + break; + case clang::AArch64::BI_InterlockedAdd_acq: + case clang::AArch64::BI_InterlockedAdd64_acq: + Ordering = llvm::AtomicOrdering::Acquire; + break; + case clang::AArch64::BI_InterlockedAdd_rel: + case clang::AArch64::BI_InterlockedAdd64_rel: + Ordering = llvm::AtomicOrdering::Release; + break; + case clang::AArch64::BI_InterlockedAdd_nf: + case clang::AArch64::BI_InterlockedAdd64_nf: + 
Ordering = llvm::AtomicOrdering::Monotonic; + break; + default: + llvm_unreachable("missing builtin ID in switch!"); + } AtomicRMWInst *RMWI = - Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, - llvm::AtomicOrdering::SequentiallyConsistent); + Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, Ordering); return Builder.CreateAdd(RMWI, Val); } } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index afa23bffcd073..be9a5e60af358 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -75,6 +75,10 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { SyncScope Scope, llvm::AtomicOrdering Ordering, llvm::LLVMContext &Ctx) const override; + bool supportsLibCall() const override { + return getABIInfo().getTarget().getTriple().getVendor() != + llvm::Triple::AMD; + } }; inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 2d055ffa17a8f..b88f148b2f1ad 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1030,10 +1030,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, return; } - llvm::StringMap> DerivedArchs; - llvm::StringMap FoundNormalizedTriples; - std::multiset OpenMPTriples; - // If the user specified -fopenmp-targets= we create a toolchain for each // valid triple. Otherwise, if only --offload-arch= was specified we instead // attempt to derive the appropriate toolchains from the arguments. @@ -1044,82 +1040,77 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, << OpenMPTargets->getAsString(C.getInputArgs()); return; } + + // Make sure these show up in a deterministic order. 
+ std::multiset OpenMPTriples; for (StringRef T : OpenMPTargets->getValues()) OpenMPTriples.insert(T); + + llvm::StringMap FoundNormalizedTriples; + for (StringRef T : OpenMPTriples) { + llvm::Triple TT(ToolChain::getOpenMPTriple(T)); + std::string NormalizedName = TT.normalize(); + + // Make sure we don't have a duplicate triple. + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, T); + if (!Inserted) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << T << TripleIt->second; + continue; + } + + // If the specified target is invalid, emit a diagnostic. + if (TT.getArch() == llvm::Triple::UnknownArch) { + Diag(clang::diag::err_drv_invalid_omp_target) << T; + continue; + } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); + } } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && ((!IsHIP && !IsCuda) || UseLLVMOffload)) { - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), - HostTC->getTriple()); + llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); + llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); // Attempt to deduce the offloading triple from the set of architectures. // We can only correctly deduce NVPTX / AMDGPU triples currently. - // We need to temporarily create these toolchains so that we can access - // tools for inferring architectures. 
- llvm::DenseSet Archs; - for (const std::optional &TT : {NVPTXTriple, AMDTriple}) { - if (!TT) - continue; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, *TT, - C.getDefaultToolChain().getTriple()); - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, true)) - Archs.insert(Arch); - } + for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, + C.getDefaultToolChain().getTriple()); + + llvm::DenseSet Archs = + getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, true); + llvm::DenseSet ArchsForTarget; + for (StringRef Arch : Archs) { + bool IsNVPTX = IsNVIDIAOffloadArch( + StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); + bool IsAMDGPU = IsAMDOffloadArch( + StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); + if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive("native")) { + Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) + << Arch; + return; + } - for (StringRef Arch : Archs) { - if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch( - getProcessorFromTargetID(*NVPTXTriple, Arch)))) { - DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); - } else if (AMDTriple && - IsAMDOffloadArch(StringToOffloadArch( - getProcessorFromTargetID(*AMDTriple, Arch)))) { - DerivedArchs[AMDTriple->getTriple()].insert(Arch); - } else { - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; - return; + if (TT.isNVPTX() && IsNVPTX) + ArchsForTarget.insert(Arch); + else if (TT.isAMDGPU() && IsAMDGPU) + ArchsForTarget.insert(Arch); + } + if (!ArchsForTarget.empty()) { + C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); + KnownArchs[&TC] = ArchsForTarget; } } // If the set is empty then we failed to find a native architecture. 
- if (Archs.empty()) { + auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP); + if (TCRange.first == TCRange.second) Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << "native"; - return; - } - - for (const auto &TripleAndArchs : DerivedArchs) - OpenMPTriples.insert(TripleAndArchs.first()); - } - - for (StringRef Val : OpenMPTriples) { - llvm::Triple TT(ToolChain::getOpenMPTriple(Val)); - std::string NormalizedName = TT.normalize(); - - // Make sure we don't have a duplicate triple. - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, Val); - if (!Inserted) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << Val << TripleIt->second; - continue; - } - - // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) { - Diag(clang::diag::err_drv_invalid_omp_target) << Val; - continue; - } - - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - auto It = DerivedArchs.find(TT.getTriple()); - if (It != DerivedArchs.end()) - KnownArchs[&TC] = It->second; } } else if (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ)) { Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 4cc4f5f22db0d..06f68ec8b0fc1 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -14,6 +14,7 @@ #include "FormatTokenLexer.h" #include "FormatToken.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" @@ -1203,16 +1204,22 @@ static size_t countLeadingWhitespace(StringRef Text) { const unsigned char *const End = Text.bytes_end(); const unsigned char *Cur = Begin; while (Cur < End) { - if (isspace(Cur[0])) { + if 
(isWhitespace(Cur[0])) { ++Cur; - } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) { - // A '\' followed by a newline always escapes the newline, regardless - // of whether there is another '\' before it. + } else if (Cur[0] == '\\') { + // A backslash followed by optional horizontal whitespaces (P22232R2) and + // then a newline always escapes the newline. // The source has a null byte at the end. So the end of the entire input // isn't reached yet. Also the lexer doesn't break apart an escaped // newline. - assert(End - Cur >= 2); - Cur += 2; + const auto *Lookahead = Cur + 1; + while (isHorizontalWhitespace(*Lookahead)) + ++Lookahead; + // No line splice found; the backslash is a token. + if (!isVerticalWhitespace(*Lookahead)) + break; + // Splice found, consume it. + Cur = Lookahead + 1; } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' && (Cur[3] == '\n' || Cur[3] == '\r')) { // Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the @@ -1295,13 +1302,18 @@ FormatToken *FormatTokenLexer::getNextToken() { case '/': // The text was entirely whitespace when this loop was entered. Thus // this has to be an escape sequence. 
- assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" || - Text.substr(i, 4) == "\?\?/\r" || + assert(Text.substr(i, 4) == "\?\?/\r" || Text.substr(i, 4) == "\?\?/\n" || (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" || Text.substr(i - 1, 4) == "\?\?/\n")) || (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" || - Text.substr(i - 2, 4) == "\?\?/\n"))); + Text.substr(i - 2, 4) == "\?\?/\n")) || + (Text[i] == '\\' && [&]() -> bool { + size_t j = i + 1; + while (j < Text.size() && isHorizontalWhitespace(Text[j])) + ++j; + return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r'); + }())); InEscape = true; break; default: diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index 8e55d339b2388..b0dda65adfba1 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -635,15 +635,26 @@ bool isConfiguredQualifierOrType(const FormatToken *Tok, // If a token is an identifier and it's upper case, it could // be a macro and hence we need to be able to ignore it. bool isPossibleMacro(const FormatToken *Tok) { - if (!Tok) - return false; + assert(Tok); if (Tok->isNot(tok::identifier)) return false; - if (Tok->TokenText.upper() == Tok->TokenText.str()) { - // T,K,U,V likely could be template arguments - return Tok->TokenText.size() != 1; - } - return false; + + const auto Text = Tok->TokenText; + assert(Text.size() > 0); + + // T,K,U,V likely could be template arguments + if (Text.size() == 1) + return false; + + // It's unlikely that qualified names are object-like macros. 
+ const auto *Prev = Tok->getPreviousNonComment(); + if (Prev && Prev->is(tok::coloncolon)) + return false; + const auto *Next = Tok->getNextNonComment(); + if (Next && Next->is(tok::coloncolon)) + return false; + + return Text == Text.upper(); } } // namespace format diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index d2f8b2703a9a3..6ad9a79998426 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1371,7 +1371,7 @@ class AnnotatingParser { Tok->setType(TT_InlineASMColon); } else if (Contexts.back().ColonIsDictLiteral || Style.isProto()) { Tok->setType(TT_DictLiteral); - if (Prev && Style.isTextProto()) + if (Style.isTextProto()) Prev->setType(TT_SelectorName); } else if (Contexts.back().ColonIsObjCMethodExpr || Line.startsWith(TT_ObjCMethodSpecifier)) { @@ -1408,7 +1408,6 @@ class AnnotatingParser { } } else if (Contexts.back().ContextType == Context::C11GenericSelection) { Tok->setType(TT_GenericSelectionColon); - assert(Prev); if (Prev->isPointerOrReference()) Prev->setFinalizedType(TT_PointerOrReference); } else if ((CurrentToken && CurrentToken->is(tok::numeric_constant)) || @@ -1419,8 +1418,6 @@ class AnnotatingParser { !Line.getFirstNonComment()->isOneOf(tok::kw_enum, tok::kw_case, tok::kw_default) && !Line.startsWith(tok::kw_typedef, tok::kw_enum)) { - if (!Prev) - break; if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) || Prev->ClosesRequiresClause) { Tok->setType(TT_CtorInitializerColon); diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index 3dd1eb45817d4..39ccc97540b1e 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -370,8 +370,14 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { \*----------------------------------------------------------------------------*/ #if defined(__aarch64__) || defined(__arm64ec__) unsigned __int64 __getReg(int); -long _InterlockedAdd(long volatile *Addend, long Value); -__int64 
_InterlockedAdd64(__int64 volatile *Addend, __int64 Value); +long _InterlockedAdd(long volatile *, long); +long _InterlockedAdd_acq(long volatile *, long); +long _InterlockedAdd_nf(long volatile *, long); +long _InterlockedAdd_rel(long volatile *, long); +__int64 _InterlockedAdd64(__int64 volatile *, __int64); +__int64 _InterlockedAdd64_acq(__int64 volatile *, __int64); +__int64 _InterlockedAdd64_nf(__int64 volatile *, __int64); +__int64 _InterlockedAdd64_rel(__int64 volatile *, __int64); __int64 _ReadStatusReg(int); void _WriteStatusReg(int, __int64); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index c8974e5a3528c..b88624b22e622 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -183,9 +183,9 @@ static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) { AttributeCommonInfo::AttrArgsInfo AttrArgsInfo = AttributeCommonInfo::getCXX11AttrArgsInfo(II); if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required) - return PP.isNextPPTokenOneOf(); + return PP.isNextPPTokenOneOf(tok::l_paren); - return !PP.isNextPPTokenOneOf() || + return !PP.isNextPPTokenOneOf(tok::l_paren) || AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional; } return false; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 7fecbe9eee53c..500cf6f8400e0 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -813,14 +813,14 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (!Identifier.isExpandDisabled() && MI->isEnabled()) { // C99 6.10.3p10: If the preprocessing token immediately after the // macro name isn't a '(', this macro should not be expanded. 
- if (!MI->isFunctionLike() || isNextPPTokenOneOf()) + if (!MI->isFunctionLike() || isNextPPTokenOneOf(tok::l_paren)) return HandleMacroExpandedIdentifier(Identifier, MD); } else { // C99 6.10.3.4p2 says that a disabled macro may never again be // expanded, even if it's in a context where it could be expanded in the // future. Identifier.setFlag(Token::DisableExpand); - if (MI->isObjectLike() || isNextPPTokenOneOf()) + if (MI->isObjectLike() || isNextPPTokenOneOf(tok::l_paren)) Diag(Identifier, diag::pp_disabled_macro_expansion); } } diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp index 9b4d82745f881..d5c83aedb3008 100644 --- a/clang/lib/Sema/SemaPPC.cpp +++ b/clang/lib/Sema/SemaPPC.cpp @@ -106,6 +106,10 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, switch (BuiltinID) { default: return false; + case PPC::BI__builtin_ppc_national2packed: + case PPC::BI__builtin_ppc_packed2zoned: + case PPC::BI__builtin_ppc_zoned2packed: + return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1); case PPC::BI__builtin_altivec_crypto_vshasigmaw: case PPC::BI__builtin_altivec_crypto_vshasigmad: return SemaRef.BuiltinConstantArgRange(TheCall, 1, 0, 1) || diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp index 4dbb2450857e0..cb3d9b77ee4dd 100644 --- a/clang/lib/Sema/SemaTypeTraits.cpp +++ b/clang/lib/Sema/SemaTypeTraits.cpp @@ -121,7 +121,7 @@ static bool hasSuitableConstructorForRelocation(Sema &SemaRef, CXXMethodDecl *Decl = LookupSpecialMemberFromXValue(SemaRef, D, /*Assign=*/false); - return Decl && Decl->isUserProvided() == AllowUserDefined && + return Decl && (AllowUserDefined || !Decl->isUserProvided()) && !Decl->isDeleted(); } @@ -137,7 +137,7 @@ static bool hasSuitableMoveAssignmentOperatorForRelocation( if (!Decl) return false; - return Decl && Decl->isUserProvided() == AllowUserDefined && + return Decl && (AllowUserDefined || !Decl->isUserProvided()) && !Decl->isDeleted(); } @@ -1725,14 +1725,15 @@ 
static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, // Build expressions that emulate the effect of declval() and // declval(). - if (LhsT->isObjectType() || LhsT->isFunctionType()) - LhsT = Self.Context.getRValueReferenceType(LhsT); - if (RhsT->isObjectType() || RhsT->isFunctionType()) - RhsT = Self.Context.getRValueReferenceType(RhsT); - OpaqueValueExpr Lhs(KeyLoc, LhsT.getNonLValueExprType(Self.Context), - Expr::getValueKindForType(LhsT)); - OpaqueValueExpr Rhs(KeyLoc, RhsT.getNonLValueExprType(Self.Context), - Expr::getValueKindForType(RhsT)); + auto createDeclValExpr = [&](QualType Ty) -> OpaqueValueExpr { + if (Ty->isObjectType() || Ty->isFunctionType()) + Ty = Self.Context.getRValueReferenceType(Ty); + return {KeyLoc, Ty.getNonLValueExprType(Self.Context), + Expr::getValueKindForType(Ty)}; + }; + + auto Lhs = createDeclValExpr(LhsT); + auto Rhs = createDeclValExpr(RhsT); // Attempt the assignment in an unevaluated context within a SFINAE // trap at translation unit scope. 
@@ -1956,6 +1957,9 @@ static std::optional StdNameToTypeTrait(StringRef Name) { TypeTrait::UTT_IsCppTriviallyRelocatable) .Case("is_replaceable", TypeTrait::UTT_IsReplaceable) .Case("is_trivially_copyable", TypeTrait::UTT_IsTriviallyCopyable) + .Case("is_assignable", TypeTrait::BTT_IsAssignable) + .Case("is_empty", TypeTrait::UTT_IsEmpty) + .Case("is_standard_layout", TypeTrait::UTT_IsStandardLayout) .Default(std::nullopt); } @@ -2285,6 +2289,244 @@ static void DiagnoseNonTriviallyCopyableReason(Sema &SemaRef, SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D; } +static void DiagnoseNonAssignableReason(Sema &SemaRef, SourceLocation Loc, + QualType T, QualType U) { + const CXXRecordDecl *D = T->getAsCXXRecordDecl(); + + auto createDeclValExpr = [&](QualType Ty) -> OpaqueValueExpr { + if (Ty->isObjectType() || Ty->isFunctionType()) + Ty = SemaRef.Context.getRValueReferenceType(Ty); + return {Loc, Ty.getNonLValueExprType(SemaRef.Context), + Expr::getValueKindForType(Ty)}; + }; + + auto LHS = createDeclValExpr(T); + auto RHS = createDeclValExpr(U); + + EnterExpressionEvaluationContext Unevaluated( + SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); + Sema::ContextRAII TUContext(SemaRef, + SemaRef.Context.getTranslationUnitDecl()); + SemaRef.BuildBinOp(/*S=*/nullptr, Loc, BO_Assign, &LHS, &RHS); + + if (!D || D->isInvalidDecl()) + return; + + SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D; +} + +static void DiagnoseIsEmptyReason(Sema &S, SourceLocation Loc, + const CXXRecordDecl *D) { + // Non-static data members (ignore zero-width bit‐fields). 
+ for (const auto *Field : D->fields()) { + if (Field->isZeroLengthBitField()) + continue; + if (Field->isBitField()) { + S.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::NonZeroLengthField << Field + << Field->getSourceRange(); + continue; + } + S.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::NonEmptyMember << Field + << Field->getType() << Field->getSourceRange(); + } + + // Virtual functions. + for (const auto *M : D->methods()) { + if (M->isVirtual()) { + S.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::VirtualFunction << M + << M->getSourceRange(); + break; + } + } + + // Virtual bases and non-empty bases. + for (const auto &B : D->bases()) { + const auto *BR = B.getType()->getAsCXXRecordDecl(); + if (!BR || BR->isInvalidDecl()) + continue; + if (B.isVirtual()) { + S.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::VBase << B.getType() + << B.getSourceRange(); + } + if (!BR->isEmpty()) { + S.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::NonEmptyBase << B.getType() + << B.getSourceRange(); + } + } +} + +static void DiagnoseIsEmptyReason(Sema &S, SourceLocation Loc, QualType T) { + // Emit primary "not empty" diagnostic. + S.Diag(Loc, diag::note_unsatisfied_trait) << T << diag::TraitName::Empty; + + // While diagnosing is_empty, we want to look at the actual type, not a + // reference or an array of it. So we need to massage the QualType param to + // strip refs and arrays. 
+ if (T->isReferenceType()) + S.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::Ref; + T = T.getNonReferenceType(); + + if (auto *AT = S.Context.getAsArrayType(T)) + T = AT->getElementType(); + + if (auto *D = T->getAsCXXRecordDecl()) { + if (D->hasDefinition()) { + DiagnoseIsEmptyReason(S, Loc, D); + S.Diag(D->getLocation(), diag::note_defined_here) << D; + } + } +} + +static bool hasMultipleDataBaseClassesWithFields(const CXXRecordDecl *D) { + int NumBasesWithFields = 0; + for (const CXXBaseSpecifier &Base : D->bases()) { + const CXXRecordDecl *BaseRD = Base.getType()->getAsCXXRecordDecl(); + if (!BaseRD || BaseRD->isInvalidDecl()) + continue; + + for (const FieldDecl *Field : BaseRD->fields()) { + if (!Field->isUnnamedBitField()) { + if (++NumBasesWithFields > 1) + return true; // found more than one base class with fields + break; // no need to check further fields in this base class + } + } + } + return false; +} + +static void DiagnoseNonStandardLayoutReason(Sema &SemaRef, SourceLocation Loc, + const CXXRecordDecl *D) { + for (const CXXBaseSpecifier &B : D->bases()) { + assert(B.getType()->getAsCXXRecordDecl() && "invalid base?"); + if (B.isVirtual()) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::VBase << B.getType() + << B.getSourceRange(); + } + if (!B.getType()->isStandardLayoutType()) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::NonStandardLayoutBase << B.getType() + << B.getSourceRange(); + } + } + // Check for mixed access specifiers in fields. 
+ const FieldDecl *FirstField = nullptr; + AccessSpecifier FirstAccess = AS_none; + + for (const FieldDecl *Field : D->fields()) { + if (Field->isUnnamedBitField()) + continue; + + // Record the first field we see + if (!FirstField) { + FirstField = Field; + FirstAccess = Field->getAccess(); + continue; + } + + // Check if the field has a different access specifier than the first one. + if (Field->getAccess() != FirstAccess) { + // Emit a diagnostic about mixed access specifiers. + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::MixedAccess; + + SemaRef.Diag(FirstField->getLocation(), diag::note_defined_here) + << FirstField; + + SemaRef.Diag(Field->getLocation(), diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::MixedAccessField << Field + << FirstField; + + // No need to check further fields, as we already found mixed access. + break; + } + } + if (hasMultipleDataBaseClassesWithFields(D)) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::MultipleDataBase; + } + if (D->isPolymorphic()) { + // Find the best location to point “defined here” at. + const CXXMethodDecl *VirtualMD = nullptr; + // First, look for a virtual method. + for (const auto *M : D->methods()) { + if (M->isVirtual()) { + VirtualMD = M; + break; + } + } + if (VirtualMD) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::VirtualFunction << VirtualMD; + SemaRef.Diag(VirtualMD->getLocation(), diag::note_defined_here) + << VirtualMD; + } else { + // If no virtual method, point to the record declaration itself. 
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::VirtualFunction << D; + SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D; + } + } + for (const FieldDecl *Field : D->fields()) { + if (!Field->getType()->isStandardLayoutType()) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::NonStandardLayoutMember << Field + << Field->getType() << Field->getSourceRange(); + } + } + // Find any indirect base classes that have fields. + if (D->hasDirectFields()) { + const CXXRecordDecl *Indirect = nullptr; + D->forallBases([&](const CXXRecordDecl *BaseDef) { + if (BaseDef->hasDirectFields()) { + Indirect = BaseDef; + return false; // stop traversal + } + return true; // continue to the next base + }); + if (Indirect) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::IndirectBaseWithFields << Indirect + << Indirect->getSourceRange(); + } + } +} + +static void DiagnoseNonStandardLayoutReason(Sema &SemaRef, SourceLocation Loc, + QualType T) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait) + << T << diag::TraitName::StandardLayout; + + // Check type-level exclusion first. 
+ if (T->isVariablyModifiedType()) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::VLA; + return; + } + + if (T->isReferenceType()) { + SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) + << diag::TraitNotSatisfiedReason::Ref; + return; + } + T = T.getNonReferenceType(); + const CXXRecordDecl *D = T->getAsCXXRecordDecl(); + if (!D || D->isInvalidDecl()) + return; + + if (D->hasDefinition()) + DiagnoseNonStandardLayoutReason(SemaRef, Loc, D); + + SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D; +} + void Sema::DiagnoseTypeTraitDetails(const Expr *E) { E = E->IgnoreParenImpCasts(); if (E->containsErrors()) @@ -2305,6 +2547,15 @@ void Sema::DiagnoseTypeTraitDetails(const Expr *E) { case UTT_IsTriviallyCopyable: DiagnoseNonTriviallyCopyableReason(*this, E->getBeginLoc(), Args[0]); break; + case BTT_IsAssignable: + DiagnoseNonAssignableReason(*this, E->getBeginLoc(), Args[0], Args[1]); + break; + case UTT_IsEmpty: + DiagnoseIsEmptyReason(*this, E->getBeginLoc(), Args[0]); + break; + case UTT_IsStandardLayout: + DiagnoseNonStandardLayoutReason(*this, E->getBeginLoc(), Args[0]); + break; default: break; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b696cb2efee3d..523165c6cab64 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1914,10 +1914,9 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_EXPANSION_ENTRY: { - LocSeq::State Seq; - SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); - SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq); - SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq); + SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]); + SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2]); + SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3]); SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, 
ExpansionEnd, Record[5], Record[4], ID, BaseOffset + Record[0]); @@ -7072,13 +7071,10 @@ QualType ASTReader::readTypeRecord(TypeID ID) { namespace clang { class TypeLocReader : public TypeLocVisitor<TypeLocReader> { - using LocSeq = SourceLocationSequence; - ASTRecordReader &Reader; - LocSeq *Seq; - SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); } - SourceRange readSourceRange() { return Reader.readSourceRange(Seq); } + SourceLocation readSourceLocation() { return Reader.readSourceLocation(); } + SourceRange readSourceRange() { return Reader.readSourceRange(); } TypeSourceInfo *GetTypeSourceInfo() { return Reader.readTypeSourceInfo(); @@ -7093,8 +7089,7 @@ class TypeLocReader : public TypeLocVisitor<TypeLocReader> { } public: - TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq) - : Reader(Reader), Seq(Seq) {} + TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {} // We want compile-time assurance that we've enumerated all of // these, so unfortunately we have to declare them first, then @@ -7458,9 +7453,8 @@ void TypeLocReader::VisitDependentBitIntTypeLoc( TL.setNameLoc(readSourceLocation()); } -void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) { - LocSeq::State Seq(ParentSeq); - TypeLocReader TLR(*this, Seq); +void ASTRecordReader::readTypeLoc(TypeLoc TL) { + TypeLocReader TLR(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLR.Visit(TL); } @@ -8381,6 +8375,15 @@ bool ASTReader::LoadExternalSpecializationsImpl( if (It == SpecLookups.end()) return false; + llvm::TimeTraceScope TimeScope("Load External Specializations for ", [&] { + std::string Name; + llvm::raw_string_ostream OS(Name); + auto *ND = cast<NamedDecl>(D); + ND->getNameForDiagnostic(OS, ND->getASTContext().getPrintingPolicy(), + /*Qualified=*/true); + return Name; + }); + Deserializing LookupResults(this); auto HashValue = StableHashForTemplateArguments(TemplateArgs); @@ -10016,9 +10019,9 @@ ASTRecordReader::readNestedNameSpecifierLoc() { } SourceRange
ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record, - unsigned &Idx, LocSeq *Seq) { - SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq); - SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq); + unsigned &Idx) { + SourceLocation beg = ReadSourceLocation(F, Record, Idx); + SourceLocation end = ReadSourceLocation(F, Record, Idx); return SourceRange(beg, end); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4cca214f8e308..04cbd1ca552b7 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -330,19 +330,13 @@ class ASTTypeWriter { }; class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> { - using LocSeq = SourceLocationSequence; - ASTRecordWriter &Record; - LocSeq *Seq; - void addSourceLocation(SourceLocation Loc) { - Record.AddSourceLocation(Loc, Seq); - } - void addSourceRange(SourceRange Range) { Record.AddSourceRange(Range, Seq); } + void addSourceLocation(SourceLocation Loc) { Record.AddSourceLocation(Loc); } + void addSourceRange(SourceRange Range) { Record.AddSourceRange(Range); } public: - TypeLocWriter(ASTRecordWriter &Record, LocSeq *Seq) - : Record(Record), Seq(Seq) {} + TypeLocWriter(ASTRecordWriter &Record) : Record(Record) {} #define ABSTRACT_TYPELOC(CLASS, PARENT) #define TYPELOC(CLASS, PARENT) \ @@ -2449,13 +2443,12 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr) { SLocEntryOffsets.push_back(Offset); // Starting offset of this entry within this module, so skip the dummy. Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2); - LocSeq::State Seq; - AddSourceLocation(Expansion.getSpellingLoc(), Record, Seq); - AddSourceLocation(Expansion.getExpansionLocStart(), Record, Seq); + AddSourceLocation(Expansion.getSpellingLoc(), Record); + AddSourceLocation(Expansion.getExpansionLocStart(), Record); AddSourceLocation(Expansion.isMacroArgExpansion() ?
SourceLocation() : Expansion.getExpansionLocEnd(), - Record, Seq); + Record); Record.push_back(Expansion.isExpansionTokenRange()); // Compute the token length for this macro expansion. @@ -6653,7 +6646,7 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) { } SourceLocationEncoding::RawLocEncoding -ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { +ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc) { unsigned BaseOffset = 0; unsigned ModuleFileIndex = 0; @@ -6672,19 +6665,17 @@ ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { assert(&getChain()->getModuleManager()[F->Index] == F); } - return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex, Seq); + return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex); } -void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, - SourceLocationSequence *Seq) { +void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record) { Loc = getAdjustedLocation(Loc); - Record.push_back(getRawSourceLocationEncoding(Loc, Seq)); + Record.push_back(getRawSourceLocationEncoding(Loc)); } -void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record, - SourceLocationSequence *Seq) { - AddSourceLocation(Range.getBegin(), Record, Seq); - AddSourceLocation(Range.getEnd(), Record, Seq); +void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) { + AddSourceLocation(Range.getBegin(), Record); + AddSourceLocation(Range.getEnd(), Record); } void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) { @@ -6804,9 +6795,8 @@ void ASTRecordWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo) { AddTypeLoc(TInfo->getTypeLoc()); } -void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) { - LocSeq::State Seq(OuterSeq); - TypeLocWriter TLW(*this, Seq); +void ASTRecordWriter::AddTypeLoc(TypeLoc TL) { + TypeLocWriter TLW(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLW.Visit(TL); } 
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a6e320c7f3eb0..87536be8c8d98 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -971,7 +971,7 @@ void ASTStmtWriter::VisitCallExpr(CallExpr *E) { Record.push_back(E->getFPFeatures().getAsOpaqueInt()); if (!E->hasStoredFPFeatures() && !static_cast<bool>(E->getADLCallKind()) && - E->getStmtClass() == Stmt::CallExprClass) + !E->usesMemberSyntax() && E->getStmtClass() == Stmt::CallExprClass) AbbrevToUse = Writer.getCallExprAbbrev(); Code = serialization::EXPR_CALL; diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index aa61496d4aa0c..c56138e8893c1 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -15,6 +15,7 @@ #include "clang/AST/TypeVisitor.h" #include "clang/Basic/IdentifierTable.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/TimeProfiler.h" using namespace clang; @@ -405,6 +406,7 @@ void TemplateArgumentHasher::AddType(const Type *T) { unsigned clang::serialization::StableHashForTemplateArguments( llvm::ArrayRef<TemplateArgument> Args) { + llvm::TimeTraceScope TimeScope("Stable Hash for Template Arguments"); TemplateArgumentHasher Hasher; Hasher.AddInteger(Args.size()); for (TemplateArgument Arg : Args) diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index f78b1b84f9df6..34fcb9b64d555 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -688,6 +688,18 @@ const FunctionDecl *SimpleFunctionCall::getDecl() const { return getSVal(getOriginExpr()->getCallee()).getAsFunctionDecl(); } +RuntimeDefinition SimpleFunctionCall::getRuntimeDefinition() const { + // Clang converts lambdas to function pointers using an implicit conversion + // operator, which returns the lambda's
'__invoke' method. However, Sema + leaves the body of '__invoke' empty (it is generated later in CodeGen), so + we need to skip '__invoke' and access the lambda's operator() directly. + if (const auto *CMD = dyn_cast_if_present<CXXMethodDecl>(getDecl()); + CMD && CMD->isLambdaStaticInvoker()) + return RuntimeDefinition{CMD->getParent()->getLambdaCallOperator()}; + + return AnyFunctionCall::getRuntimeDefinition(); +} + const FunctionDecl *CXXInstanceCall::getDecl() const { const auto *CE = cast_or_null<CallExpr>(getOriginExpr()); if (!CE) diff --git a/clang/test/Analysis/lambda-convert-to-func-ptr.cpp b/clang/test/Analysis/lambda-convert-to-func-ptr.cpp new file mode 100644 index 0000000000000..c2ad7cd2de34a --- /dev/null +++ b/clang/test/Analysis/lambda-convert-to-func-ptr.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_analyze_cc1 -std=c++11 -analyzer-checker=core,debug.ExprInspection -analyzer-config inline-lambdas=true -verify %s + +void clang_analyzer_eval(bool); + +void basic() { + int (*ret_zero)() = []() { return 0; }; + clang_analyzer_eval(ret_zero() == 0); // expected-warning{{TRUE}} +} + +void withParam() { + int (*add_ten)(int) = [](int b) { return b + 10; }; + clang_analyzer_eval(add_ten(1) == 11); // expected-warning{{TRUE}} +} + +int callBack(int (*fp)(int), int x) { + return fp(x); +} + +void passWithFunc() { + clang_analyzer_eval(callBack([](int x) { return x; }, 5) == 5); // expected-warning{{TRUE}} +} diff --git a/clang/test/CIR/CodeGen/align-load.c b/clang/test/CIR/CodeGen/align-load.c index 06553a307f93a..17171d3607545 100644 --- a/clang/test/CIR/CodeGen/align-load.c +++ b/clang/test/CIR/CodeGen/align-load.c @@ -21,7 +21,7 @@ void accessStruct(struct S u) { u.d; } -// CIR: cir.func @accessStruct +// CIR: cir.func{{.*}} @accessStruct // CIR: cir.load align(8) // CIR: cir.load align(2) // CIR: cir.load align(4) @@ -58,7 +58,7 @@ void accessUnion(union U u) { u.d; } -// CIR: cir.func @accessUnion +// CIR: cir.func{{.*}} @accessUnion // CIR: cir.load align(8) // CIR: cir.load
align(8) // CIR: cir.load align(8) @@ -86,7 +86,7 @@ int loadAligned(myint *p) { return *p; } -// CIR: cir.func @loadAligned +// CIR: cir.func{{.*}} @loadAligned // CIR: cir.load align(1) // LLVM: @loadAligned diff --git a/clang/test/CIR/CodeGen/align-store.c b/clang/test/CIR/CodeGen/align-store.c index 9ce26fa020eeb..88686b94d8adf 100644 --- a/clang/test/CIR/CodeGen/align-store.c +++ b/clang/test/CIR/CodeGen/align-store.c @@ -12,7 +12,7 @@ void test1(myint *p) { *p = 0; } -// CIR: cir.func @test1 +// CIR: cir.func{{.*}} @test1 // CIR: cir.store align(1) // LLVM: @test1 diff --git a/clang/test/CIR/CodeGen/array.cpp b/clang/test/CIR/CodeGen/array.cpp index 26e172a006451..141b67e0e63c7 100644 --- a/clang/test/CIR/CodeGen/array.cpp +++ b/clang/test/CIR/CodeGen/array.cpp @@ -101,7 +101,7 @@ void func() { // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr, !s32i // CIR" cir.store %[[TMP]], %[[INIT_2]] : !s32i, !cir.ptr -// LLVM: define void @_Z4funcv() +// LLVM: define{{.*}} void @_Z4funcv() // LLVM-NEXT: %[[ARR:.*]] = alloca [10 x i32], i64 1, align 16 // LLVM-NEXT: %[[INIT:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[INIT_2:.*]] = alloca i32, i64 1, align 4 @@ -143,7 +143,7 @@ void func2() { // CIR: %[[ELE_1_PTR:.*]] = cir.ptr_stride(%[[LOAD_1]] : !cir.ptr, %[[OFFSET_1]] : !s64i), !cir.ptr // CIR: cir.store{{.*}} %[[ELE_1_PTR]], %[[ELE_ALLOCA]] : !cir.ptr, !cir.ptr> -// LLVM: define void @_Z5func2v() +// LLVM: define{{.*}} void @_Z5func2v() // LLVM: %[[ARR:.*]] = alloca [2 x i32], i64 1, align 4 // LLVM: %[[TMP:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR]], i32 0 @@ -183,7 +183,7 @@ void func3() { // CIR: %[[ELE_TMP:.*]] = cir.load{{.*}} %[[ELE_PTR]] : !cir.ptr, !s32i // CIR: cir.store{{.*}} %[[ELE_TMP]], %[[INIT]] : !s32i, !cir.ptr -// LLVM: define void @_Z5func3v() +// LLVM: define{{.*}} void @_Z5func3v() // LLVM: %[[ARR:.*]] = alloca [2 x i32], i64 1, align 4 // LLVM: %[[IDX:.*]] = alloca i32, i64 1, 
align 4 // LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4 @@ -235,7 +235,7 @@ void func4() { // CIR: %[[TMP:.*]] = cir.load{{.*}} %[[ELE_0]] : !cir.ptr, !s32i // CIR: cir.store{{.*}} %[[TMP]], %[[INIT]] : !s32i, !cir.ptr -// LLVM: define void @_Z5func4v() +// LLVM: define{{.*}} void @_Z5func4v() // LLVM: %[[ARR:.*]] = alloca [2 x [1 x i32]], i64 1, align 4 // LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR]], i32 0 @@ -279,7 +279,7 @@ void func5() { // CIR: %10 = cir.ptr_stride(%7 : !cir.ptr>, %[[OFFSET_1]] : !s64i), !cir.ptr> // CIR: cir.store{{.*}} %10, %[[ARR_PTR]] : !cir.ptr>, !cir.ptr>> -// LLVM: define void @_Z5func5v() +// LLVM: define{{.*}} void @_Z5func5v() // LLVM: %[[ARR:.*]] = alloca [2 x [1 x i32]], i64 1, align 4 // LLVM: %[[TMP:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR]], i32 0 @@ -312,7 +312,7 @@ void func6() { // CIR: %[[V1:.*]] = cir.const #cir.int<5> : !s32i // CIR: cir.store{{.*}} %[[V1]], %[[ELE_PTR]] : !s32i, !cir.ptr -// LLVM: define void @_Z5func6v() +// LLVM: define{{.*}} void @_Z5func6v() // LLVM: %[[VAR:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[ARR:.*]] = alloca [2 x i32], i64 1, align 4 // LLVM: store i32 4, ptr %[[VAR]], align 4 @@ -345,7 +345,7 @@ void func7() { // CIR: %[[ELE_PTR:.*]] = cir.ptr_stride(%[[TMP]] : !cir.ptr>, %[[OFFSET]] : !s64i), !cir.ptr> // CIR: cir.store{{.*}} %[[ELE_PTR]], %[[ARR_TMP]] : !cir.ptr>, !cir.ptr>> -// LLVM: define void @_Z5func7v() +// LLVM: define{{.*}} void @_Z5func7v() // LLVM: %[[ARR:.*]] = alloca [1 x ptr], i64 1, align 8 // LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[ELE_PTR:.*]] = getelementptr ptr, ptr %[[ARR]], i32 0 @@ -363,7 +363,7 @@ void func8(int arr[10]) { int e2 = arr[1]; } -// CIR: cir.func @_Z5func8Pi(%[[ARG:.*]]: !cir.ptr +// CIR: cir.func{{.*}} @_Z5func8Pi(%[[ARG:.*]]: !cir.ptr // CIR: %[[ARR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arr", init] 
// CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr, ["e", init] // CIR: %[[INIT_2:.*]] = cir.alloca !s32i, !cir.ptr, ["e2", init] @@ -379,7 +379,7 @@ void func8(int arr[10]) { // CIR: %[[TMP_4:.*]] = cir.load{{.*}} %[[ELE_1]] : !cir.ptr, !s32i // CIR: cir.store{{.*}} %[[TMP_4]], %[[INIT_2]] : !s32i, !cir.ptr -// LLVM: define void @_Z5func8Pi(ptr %[[ARG:.*]]) +// LLVM: define{{.*}} void @_Z5func8Pi(ptr %[[ARG:.*]]) // LLVM: %[[ARR:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[INIT_2:.*]] = alloca i32, i64 1, align 4 @@ -410,7 +410,7 @@ void func9(int arr[10][5]) { int e = arr[1][2]; } -// CIR: cir.func @_Z5func9PA5_i(%[[ARG:.*]]: !cir.ptr> +// CIR: cir.func{{.*}} @_Z5func9PA5_i(%[[ARG:.*]]: !cir.ptr> // CIR: %[[ARR:.*]] = cir.alloca !cir.ptr>, !cir.ptr>>, ["arr", init] // CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr, ["e", init] // CIR: cir.store{{.*}} %[[ARG]], %[[ARR]] : !cir.ptr>, !cir.ptr>> @@ -423,7 +423,7 @@ void func9(int arr[10][5]) { // CIR: %[[TMP_2:.*]] = cir.load{{.*}} %[[ARR_1_2]] : !cir.ptr, !s32i // CIR: cir.store{{.*}} %[[TMP_2]], %[[INIT]] : !s32i, !cir.ptr -// LLVM: define void @_Z5func9PA5_i(ptr %[[ARG:.*]]) +// LLVM: define{{.*}} void @_Z5func9PA5_i(ptr %[[ARG:.*]]) // LLVM: %[[ARR:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4 // LLVM: store ptr %[[ARG]], ptr %[[ARR]], align 8 @@ -447,7 +447,7 @@ void func10(int *a) { int e = a[5]; } -// CIR: cir.func @_Z6func10Pi(%[[ARG:.*]]: !cir.ptr +// CIR: cir.func{{.*}} @_Z6func10Pi(%[[ARG:.*]]: !cir.ptr // CIR: %[[ARR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] // CIR: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr, ["e", init] // CIR: cir.store{{.*}} %[[ARG]], %[[ARR]] : !cir.ptr, !cir.ptr> @@ -457,7 +457,7 @@ void func10(int *a) { // CIR: %[[TMP_2:.*]] = cir.load{{.*}} %[[ELE]] : !cir.ptr, !s32i // CIR: cir.store{{.*}} %[[TMP_2]], %[[INIT]] : !s32i, !cir.ptr -// LLVM: define void @_Z6func10Pi(ptr %[[ARG:.*]]) { 
+// LLVM: define{{.*}} void @_Z6func10Pi(ptr %[[ARG:.*]]) { // LLVM: %[[ARR:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[INIT:.*]] = alloca i32, i64 1, align 4 // LLVM: store ptr %[[ARG]], ptr %[[ARR]], align 8 diff --git a/clang/test/CIR/CodeGen/basic.c b/clang/test/CIR/CodeGen/basic.c index 7ff73ee95f799..2c3c5b0f22a5c 100644 --- a/clang/test/CIR/CodeGen/basic.c +++ b/clang/test/CIR/CodeGen/basic.c @@ -34,7 +34,7 @@ int f1(int i) { return i; } -// CIR: cir.func @f1(%arg0: !s32i loc({{.*}})) -> !s32i +// CIR: cir.func{{.*}} @f1(%arg0: !s32i loc({{.*}})) -> !s32i // CIR-NEXT: %[[I_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CIR-NEXT: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CIR-NEXT: cir.store{{.*}} %arg0, %[[I_PTR]] : !s32i, !cir.ptr @@ -44,7 +44,7 @@ int f1(int i) { // CIR-NEXT: %[[R:.*]] = cir.load{{.*}} %[[RV]] : !cir.ptr, !s32i // CIR-NEXT: cir.return %[[R]] : !s32i -// LLVM: define i32 @f1(i32 %[[IP:.*]]) +// LLVM: define{{.*}} i32 @f1(i32 %[[IP:.*]]) // LLVM-NEXT: %[[I_PTR:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: store i32 %[[IP]], ptr %[[I_PTR]], align 4 @@ -64,14 +64,14 @@ int f1(int i) { int f2(void) { return 3; } -// CIR: cir.func @f2() -> !s32i +// CIR: cir.func{{.*}} @f2() -> !s32i // CIR-NEXT: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CIR-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i // CIR-NEXT: cir.store{{.*}} %[[THREE]], %[[RV]] : !s32i, !cir.ptr // CIR-NEXT: %[[R:.*]] = cir.load{{.*}} %0 : !cir.ptr, !s32i // CIR-NEXT: cir.return %[[R]] : !s32i -// LLVM: define i32 @f2() +// LLVM: define{{.*}} i32 @f2() // LLVM-NEXT: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: store i32 3, ptr %[[RV]], align 4 // LLVM-NEXT: %[[R:.*]] = load i32, ptr %[[RV]], align 4 @@ -86,7 +86,7 @@ int f3(void) { return i; } -// CIR: cir.func @f3() -> !s32i +// CIR: cir.func{{.*}} @f3() -> !s32i // 
CIR-NEXT: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CIR-NEXT: %[[I_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CIR-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i @@ -96,7 +96,7 @@ int f3(void) { // CIR-NEXT: %[[R:.*]] = cir.load{{.*}} %[[RV]] : !cir.ptr, !s32i // CIR-NEXT: cir.return %[[R]] : !s32i -// LLVM: define i32 @f3() +// LLVM: define{{.*}} i32 @f3() // LLVM-NEXT: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[I_PTR:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: store i32 3, ptr %[[I_PTR]], align 4 @@ -117,10 +117,10 @@ void f4(void) { ; } -// CIR: cir.func @f4() +// CIR: cir.func{{.*}} @f4() // CIR-NEXT: cir.return -// LLVM: define void @f4() +// LLVM: define{{.*}} void @f4() // LLVM-NEXT: ret void // OGCG: define{{.*}} void @f4() @@ -133,7 +133,7 @@ void f5(void) { ; } -// CIR: cir.func @f5() +// CIR: cir.func{{.*}} @f5() // CIR-NEXT: cir.scope { // CIR-NEXT: cir.for : cond { // CIR-NEXT: %0 = cir.const #true @@ -147,7 +147,7 @@ void f5(void) { // CIR-NEXT: cir.return // CIR-NEXT: } -// LLVM: define void @f5() +// LLVM: define{{.*}} void @f5() // LLVM: br label %[[SCOPE:.*]] // LLVM: [[SCOPE]]: // LLVM: br label %[[LOOP:.*]] @@ -171,7 +171,7 @@ int f6(void) { return gv; } -// CIR: cir.func @f6() -> !s32i +// CIR: cir.func{{.*}} @f6() -> !s32i // CIR-NEXT: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CIR-NEXT: %[[GV_PTR:.*]] = cir.get_global @gv : !cir.ptr // CIR-NEXT: %[[GV:.*]] = cir.load{{.*}} %[[GV_PTR]] : !cir.ptr, !s32i @@ -179,7 +179,7 @@ int f6(void) { // CIR-NEXT: %[[R:.*]] = cir.load{{.*}} %[[RV]] : !cir.ptr, !s32i // CIR-NEXT: cir.return %[[R]] : !s32i -// LLVM: define i32 @f6() +// LLVM: define{{.*}} i32 @f6() // LLVM-NEXT: %[[RV_PTR:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[GV:.*]] = load i32, ptr @gv, align 4 // LLVM-NEXT: store i32 %[[GV]], ptr %[[RV_PTR]], align 4 @@ -195,7 +195,7 @@ int f7(int a, int b, int c) { 
return a + (b + c); } -// CIR: cir.func @f7 +// CIR: cir.func{{.*}} @f7 // CIR: %[[A_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] // CIR: %[[C_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["c", init] @@ -205,7 +205,7 @@ int f7(int a, int b, int c) { // CIR: %[[B_PLUS_C:.*]] = cir.binop(add, %[[B]], %[[C]]) nsw : !s32i // CIR: %[[RETVAL:.*]] = cir.binop(add, %[[A]], %[[B_PLUS_C]]) nsw : !s32i -// LLVM: define i32 @f7 +// LLVM: define{{.*}} i32 @f7 // LLVM: %[[A_PTR:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[B_PTR:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[C_PTR:.*]] = alloca i32, i64 1, align 4 @@ -231,7 +231,7 @@ int f8(int *p) { return (*p); } -// CIR: cir.func @f8 +// CIR: cir.func{{.*}} @f8 // CIR: %[[P_PTR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] // CIR: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i // CIR: %[[P:.*]] = cir.load deref{{.*}} %[[P_PTR]] : !cir.ptr>, !cir.ptr @@ -239,7 +239,7 @@ int f8(int *p) { // CIR: %[[P2:.*]] = cir.load deref{{.*}} %[[P_PTR]] : !cir.ptr>, !cir.ptr // CIR: %[[STAR_P:.*]] = cir.load{{.*}} %[[P2]] : !cir.ptr, !s32i -// LLVM: define i32 @f8 +// LLVM: define{{.*}} i32 @f8 // LLVM: %[[P_PTR:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[P:.*]] = load ptr, ptr %[[P_PTR]], align 8 // LLVM: store i32 2, ptr %[[P]], align 4 @@ -257,10 +257,10 @@ int f8(int *p) { void f9() {} -// CIR: cir.func @f9() +// CIR: cir.func{{.*}} @f9() // CIR-NEXT: cir.return -// LLVM: define void @f9() +// LLVM: define{{.*}} void @f9() // LLVM-NEXT: ret void // OGCG: define{{.*}} void @f9() @@ -269,12 +269,12 @@ void f9() {} void f10(int arg0, ...) {} -// CIR: cir.func @f10(%[[ARG0:.*]]: !s32i loc({{.*}}), ...) +// CIR: cir.func{{.*}} @f10(%[[ARG0:.*]]: !s32i loc({{.*}}), ...) 
// CIR-NEXT: %[[ARG0_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["arg0", init] {alignment = 4 : i64} // CIR-NEXT: cir.store{{.*}} %[[ARG0]], %[[ARG0_PTR]] : !s32i, !cir.ptr // CIR-NEXT: cir.return -// LLVM: define void @f10(i32 %[[ARG0:.*]], ...) +// LLVM: define{{.*}} void @f10(i32 %[[ARG0:.*]], ...) // LLVM-NEXT: %[[ARG0_PTR:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: store i32 %[[ARG0]], ptr %[[ARG0_PTR]], align 4 // LLVM-NEXT: ret void @@ -292,7 +292,7 @@ size_type max_size(void) { return (size_type)~0 / sizeof(_Tp); } -// CIR: cir.func @max_size() +// CIR: cir.func{{.*}} @max_size() // CIR: %0 = cir.alloca !u64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} // CIR: %1 = cir.const #cir.int<0> : !s32i // CIR: %2 = cir.unary(not, %1) : !s32i, !s32i @@ -300,7 +300,7 @@ size_type max_size(void) { // CIR: %4 = cir.const #cir.int<8> : !u64i // CIR: %5 = cir.binop(div, %3, %4) : !u64i -// LLVM: define i64 @max_size() +// LLVM: define{{.*}} i64 @max_size() // LLVM: store i64 2305843009213693951, ptr // OGCG: define{{.*}} i64 @max_size() @@ -315,10 +315,10 @@ void test_char_literal() { c = 'X'; } -// CIR: cir.func @test_char_literal +// CIR: cir.func{{.*}} @test_char_literal // CIR: cir.const #cir.int<88> -// LLVM: define void @test_char_literal() +// LLVM: define{{.*}} void @test_char_literal() // LLVM: store i8 88, ptr %{{.*}}, align 1 // OGCG: define{{.*}} void @test_char_literal() diff --git a/clang/test/CIR/CodeGen/basic.cpp b/clang/test/CIR/CodeGen/basic.cpp index ed1c6d364a0ef..fe6dd938f0faf 100644 --- a/clang/test/CIR/CodeGen/basic.cpp +++ b/clang/test/CIR/CodeGen/basic.cpp @@ -31,7 +31,7 @@ int f1() { return i; } -// CHECK: cir.func @_Z2f1v() -> !s32i +// CHECK: cir.func{{.*}} @_Z2f1v() -> !s32i // CHECK: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: %[[I_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["i"] {alignment = 4 : i64} // CHECK: %[[I:.*]] = cir.load{{.*}} %[[I_PTR]] : !cir.ptr, !s32i @@ -44,7 +44,7 @@ int f2() { return 
i; } -// CHECK: cir.func @_Z2f2v() -> !s32i +// CHECK: cir.func{{.*}} @_Z2f2v() -> !s32i // CHECK: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: %[[I_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init, const] {alignment = 4 : i64} // CHECK: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i @@ -58,7 +58,7 @@ int f3(int i) { return i; } -// CHECK: cir.func @_Z2f3i(%[[ARG:.*]]: !s32i loc({{.*}})) -> !s32i +// CHECK: cir.func{{.*}} @_Z2f3i(%[[ARG:.*]]: !s32i loc({{.*}})) -> !s32i // CHECK: %[[ARG_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CHECK: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: cir.store{{.*}} %[[ARG]], %[[ARG_ALLOCA]] : !s32i, !cir.ptr @@ -71,7 +71,7 @@ int f4(const int i) { return i; } -// CHECK: cir.func @_Z2f4i(%[[ARG:.*]]: !s32i loc({{.*}})) -> !s32i +// CHECK: cir.func{{.*}} @_Z2f4i(%[[ARG:.*]]: !s32i loc({{.*}})) -> !s32i // CHECK: %[[ARG_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init, const] {alignment = 4 : i64} // CHECK: %[[RV:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: cir.store{{.*}} %[[ARG]], %[[ARG_ALLOCA]] : !s32i, !cir.ptr @@ -91,7 +91,7 @@ int *f5() { return p; } -// CHECK: cir.func @_Z2f5v() -> !cir.ptr +// CHECK: cir.func{{.*}} @_Z2f5v() -> !cir.ptr // CHECK-NEXT: %[[RET_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} // CHECK-NEXT: %[[P_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] {alignment = 8 : i64} // CHECK-NEXT: %[[NULLPTR:.*]] = cir.const #cir.ptr : !cir.ptr @@ -120,7 +120,7 @@ size_type max_size() { return size_type(~0) / sizeof(_Tp); } -// CHECK: cir.func @_Z8max_sizev() -> !u64i +// CHECK: cir.func{{.*}} @_Z8max_sizev() -> !u64i // CHECK: %0 = cir.alloca !u64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} // CHECK: %1 = cir.const #cir.int<0> : !s32i // CHECK: %2 = cir.unary(not, %1) : !s32i, !s32i @@ -137,7 +137,7 @@ void ref_arg(int &x) 
{ x = 3; } -// CHECK: cir.func @_Z7ref_argRi(%[[ARG:.*]]: !cir.ptr {{.*}}) +// CHECK: cir.func{{.*}} @_Z7ref_argRi(%[[ARG:.*]]: !cir.ptr {{.*}}) // CHECK: %[[X_REF_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["x", init, const] {alignment = 8 : i64} // CHECK: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} // CHECK: cir.store{{.*}} %[[ARG]], %[[X_REF_ADDR]] : !cir.ptr, !cir.ptr> @@ -154,7 +154,7 @@ short &return_ref() { return gs; } -// CHECK: cir.func @_Z10return_refv() -> !cir.ptr +// CHECK: cir.func{{.*}} @_Z10return_refv() -> !cir.ptr // CHECK: %[[RETVAL_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} // CHECK: %[[GS_ADDR:.*]] = cir.get_global @gs : !cir.ptr // CHECK: cir.store{{.*}} %[[GS_ADDR]], %[[RETVAL_ADDR]] : !cir.ptr, !cir.ptr> @@ -165,7 +165,7 @@ void ref_local(short x) { short &y = x; } -// CHECK: cir.func @_Z9ref_locals(%[[ARG:.*]]: !s16i {{.*}}) +// CHECK: cir.func{{.*}} @_Z9ref_locals(%[[ARG:.*]]: !s16i {{.*}}) // CHECK: %[[X_ADDR:.*]] = cir.alloca !s16i, !cir.ptr, ["x", init] {alignment = 2 : i64} // CHECK: %[[Y_REF_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["y", init, const] {alignment = 8 : i64} // CHECK: cir.store{{.*}} %[[ARG]], %[[X_ADDR]] : !s16i, !cir.ptr diff --git a/clang/test/CIR/CodeGen/binassign.c b/clang/test/CIR/CodeGen/binassign.c index 4955c988ec095..541b50a664c0e 100644 --- a/clang/test/CIR/CodeGen/binassign.c +++ b/clang/test/CIR/CodeGen/binassign.c @@ -17,7 +17,7 @@ void binary_assign(void) { i = 42; } -// CIR-LABEL: cir.func @binary_assign() { +// CIR-LABEL: cir.func{{.*}} @binary_assign() { // CIR: %[[B:.*]] = cir.alloca !cir.bool, !cir.ptr, ["b"] // CIR: %[[C:.*]] = cir.alloca !s8i, !cir.ptr, ["c"] // CIR: %[[F:.*]] = cir.alloca !cir.float, !cir.ptr, ["f"] diff --git a/clang/test/CIR/CodeGen/binop.cpp b/clang/test/CIR/CodeGen/binop.cpp index c728f0d0c1bc1..847e81755939f 100644 --- a/clang/test/CIR/CodeGen/binop.cpp +++ b/clang/test/CIR/CodeGen/binop.cpp @@ -16,7 
+16,7 @@ void b0(int a, int b) { x = x | b; } -// CIR-LABEL: cir.func @_Z2b0ii( +// CIR-LABEL: cir.func{{.*}} @_Z2b0ii( // CIR: %{{.+}} = cir.binop(mul, %{{.+}}, %{{.+}}) nsw : !s32i // CIR: %{{.+}} = cir.binop(div, %{{.+}}, %{{.+}}) : !s32i // CIR: %{{.+}} = cir.binop(rem, %{{.+}}, %{{.+}}) : !s32i @@ -27,7 +27,7 @@ void b0(int a, int b) { // CIR: %{{.+}} = cir.binop(or, %{{.+}}, %{{.+}}) : !s32i // CIR: cir.return -// LLVM-LABEL: define void @_Z2b0ii( +// LLVM-LABEL: define{{.*}} void @_Z2b0ii( // LLVM-SAME: i32 %[[A:.*]], i32 %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca i32 // LLVM: %[[B_ADDR:.*]] = alloca i32 @@ -77,7 +77,7 @@ void b0(int a, int b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z2b0ii(i32 {{.*}} %a, i32 {{.*}} %b) {{.*}} { +// OGCG-LABEL: define{{.*}} void @_Z2b0ii(i32 {{.*}} %a, i32 {{.*}} %b) {{.*}} { // OGCG: %[[A_ADDR:.*]] = alloca i32 // OGCG: %[[B_ADDR:.*]] = alloca i32 // OGCG: %[[X:.*]] = alloca i32 @@ -133,14 +133,14 @@ void testFloatingPointBinOps(float a, float b) { a - b; } -// CIR-LABEL: cir.func @_Z23testFloatingPointBinOpsff( +// CIR-LABEL: cir.func{{.*}} @_Z23testFloatingPointBinOpsff( // CIR: cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.float // CIR: cir.binop(div, %{{.+}}, %{{.+}}) : !cir.float // CIR: cir.binop(add, %{{.+}}, %{{.+}}) : !cir.float // CIR: cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.float // CIR: cir.return -// LLVM-LABEL: define void @_Z23testFloatingPointBinOpsff( +// LLVM-LABEL: define{{.*}} void @_Z23testFloatingPointBinOpsff( // LLVM-SAME: float %[[A:.*]], float %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca float, i64 1 // LLVM: %[[B_ADDR:.*]] = alloca float, i64 1 @@ -165,7 +165,7 @@ void testFloatingPointBinOps(float a, float b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z23testFloatingPointBinOpsff(float {{.*}} %a, float {{.*}} %b) +// OGCG-LABEL: define{{.*}} void @_Z23testFloatingPointBinOpsff(float {{.*}} %a, float {{.*}} %b) // OGCG: %a.addr = alloca float // OGCG: %b.addr = alloca 
float // OGCG: store float %a, ptr %a.addr @@ -194,7 +194,7 @@ void signed_shift(int a, int b) { x = a << b; } -// CIR-LABEL: cir.func @_Z12signed_shiftii( +// CIR-LABEL: cir.func{{.*}} @_Z12signed_shiftii( // CIR-SAME: %[[ARG0:.*]]: !s32i{{.*}}, %[[ARG1:.*]]: !s32i{{.*}}) // CIR: %[[A_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] @@ -215,7 +215,7 @@ void signed_shift(int a, int b) { // CIR: cir.return -// LLVM-LABEL: define void @_Z12signed_shiftii +// LLVM-LABEL: define{{.*}} void @_Z12signed_shiftii // LLVM-SAME: (i32 %[[A:.*]], i32 %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca i32 // LLVM: %[[B_ADDR:.*]] = alloca i32 @@ -235,7 +235,7 @@ void signed_shift(int a, int b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z12signed_shiftii +// OGCG-LABEL: define{{.*}} void @_Z12signed_shiftii // OGCG-SAME: (i32 {{.*}} %[[A:.*]], i32 {{.*}} %[[B:.*]]) // OGCG: %[[A_ADDR:.*]] = alloca i32 // OGCG: %[[B_ADDR:.*]] = alloca i32 @@ -260,7 +260,7 @@ void unsigned_shift(unsigned a, unsigned b) { x = a << b; } -// CIR-LABEL: cir.func @_Z14unsigned_shiftjj( +// CIR-LABEL: cir.func{{.*}} @_Z14unsigned_shiftjj( // CIR-SAME: %[[ARG0:.*]]: !u32i{{.*}}, %[[ARG1:.*]]: !u32i{{.*}}) // CIR: %[[A_PTR:.*]] = cir.alloca !u32i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !u32i, !cir.ptr, ["b", init] @@ -281,7 +281,7 @@ void unsigned_shift(unsigned a, unsigned b) { // CIR: cir.return -// LLVM-LABEL: define void @_Z14unsigned_shiftjj +// LLVM-LABEL: define{{.*}} void @_Z14unsigned_shiftjj // LLVM-SAME: (i32 %[[A:.*]], i32 %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca i32 // LLVM: %[[B_ADDR:.*]] = alloca i32 @@ -301,7 +301,7 @@ void unsigned_shift(unsigned a, unsigned b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z14unsigned_shiftjj +// OGCG-LABEL: define{{.*}} void @_Z14unsigned_shiftjj // OGCG-SAME: (i32 {{.*}} %[[A:.*]], i32 {{.*}} %[[B:.*]]) // OGCG: %[[A_ADDR:.*]] = alloca i32 // 
OGCG: %[[B_ADDR:.*]] = alloca i32 @@ -326,7 +326,7 @@ void zext_shift_example(int a, unsigned char b) { x = a << b; } -// CIR-LABEL: cir.func @_Z18zext_shift_exampleih( +// CIR-LABEL: cir.func{{.*}} @_Z18zext_shift_exampleih( // CIR-SAME: %[[ARG0:.*]]: !s32i{{.*}}, %[[ARG1:.*]]: !u8i{{.*}}) // CIR: %[[A_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !u8i, !cir.ptr, ["b", init] @@ -349,7 +349,7 @@ void zext_shift_example(int a, unsigned char b) { // CIR: cir.return -// LLVM-LABEL: define void @_Z18zext_shift_exampleih +// LLVM-LABEL: define{{.*}} void @_Z18zext_shift_exampleih // LLVM-SAME: (i32 %[[A:.*]], i8 %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca i32 // LLVM: %[[B_ADDR:.*]] = alloca i8 @@ -371,7 +371,7 @@ void zext_shift_example(int a, unsigned char b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z18zext_shift_exampleih +// OGCG-LABEL: define{{.*}} void @_Z18zext_shift_exampleih // OGCG-SAME: (i32 {{.*}} %[[A:.*]], i8 {{.*}} %[[B:.*]]) // OGCG: %[[A_ADDR:.*]] = alloca i32 // OGCG: %[[B_ADDR:.*]] = alloca i8 @@ -398,7 +398,7 @@ void sext_shift_example(int a, signed char b) { x = a << b; } -// CIR-LABEL: cir.func @_Z18sext_shift_exampleia( +// CIR-LABEL: cir.func{{.*}} @_Z18sext_shift_exampleia( // CIR-SAME: %[[ARG0:.*]]: !s32i{{.*}}, %[[ARG1:.*]]: !s8i{{.*}}) // CIR: %[[A_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !s8i, !cir.ptr, ["b", init] @@ -421,7 +421,7 @@ void sext_shift_example(int a, signed char b) { // CIR: cir.return -// LLVM-LABEL: define void @_Z18sext_shift_exampleia +// LLVM-LABEL: define{{.*}} void @_Z18sext_shift_exampleia // LLVM-SAME: (i32 %[[A:.*]], i8 %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca i32 // LLVM: %[[B_ADDR:.*]] = alloca i8 @@ -443,7 +443,7 @@ void sext_shift_example(int a, signed char b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z18sext_shift_exampleia +// OGCG-LABEL: define{{.*}} void @_Z18sext_shift_exampleia // 
OGCG-SAME: (i32 {{.*}} %[[A:.*]], i8 {{.*}} %[[B:.*]]) // OGCG: %[[A_ADDR:.*]] = alloca i32 // OGCG: %[[B_ADDR:.*]] = alloca i8 @@ -470,7 +470,7 @@ void long_shift_example(long long a, short b) { x = a << b; } -// CIR-LABEL: cir.func @_Z18long_shift_examplexs( +// CIR-LABEL: cir.func{{.*}} @_Z18long_shift_examplexs( // CIR-SAME: %[[ARG0:.*]]: !s64i{{.*}}, %[[ARG1:.*]]: !s16i{{.*}}) // CIR: %[[A_PTR:.*]] = cir.alloca !s64i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !s16i, !cir.ptr, ["b", init] @@ -493,7 +493,7 @@ void long_shift_example(long long a, short b) { // CIR: cir.return -// LLVM-LABEL: define void @_Z18long_shift_examplexs +// LLVM-LABEL: define{{.*}} void @_Z18long_shift_examplexs // LLVM-SAME: (i64 %[[A:.*]], i16 %[[B:.*]]) // LLVM: %[[A_ADDR:.*]] = alloca i64 // LLVM: %[[B_ADDR:.*]] = alloca i16 @@ -517,7 +517,7 @@ void long_shift_example(long long a, short b) { // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z18long_shift_examplexs +// OGCG-LABEL: define{{.*}} void @_Z18long_shift_examplexs // OGCG-SAME: (i64 {{.*}} %[[A:.*]], i16 {{.*}} %[[B:.*]]) // OGCG: %[[A_ADDR:.*]] = alloca i64 // OGCG: %[[B_ADDR:.*]] = alloca i16 @@ -546,7 +546,7 @@ void b1(bool a, bool b) { x = x || b; } -// CIR-LABEL: cir.func @_Z2b1bb( +// CIR-LABEL: cir.func{{.*}} @_Z2b1bb( // CIR-SAME: %[[ARG0:.*]]: !cir.bool {{.*}}, %[[ARG1:.*]]: !cir.bool {{.*}}) // CIR: [[A:%[0-9]+]] = cir.alloca !cir.bool, !cir.ptr, ["a", init] // CIR: [[B:%[0-9]+]] = cir.alloca !cir.bool, !cir.ptr, ["b", init] @@ -574,7 +574,7 @@ void b1(bool a, bool b) { // CIR: cir.return -// LLVM-LABEL: define void @_Z2b1bb( +// LLVM-LABEL: define{{.*}} void @_Z2b1bb( // LLVM-SAME: i1 %[[ARG0:.+]], i1 %[[ARG1:.+]]) // LLVM: %[[A_ADDR:.*]] = alloca i8 // LLVM: %[[B_ADDR:.*]] = alloca i8 @@ -611,7 +611,7 @@ void b1(bool a, bool b) { // LLVM: store i8 %[[ZEXT_OR]], ptr %[[X]] // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z2b1bb +// OGCG-LABEL: define{{.*}} void @_Z2b1bb // 
OGCG-SAME: (i1 {{.*}} %[[ARG0:.+]], i1 {{.*}} %[[ARG1:.+]]) // OGCG: [[ENTRY:.*]]: // OGCG: %[[A_ADDR:.*]] = alloca i8 @@ -650,7 +650,7 @@ void b3(int a, int b, int c, int d) { x = (a == b) || (c == d); } -// CIR-LABEL: cir.func @_Z2b3iiii( +// CIR-LABEL: cir.func{{.*}} @_Z2b3iiii( // CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}, %[[ARG1:.*]]: !s32i {{.*}}, %[[ARG2:.*]]: !s32i {{.*}}, %[[ARG3:.*]]: !s32i {{.*}}) // CIR: [[A:%[0-9]+]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR: [[B:%[0-9]+]] = cir.alloca !s32i, !cir.ptr, ["b", init] @@ -690,7 +690,7 @@ void b3(int a, int b, int c, int d) { // CIR: cir.return -// LLVM-LABEL: define void @_Z2b3iiii( +// LLVM-LABEL: define{{.*}} void @_Z2b3iiii( // LLVM-SAME: i32 %[[ARG0:.+]], i32 %[[ARG1:.+]], i32 %[[ARG2:.+]], i32 %[[ARG3:.+]]) // LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1 // LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1 @@ -733,7 +733,7 @@ void b3(int a, int b, int c, int d) { // LLVM: store i8 %[[ZEXT_OR]], ptr %[[X]] // LLVM: ret void -// OGCG-LABEL: define dso_local void @_Z2b3iiii( +// OGCG-LABEL: define{{.*}} void @_Z2b3iiii( // OGCG-SAME: i32 {{.*}} %[[ARG0:.+]], i32 {{.*}} %[[ARG1:.+]], i32 {{.*}} %[[ARG2:.+]], i32 {{.*}} %[[ARG3:.+]]) // OGCG: [[ENTRY:.*]]: // OGCG: %[[A_ADDR:.*]] = alloca i32 @@ -771,4 +771,4 @@ void b3(int a, int b, int c, int d) { // OGCG: %[[OR_PHI:.*]] = phi i1 [ true, %[[AND_MERGE]] ], [ %[[CMP4]], %[[OR_FALSE]] ] // OGCG: %[[ZEXT_OR:.*]] = zext i1 %[[OR_PHI]] to i8 // OGCG: store i8 %[[ZEXT_OR]], ptr %[[X]] -// OGCG: ret void \ No newline at end of file +// OGCG: ret void diff --git a/clang/test/CIR/CodeGen/builtin_call.cpp b/clang/test/CIR/CodeGen/builtin_call.cpp index bbe5e36b8bd99..b956f2580593e 100644 --- a/clang/test/CIR/CodeGen/builtin_call.cpp +++ b/clang/test/CIR/CodeGen/builtin_call.cpp @@ -27,7 +27,7 @@ int is_constant_evaluated() { return __builtin_is_constant_evaluated(); } -// CIR: cir.func @_Z21is_constant_evaluatedv() -> !s32i +// CIR: cir.func{{.*}} 
@_Z21is_constant_evaluatedv() -> !s32i // CIR: %[[ZERO:.+]] = cir.const #cir.int<0> // LLVM: define {{.*}}i32 @_Z21is_constant_evaluatedv() @@ -45,7 +45,7 @@ long double constant_fp_builtin_ld() { return __builtin_fabsl(-0.1L); } -// CIR: cir.func @_Z22constant_fp_builtin_ldv() -> !cir.long_double +// CIR: cir.func{{.*}} @_Z22constant_fp_builtin_ldv() -> !cir.long_double // CIR: %[[PONE:.+]] = cir.const #cir.fp<1.000000e-01> : !cir.long_double // LLVM: define {{.*}}x86_fp80 @_Z22constant_fp_builtin_ldv() @@ -63,7 +63,7 @@ float constant_fp_builtin_single() { return __builtin_fabsf(-0.1f); } -// CIR: cir.func @_Z26constant_fp_builtin_singlev() -> !cir.float +// CIR: cir.func{{.*}} @_Z26constant_fp_builtin_singlev() -> !cir.float // CIR: %[[PONE:.+]] = cir.const #cir.fp<1.000000e-01> : !cir.float // LLVM: define {{.*}}float @_Z26constant_fp_builtin_singlev() @@ -82,16 +82,16 @@ void library_builtins() { __builtin_abort(); } -// CIR: cir.func @_Z16library_builtinsv() { +// CIR: cir.func{{.*}} @_Z16library_builtinsv() { // CIR: %[[NULL:.+]] = cir.const #cir.ptr : !cir.ptr // CIR: cir.call @printf(%[[NULL]]) : (!cir.ptr) -> !s32i // CIR: cir.call @abort() : () -> () -// LLVM: define void @_Z16library_builtinsv() +// LLVM: define{{.*}} void @_Z16library_builtinsv() // LLVM: call i32 (ptr, ...) @printf(ptr null) // LLVM: call void @abort() -// OGCG: define dso_local void @_Z16library_builtinsv() +// OGCG: define{{.*}} void @_Z16library_builtinsv() // OGCG: call i32 (ptr, ...) 
@printf(ptr noundef null) // OGCG: call void @abort() @@ -99,11 +99,11 @@ void assume(bool arg) { __builtin_assume(arg); } -// CIR: cir.func @_Z6assumeb +// CIR: cir.func{{.*}} @_Z6assumeb // CIR: cir.assume %{{.+}} : !cir.bool // CIR: } -// LLVM: define void @_Z6assumeb +// LLVM: define {{.*}}void @_Z6assumeb // LLVM: call void @llvm.assume(i1 %{{.+}}) // LLVM: } @@ -115,7 +115,7 @@ void expect(int x, int y) { __builtin_expect(x, y); } -// CIR-LABEL: cir.func @_Z6expectii +// CIR-LABEL: cir.func{{.*}} @_Z6expectii // CIR: %[[X:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i // CIR-NEXT: %[[X_LONG:.+]] = cir.cast(integral, %[[X]] : !s32i), !s64i // CIR-NEXT: %[[Y:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i @@ -123,7 +123,7 @@ void expect(int x, int y) { // CIR-NEXT: %{{.+}} = cir.expect(%[[X_LONG]], %[[Y_LONG]]) : !s64i // CIR: } -// LLVM-LABEL: define void @_Z6expectii +// LLVM-LABEL: define{{.*}} void @_Z6expectii // LLVM: %[[X:.+]] = load i32, ptr %{{.+}}, align 4 // LLVM-NEXT: %[[X_LONG:.+]] = sext i32 %[[X]] to i64 // LLVM-NEXT: %[[Y:.+]] = load i32, ptr %{{.+}}, align 4 @@ -135,7 +135,7 @@ void expect_prob(int x, int y) { __builtin_expect_with_probability(x, y, 0.25); } -// CIR-LABEL: cir.func @_Z11expect_probii +// CIR-LABEL: cir.func{{.*}} @_Z11expect_probii // CIR: %[[X:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i // CIR-NEXT: %[[X_LONG:.+]] = cir.cast(integral, %[[X]] : !s32i), !s64i // CIR-NEXT: %[[Y:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !s32i @@ -143,7 +143,7 @@ void expect_prob(int x, int y) { // CIR-NEXT: %{{.+}} = cir.expect(%[[X_LONG]], %[[Y_LONG]], 2.500000e-01) : !s64i // CIR: } -// LLVM: define void @_Z11expect_probii +// LLVM: define{{.*}} void @_Z11expect_probii // LLVM: %[[X:.+]] = load i32, ptr %{{.+}}, align 4 // LLVM-NEXT: %[[X_LONG:.+]] = sext i32 %[[X]] to i64 // LLVM-NEXT: %[[Y:.+]] = load i32, ptr %{{.+}}, align 4 diff --git a/clang/test/CIR/CodeGen/builtin_printf.cpp b/clang/test/CIR/CodeGen/builtin_printf.cpp 
index 35c71eba86874..d12f822d43ebf 100644 --- a/clang/test/CIR/CodeGen/builtin_printf.cpp +++ b/clang/test/CIR/CodeGen/builtin_printf.cpp @@ -18,9 +18,9 @@ void func(char const * const str, int i) { __builtin_printf("%s %d\n", str, i); } -// CIR: cir.func @printf(!cir.ptr, ...) -> !s32i +// CIR: cir.func{{.*}} @printf(!cir.ptr, ...) -> !s32i -// CIR: cir.func @_Z4funcPKci(%[[arg0:.+]]: !cir.ptr{{.*}}, %[[arg1:.+]]: !s32i{{.*}}) { +// CIR: cir.func{{.*}} @_Z4funcPKci(%[[arg0:.+]]: !cir.ptr{{.*}}, %[[arg1:.+]]: !s32i{{.*}}) { // CIR: %[[str_ptr:.+]] = cir.alloca !cir.ptr, !cir.ptr>, ["str", init, const] // CIR: %[[i_ptr:.+]] = cir.alloca !s32i, !cir.ptr, ["i", init] // CIR: cir.store %[[arg0]], %[[str_ptr]] : !cir.ptr, !cir.ptr> @@ -38,7 +38,7 @@ void func(char const * const str, int i) { // CIR: %[[printf_result3:.+]] = cir.call @printf(%[[full_fmt_ptr]], %[[str_val2]], %[[i_val]]) : (!cir.ptr, !cir.ptr, !s32i) -> !s32i // CIR: cir.return -// LLVM: define void @_Z4funcPKci(ptr %[[arg0:.+]], i32 %[[arg1:.+]]) +// LLVM: define{{.*}} void @_Z4funcPKci(ptr %[[arg0:.+]], i32 %[[arg1:.+]]) // LLVM: %[[str_ptr:.+]] = alloca ptr // LLVM: %[[i_ptr:.+]] = alloca i32 // LLVM: store ptr %[[arg0]], ptr %[[str_ptr]]{{.*}} @@ -51,7 +51,7 @@ void func(char const * const str, int i) { // LLVM: %[[printf_result3:.+]] = call i32 (ptr, ...) 
@printf(ptr @.str.1, ptr %[[str_val2]], i32 %[[i_val]]) // LLVM: ret void -// OGCG: define dso_local void @_Z4funcPKci(ptr noundef %[[arg0:.+]], i32 noundef %[[arg1:.+]]) +// OGCG: define{{.*}} void @_Z4funcPKci(ptr noundef %[[arg0:.+]], i32 noundef %[[arg1:.+]]) // OGCG: %[[str_ptr:.+]] = alloca ptr // OGCG: %[[i_ptr:.+]] = alloca i32 // OGCG: store ptr %[[arg0]], ptr %[[str_ptr]]{{.*}} diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c index f6aa41df7439e..83a66fca638c2 100644 --- a/clang/test/CIR/CodeGen/call.c +++ b/clang/test/CIR/CodeGen/call.c @@ -16,15 +16,15 @@ void f2() { f1(s); } -// CIR-LABEL: cir.func @f2() +// CIR-LABEL: cir.func{{.*}} @f2() // CIR: %[[S:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !rec_S // CIR-NEXT: cir.call @f1(%[[S]]) : (!rec_S) -> () -// LLVM-LABEL: define void @f2() +// LLVM-LABEL: define{{.*}} void @f2() // LLVM: %[[S:.+]] = load %struct.S, ptr %{{.+}}, align 4 // LLVM-NEXT: call void @f1(%struct.S %[[S]]) -// OGCG-LABEL: define dso_local void @f2() +// OGCG-LABEL: define{{.*}} void @f2() // OGCG: %[[S:.+]] = load i64, ptr %{{.+}}, align 4 // OGCG-NEXT: call void @f1(i64 %[[S]]) @@ -33,15 +33,15 @@ void f4() { struct S s = f3(); } -// CIR-LABEL: cir.func @f4() { +// CIR-LABEL: cir.func{{.*}} @f4() { // CIR: %[[S:.+]] = cir.call @f3() : () -> !rec_S // CIR-NEXT: cir.store align(4) %[[S]], %{{.+}} : !rec_S, !cir.ptr -// LLVM-LABEL: define void @f4() { +// LLVM-LABEL: define{{.*}} void @f4() { // LLVM: %[[S:.+]] = call %struct.S (...) @f3() // LLVM-NEXT: store %struct.S %[[S]], ptr %{{.+}}, align 4 -// OGCG-LABEL: define dso_local void @f4() #0 { +// OGCG-LABEL: define{{.*}} void @f4() #0 { // OGCG: %[[S:.+]] = call i64 (...) 
@f3() // OGCG-NEXT: store i64 %[[S]], ptr %{{.+}}, align 4 @@ -57,15 +57,15 @@ void f7() { f5(b); } -// CIR-LABEL: cir.func @f7() +// CIR-LABEL: cir.func{{.*}} @f7() // CIR: %[[B:.+]] = cir.load align(4) %{{.+}} : !cir.ptr, !rec_Big // CIR-NEXT: cir.call @f5(%[[B]]) : (!rec_Big) -> () -// LLVM-LABEL: define void @f7() { +// LLVM-LABEL: define{{.*}} void @f7() { // LLVM: %[[B:.+]] = load %struct.Big, ptr %{{.+}}, align 4 // LLVM-NEXT: call void @f5(%struct.Big %[[B]]) -// OGCG-LABEL: define dso_local void @f7() #0 { +// OGCG-LABEL: define{{.*}} void @f7() #0 { // OGCG: %[[B:.+]] = alloca %struct.Big, align 8 // OGCG-NEXT: call void @f5(ptr noundef byval(%struct.Big) align 8 %[[B]]) @@ -73,15 +73,15 @@ void f8() { struct Big b = f6(); } -// CIR-LABEL: cir.func @f8() +// CIR-LABEL: cir.func{{.*}} @f8() // CIR: %[[B:.+]] = cir.call @f6() : () -> !rec_Big // CIR: cir.store align(4) %[[B]], %{{.+}} : !rec_Big, !cir.ptr -// LLVM-LABEL: define void @f8() { +// LLVM-LABEL: define{{.*}} void @f8() { // LLVM: %[[B:.+]] = call %struct.Big (...) @f6() // LLVM-NEXT: store %struct.Big %[[B]], ptr %{{.+}}, align 4 -// OGCG-LABEL: define dso_local void @f8() #0 { +// OGCG-LABEL: define{{.*}} void @f8() #0 { // OGCG: %[[B:.+]] = alloca %struct.Big, align 4 // OGCG-NEXT: call void (ptr, ...) @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]]) @@ -89,21 +89,21 @@ void f9() { f1(f3()); } -// CIR-LABEL: cir.func @f9() +// CIR-LABEL: cir.func{{.*}} @f9() // CIR: %[[SLOT:.+]] = cir.alloca !rec_S, !cir.ptr, ["agg.tmp0"] {alignment = 4 : i64} // CIR-NEXT: %[[RET:.+]] = cir.call @f3() : () -> !rec_S // CIR-NEXT: cir.store align(4) %[[RET]], %[[SLOT]] : !rec_S, !cir.ptr // CIR-NEXT: %[[ARG:.+]] = cir.load align(4) %[[SLOT]] : !cir.ptr, !rec_S // CIR-NEXT: cir.call @f1(%[[ARG]]) : (!rec_S) -> () -// LLVM-LABEL: define void @f9() { +// LLVM-LABEL: define{{.*}} void @f9() { // LLVM: %[[SLOT:.+]] = alloca %struct.S, i64 1, align 4 // LLVM-NEXT: %[[RET:.+]] = call %struct.S (...) 
@f3() // LLVM-NEXT: store %struct.S %[[RET]], ptr %[[SLOT]], align 4 // LLVM-NEXT: %[[ARG:.+]] = load %struct.S, ptr %[[SLOT]], align 4 // LLVM-NEXT: call void @f1(%struct.S %[[ARG]]) -// OGCG-LABEL: define dso_local void @f9() #0 { +// OGCG-LABEL: define{{.*}} void @f9() #0 { // OGCG: %[[SLOT:.+]] = alloca %struct.S, align 4 // OGCG-NEXT: %[[RET:.+]] = call i64 (...) @f3() // OGCG-NEXT: store i64 %[[RET]], ptr %[[SLOT]], align 4 @@ -116,17 +116,17 @@ int f12(void) { return f10(1) + f11(2); } -// CIR-LABEL: cir.func @f12() -> !s32i +// CIR-LABEL: cir.func{{.*}} @f12() -> !s32i // CIR: %[[A:.+]] = cir.const #cir.int<1> : !s32i // CIR-NEXT: %{{.+}} = cir.call @f10(%[[A]]) side_effect(pure) : (!s32i) -> !s32i // CIR-NEXT: %[[B:.+]] = cir.const #cir.int<2> : !s32i // CIR-NEXT: %{{.+}} = cir.call @f11(%[[B]]) side_effect(const) : (!s32i) -> !s32i -// LLVM-LABEL: define i32 @f12() +// LLVM-LABEL: define{{.*}} i32 @f12() // LLVM: %{{.+}} = call i32 @f10(i32 1) #[[ATTR0:.+]] // LLVM-NEXT: %{{.+}} = call i32 @f11(i32 2) #[[ATTR1:.+]] -// OGCG-LABEL: define dso_local i32 @f12() +// OGCG-LABEL: define{{.*}} i32 @f12() // OGCG: %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]] // OGCG-NEXT: %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]] diff --git a/clang/test/CIR/CodeGen/call.cpp b/clang/test/CIR/CodeGen/call.cpp index cc25afce1e5a4..f7653ed7a572b 100644 --- a/clang/test/CIR/CodeGen/call.cpp +++ b/clang/test/CIR/CodeGen/call.cpp @@ -8,11 +8,11 @@ void f2() { f1(); } -// CIR-LABEL: cir.func @_Z2f1v -// CIR-LABEL: cir.func @_Z2f2v +// CIR-LABEL: cir.func{{.*}} @_Z2f1v +// CIR-LABEL: cir.func{{.*}} @_Z2f2v // CIR: cir.call @_Z2f1v() : () -> () -// LLVM-LABEL: define void @_Z2f2v() { +// LLVM-LABEL: define{{.*}} void @_Z2f2v() { // LLVM: call void @_Z2f1v() int f3() { return 2; } @@ -21,11 +21,11 @@ int f4() { return x; } -// CIR-LABEL: cir.func @_Z2f3v() -> !s32i -// CIR-LABEL: cir.func @_Z2f4v() -> !s32i +// CIR-LABEL: cir.func{{.*}} @_Z2f3v() -> !s32i +// CIR-LABEL: 
cir.func{{.*}} @_Z2f4v() -> !s32i // CIR: cir.call @_Z2f3v() : () -> !s32i -// LLVM-LABEL: define i32 @_Z2f4v() { +// LLVM-LABEL: define{{.*}} i32 @_Z2f4v() { // LLVM: %{{.+}} = call i32 @_Z2f3v() int f5(int a, int *b, bool c); @@ -34,26 +34,26 @@ int f6() { return f5(2, &b, false); } -// CIR-LABEL: cir.func @_Z2f6v() -> !s32i +// CIR-LABEL: cir.func{{.*}} @_Z2f6v() -> !s32i // CIR: %[[#b:]] = cir.alloca !s32i, !cir.ptr, ["b", init] // CIR: %[[#a:]] = cir.const #cir.int<2> : !s32i // CIR-NEXT: %[[#c:]] = cir.const #false // CIR-NEXT: %{{.+}} = cir.call @_Z2f5iPib(%[[#a]], %[[#b:]], %[[#c]]) : (!s32i, !cir.ptr, !cir.bool) -> !s32i -// LLVM-LABEL: define i32 @_Z2f6v() { +// LLVM-LABEL: define{{.*}} i32 @_Z2f6v() { // LLVM: %{{.+}} = call i32 @_Z2f5iPib(i32 2, ptr %{{.+}}, i1 false) int f7(int (*ptr)(int, int)) { return ptr(1, 2); } -// CIR-LABEL: cir.func @_Z2f7PFiiiE +// CIR-LABEL: cir.func{{.*}} @_Z2f7PFiiiE // CIR: %[[#ptr:]] = cir.load{{.*}} %{{.+}} : !cir.ptr !s32i>>>, !cir.ptr !s32i>> // CIR-NEXT: %[[#a:]] = cir.const #cir.int<1> : !s32i // CIR-NEXT: %[[#b:]] = cir.const #cir.int<2> : !s32i // CIR-NEXT: %{{.+}} = cir.call %[[#ptr]](%[[#a]], %[[#b]]) : (!cir.ptr !s32i>>, !s32i, !s32i) -> !s32i -// LLVM-LABEL: define i32 @_Z2f7PFiiiE +// LLVM-LABEL: define{{.*}} i32 @_Z2f7PFiiiE // LLVM: %[[#ptr:]] = load ptr, ptr %{{.+}} // LLVM-NEXT: %{{.+}} = call i32 %[[#ptr]](i32 1, i32 2) @@ -63,11 +63,11 @@ void f9() { f8(1, 2, 3, 4); } -// CIR-LABEL: cir.func @_Z2f9v() +// CIR-LABEL: cir.func{{.*}} @_Z2f9v() // CIR: cir.call @_Z2f8iz(%{{.+}}) : (!s32i) -> () // CIR: cir.call @_Z2f8iz(%{{.+}}, %{{.+}}, %{{.+}}, %{{.+}}) : (!s32i, !s32i, !s32i, !s32i) -> () -// LLVM-LABEL: define void @_Z2f9v() +// LLVM-LABEL: define{{.*}} void @_Z2f9v() // LLVM: call void (i32, ...) @_Z2f8iz(i32 1) // LLVM: call void (i32, ...) 
@_Z2f8iz(i32 1, i32 2, i32 3, i32 4) @@ -81,11 +81,11 @@ void f11() { S s = f10(); } -// CIR-LABEL: cir.func @_Z3f11v() +// CIR-LABEL: cir.func{{.*}} @_Z3f11v() // CIR: %[[#s:]] = cir.call @_Z3f10v() : () -> !rec_S // CIR-NEXT: cir.store align(4) %[[#s]], %{{.+}} : !rec_S, !cir.ptr -// LLVM-LABEL: define void @_Z3f11v() +// LLVM-LABEL: define{{.*}} void @_Z3f11v() // LLVM: %[[#s:]] = call %struct.S @_Z3f10v() // LLVM-NEXT: store %struct.S %[[#s]], ptr %{{.+}}, align 4 @@ -93,12 +93,12 @@ void f12() { f10(); } -// CIR-LABEL: cir.func @_Z3f12v() +// CIR-LABEL: cir.func{{.*}} @_Z3f12v() // CIR: %[[#slot:]] = cir.alloca !rec_S, !cir.ptr, ["agg.tmp0"] // CIR-NEXT: %[[#ret:]] = cir.call @_Z3f10v() : () -> !rec_S // CIR-NEXT: cir.store align(4) %[[#ret]], %[[#slot]] : !rec_S, !cir.ptr -// LLVM-LABEL: define void @_Z3f12v() { +// LLVM-LABEL: define{{.*}} void @_Z3f12v() { // LLVM: %[[#slot:]] = alloca %struct.S, i64 1, align 4 // LLVM-NEXT: %[[#ret:]] = call %struct.S @_Z3f10v() // LLVM-NEXT: store %struct.S %[[#ret]], ptr %[[#slot]], align 4 diff --git a/clang/test/CIR/CodeGen/cast.cpp b/clang/test/CIR/CodeGen/cast.cpp index 84f55242a6118..caf6de7c7d485 100644 --- a/clang/test/CIR/CodeGen/cast.cpp +++ b/clang/test/CIR/CodeGen/cast.cpp @@ -7,7 +7,7 @@ unsigned char cxxstaticcast_0(unsigned int x) { return static_cast(x); } -// CIR: cir.func @_Z15cxxstaticcast_0j +// CIR: cir.func{{.*}} @_Z15cxxstaticcast_0j // CIR: %[[XPTR:[0-9]+]] = cir.alloca !u32i, !cir.ptr, ["x", init] {alignment = 4 : i64} // CIR: %[[RV:[0-9]+]] = cir.alloca !u8i, !cir.ptr, ["__retval"] {alignment = 1 : i64} // CIR: cir.store %arg0, %[[XPTR]] : !u32i, !cir.ptr @@ -18,7 +18,7 @@ unsigned char cxxstaticcast_0(unsigned int x) { // CIR: cir.return %[[R]] : !u8i // CIR: } -// LLVM: define i8 @_Z15cxxstaticcast_0j(i32 %{{[0-9]+}}) +// LLVM: define{{.*}} i8 @_Z15cxxstaticcast_0j(i32 %{{[0-9]+}}) // LLVM: %[[LOAD:[0-9]+]] = load i32, ptr %{{[0-9]+}}, align 4 // LLVM: %[[TRUNC:[0-9]+]] = trunc i32 %[[LOAD]] to 
i8 // LLVM: store i8 %[[TRUNC]], ptr %[[RV:[0-9]+]], align 1 @@ -26,8 +26,8 @@ unsigned char cxxstaticcast_0(unsigned int x) { // LLVM: ret i8 %[[R]] int cStyleCasts_0(unsigned x1, int x2, float x3, short x4, double x5) { -// CIR: cir.func @_Z13cStyleCasts_0jifsd -// LLVM: define i32 @_Z13cStyleCasts_0jifsd +// CIR: cir.func{{.*}} @_Z13cStyleCasts_0jifsd +// LLVM: define{{.*}} i32 @_Z13cStyleCasts_0jifsd char a = (char)x1; // truncate // CIR: %{{[0-9]+}} = cir.cast(integral, %{{[0-9]+}} : !u32i), !s8i @@ -89,13 +89,13 @@ bool cptr(void *d) { return x; } -// CIR: cir.func @_Z4cptrPv(%arg0: !cir.ptr +// CIR: cir.func{{.*}} @_Z4cptrPv(%arg0: !cir.ptr // CIR: %[[DPTR:[0-9]+]] = cir.alloca !cir.ptr, !cir.ptr>, ["d", init] {alignment = 8 : i64} // CIR: %[[DVAL:[0-9]+]] = cir.load{{.*}} %[[DPTR]] : !cir.ptr>, !cir.ptr // CIR: %{{[0-9]+}} = cir.cast(ptr_to_bool, %[[DVAL]] : !cir.ptr), !cir.bool -// LLVM-LABEL: define i1 @_Z4cptrPv(ptr %0) +// LLVM-LABEL: define{{.*}} i1 @_Z4cptrPv(ptr %0) // LLVM: %[[ARG_STORAGE:.*]] = alloca ptr, i64 1 // LLVM: %[[RETVAL:.*]] = alloca i8, i64 1 // LLVM: %[[X_STORAGE:.*]] = alloca i8, i64 1 @@ -114,7 +114,7 @@ void should_not_cast() { (void) ib; // void cast } -// CIR: cir.func @_Z15should_not_castv +// CIR: cir.func{{.*}} @_Z15should_not_castv // CIR-NOT: cir.cast // CIR: cir.return diff --git a/clang/test/CIR/CodeGen/class.cpp b/clang/test/CIR/CodeGen/class.cpp index d7f3772c95826..43dde12df40f0 100644 --- a/clang/test/CIR/CodeGen/class.cpp +++ b/clang/test/CIR/CodeGen/class.cpp @@ -51,7 +51,7 @@ class Derived : public Base { int use(Derived *d) { return d->b; } -// CIR: cir.func @_Z3useP7Derived(%[[ARG0:.*]]: !cir.ptr +// CIR: cir.func{{.*}} @_Z3useP7Derived(%[[ARG0:.*]]: !cir.ptr // CIR: %[[D_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["d", init] // CIR: cir.store %[[ARG0]], %[[D_ADDR]] // CIR: %[[D_PTR:.*]] = cir.load align(8) %0 @@ -69,7 +69,7 @@ int use_base() { return d.a; } -// CIR: cir.func @_Z8use_basev +// CIR: cir.func{{.*}} 
@_Z8use_basev // CIR: %[[D_ADDR:.*]] = cir.alloca !rec_Derived, !cir.ptr, ["d"] // CIR: %[[BASE_ADDR:.*]] cir.base_class_addr %[[D_ADDR]] : !cir.ptr nonnull [0] -> !cir.ptr // CIR: %[[D_A_ADDR:.*]] = cir.get_member %2[0] {name = "a"} : !cir.ptr -> !cir.ptr @@ -87,7 +87,7 @@ int use_base_via_pointer(Derived *d) { return d->a; } -// CIR: cir.func @_Z20use_base_via_pointerP7Derived(%[[ARG0:.*]]: !cir.ptr +// CIR: cir.func{{.*}} @_Z20use_base_via_pointerP7Derived(%[[ARG0:.*]]: !cir.ptr // CIR: %[[D_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["d", init] // CIR: cir.store %[[ARG0]], %[[D_ADDR]] // CIR: %[[D:.*]] = cir.load align(8) %[[D_ADDR]] diff --git a/clang/test/CIR/CodeGen/cmp.cpp b/clang/test/CIR/CodeGen/cmp.cpp index 40529d92b2a05..75c8cda0c3603 100644 --- a/clang/test/CIR/CodeGen/cmp.cpp +++ b/clang/test/CIR/CodeGen/cmp.cpp @@ -14,7 +14,7 @@ void c0(int a, int b) { x = a == b; } -// CIR-LABEL: cir.func @_Z2c0ii( +// CIR-LABEL: cir.func{{.*}} @_Z2c0ii( // CIR: %[[A_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] @@ -45,7 +45,7 @@ void c0(int a, int b) { // CIR: %[[B6:.*]] = cir.load{{.*}} %[[B_PTR]] // CIR: %{{.*}} = cir.cmp(eq, %[[A6]], %[[B6]]) : !s32i, !cir.bool -// LLVM-LABEL: define void @_Z2c0ii(i32 %0, i32 %1) { +// LLVM-LABEL: define{{.*}} void @_Z2c0ii(i32 %0, i32 %1) { // LLVM: %[[PTR1:.*]] = alloca i32, i64 1 // LLVM: %[[PTR2:.*]] = alloca i32, i64 1 // LLVM: %[[BOOL_PTR:.*]] = alloca i8, i64 1 @@ -88,7 +88,7 @@ void c0(int a, int b) { // LLVM: %[[ZEXT6:.*]] = zext i1 %[[CMP6]] to i8 // LLVM: store i8 %[[ZEXT6]], ptr %[[BOOL_PTR]] -// OGCG-LABEL: define dso_local void @_Z2c0ii(i32 {{.*}} %a, i32 {{.*}} %b) {{.*}} { +// OGCG-LABEL: define{{.*}} void @_Z2c0ii(i32 {{.*}} %a, i32 {{.*}} %b) {{.*}} { // OGCG: %[[PTR1:.*]] = alloca i32 // OGCG: %[[PTR2:.*]] = alloca i32 // OGCG: %[[BOOL_PTR:.*]] = alloca i8 @@ -140,7 +140,7 @@ void c0_unsigned(unsigned int a, unsigned int b) { x = a == b; } 
-// CIR-LABEL: cir.func @_Z11c0_unsignedjj( +// CIR-LABEL: cir.func{{.*}} @_Z11c0_unsignedjj( // CIR: %[[U_A_PTR:.*]] = cir.alloca !u32i, !cir.ptr, ["a", init] // CIR: %[[U_B_PTR:.*]] = cir.alloca !u32i, !cir.ptr, ["b", init] @@ -170,7 +170,7 @@ void c0_unsigned(unsigned int a, unsigned int b) { // CIR: %[[UB6:.*]] = cir.load{{.*}} %[[U_B_PTR]] // CIR: %{{.*}} = cir.cmp(eq, %[[UA6]], %[[UB6]]) : !u32i, !cir.bool -// LLVM-LABEL: define void @_Z11c0_unsignedjj(i32 %0, i32 %1) { +// LLVM-LABEL: define{{.*}} void @_Z11c0_unsignedjj(i32 %0, i32 %1) { // LLVM: %[[U_PTR1:.*]] = alloca i32, i64 1 // LLVM: %[[U_PTR2:.*]] = alloca i32, i64 1 // LLVM: %[[U_BOOL_PTR:.*]] = alloca i8, i64 1 @@ -213,7 +213,7 @@ void c0_unsigned(unsigned int a, unsigned int b) { // LLVM: %[[UZEXT6:.*]] = zext i1 %[[UCMP6]] to i8 // LLVM: store i8 %[[UZEXT6]], ptr %[[U_BOOL_PTR]] -// OGCG-LABEL: define dso_local void @_Z11c0_unsignedjj(i32 {{.*}} %a, i32 {{.*}} %b) {{.*}} { +// OGCG-LABEL: define{{.*}} void @_Z11c0_unsignedjj(i32 {{.*}} %a, i32 {{.*}} %b) {{.*}} { // OGCG: %[[U_PTR1:.*]] = alloca i32 // OGCG: %[[U_PTR2:.*]] = alloca i32 // OGCG: %[[U_BOOL_PTR:.*]] = alloca i8 @@ -265,7 +265,7 @@ void c0_float(float a, float b) { x = a == b; } -// CIR-LABEL: cir.func @_Z8c0_floatff(%arg0: !cir.float{{.*}}, %arg1: !cir.float{{.*}}) { +// CIR-LABEL: cir.func{{.*}} @_Z8c0_floatff(%arg0: !cir.float{{.*}}, %arg1: !cir.float{{.*}}) { // CIR: %[[A_PTR:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !cir.float, !cir.ptr, ["b", init] // CIR: %[[X_PTR:.*]] = cir.alloca !cir.bool, !cir.ptr, ["x", init] @@ -303,7 +303,7 @@ void c0_float(float a, float b) { // CIR: %[[CMP6:.*]] = cir.cmp(eq, %[[A6]], %[[B6]]) : !cir.float, !cir.bool // CIR: cir.store{{.*}} %[[CMP6]], %[[X_PTR]] : !cir.bool, !cir.ptr -// LLVM-LABEL: define void @_Z8c0_floatff(float %0, float %1) { +// LLVM-LABEL: define{{.*}} void @_Z8c0_floatff(float %0, float %1) { // LLVM: %[[A_PTR:.*]] = alloca float // 
LLVM: %[[B_PTR:.*]] = alloca float // LLVM: store float %0, ptr %[[A_PTR]] @@ -320,7 +320,7 @@ void c0_float(float a, float b) { // LLVM: fcmp une float %{{.*}}, %{{.*}} // LLVM: fcmp oeq float %{{.*}}, %{{.*}} -// OGCG-LABEL: define dso_local void @_Z8c0_floatff(float {{.*}} %a, float {{.*}} %b) {{.*}} { +// OGCG-LABEL: define{{.*}} void @_Z8c0_floatff(float {{.*}} %a, float {{.*}} %b) {{.*}} { // OGCG: %[[A_PTR:.*]] = alloca float // OGCG: %[[B_PTR:.*]] = alloca float // OGCG: store float %a, ptr %[[A_PTR]] @@ -346,7 +346,7 @@ void pointer_cmp(int *a, int *b) { x = a != b; } -// CIR-LABEL: cir.func @_Z11pointer_cmpPiS_(%arg0: !cir.ptr{{.*}}, %arg1: !cir.ptr{{.*}}) { +// CIR-LABEL: cir.func{{.*}} @_Z11pointer_cmpPiS_(%arg0: !cir.ptr{{.*}}, %arg1: !cir.ptr{{.*}}) { // CIR: %[[A_PTR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["b", init] @@ -360,7 +360,7 @@ void pointer_cmp(int *a, int *b) { // CIR: cir.cmp(eq, {{.*}}, {{.*}}) : !cir.ptr, !cir.bool // CIR: cir.cmp(ne, {{.*}}, {{.*}}) : !cir.ptr, !cir.bool -// LLVM-LABEL: define void @_Z11pointer_cmpPiS_(ptr %0, ptr %1) { +// LLVM-LABEL: define{{.*}} void @_Z11pointer_cmpPiS_(ptr %0, ptr %1) { // LLVM: %[[A_PTR:.*]] = alloca ptr // LLVM: %[[B_PTR:.*]] = alloca ptr // LLVM: store ptr %0, ptr %[[A_PTR]] @@ -376,7 +376,7 @@ void pointer_cmp(int *a, int *b) { // LLVM: icmp eq ptr %{{.*}}, %{{.*}} // LLVM: icmp ne ptr %{{.*}}, %{{.*}} -// OGCG-LABEL: define dso_local void @_Z11pointer_cmpPiS_(ptr {{.*}} %a, ptr {{.*}} %b) {{.*}} { +// OGCG-LABEL: define{{.*}} void @_Z11pointer_cmpPiS_(ptr {{.*}} %a, ptr {{.*}} %b) {{.*}} { // OGCG: %[[A_PTR:.*]] = alloca ptr // OGCG: %[[B_PTR:.*]] = alloca ptr // OGCG: store ptr %a, ptr %[[A_PTR]] @@ -401,7 +401,7 @@ void bool_cmp(bool a, bool b) { x = a != b; } -// CIR-LABEL: cir.func @_Z8bool_cmpbb(%arg0: !cir.bool{{.*}}, %arg1: !cir.bool{{.*}}) { +// CIR-LABEL: cir.func{{.*}} @_Z8bool_cmpbb(%arg0: !cir.bool{{.*}}, %arg1: 
!cir.bool{{.*}}) { // CIR: %[[A_PTR:.*]] = cir.alloca !cir.bool, !cir.ptr, ["a", init] // CIR: %[[B_PTR:.*]] = cir.alloca !cir.bool, !cir.ptr, ["b", init] // CIR: %[[X_PTR:.*]] = cir.alloca !cir.bool, !cir.ptr, ["x", init] @@ -419,7 +419,7 @@ void bool_cmp(bool a, bool b) { // CIR: cir.cmp(eq // CIR: cir.cmp(ne -// LLVM-LABEL: define void @_Z8bool_cmpbb(i1 %0, i1 %1) { +// LLVM-LABEL: define{{.*}} void @_Z8bool_cmpbb(i1 %0, i1 %1) { // LLVM: %[[A_PTR:.*]] = alloca i8 // LLVM: %[[B_PTR:.*]] = alloca i8 // LLVM: %[[X_PTR:.*]] = alloca i8 @@ -444,7 +444,7 @@ void bool_cmp(bool a, bool b) { // LLVM: icmp eq // LLVM: icmp ne -// OGCG-LABEL: define dso_local void @_Z8bool_cmpbb(i1 {{.*}} %a, i1 {{.*}} %b) {{.*}} { +// OGCG-LABEL: define{{.*}} void @_Z8bool_cmpbb(i1 {{.*}} %a, i1 {{.*}} %b) {{.*}} { // OGCG: %[[A_PTR:.*]] = alloca i8 // OGCG: %[[B_PTR:.*]] = alloca i8 // OGCG: %[[X_PTR:.*]] = alloca i8 diff --git a/clang/test/CIR/CodeGen/comma.c b/clang/test/CIR/CodeGen/comma.c index d811f5a72bddf..a1479b85d3f04 100644 --- a/clang/test/CIR/CodeGen/comma.c +++ b/clang/test/CIR/CodeGen/comma.c @@ -16,7 +16,7 @@ void comma(void) { i = 100, 200; } -// CIR-LABEL: cir.func @comma() { +// CIR-LABEL: cir.func{{.*}} @comma() { // CIR: %[[B:.*]] = cir.alloca !cir.bool, !cir.ptr, ["b"] // CIR: %[[C:.*]] = cir.alloca !s8i, !cir.ptr, ["c"] // CIR: %[[F:.*]] = cir.alloca !cir.float, !cir.ptr, ["f"] diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index cfeed345b4f11..ad3720097a795 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -216,6 +216,53 @@ void foo9(double a, double b) { // OGCG: store double %[[TMP_A]], ptr %[[C_REAL_PTR]], align 8 // OGCG: store double %[[TMP_B]], ptr %[[C_IMAG_PTR]], align 8 +void foo12() { + double _Complex c; + double imag = __imag__ c; +} + +// CIR: %[[COMPLEX:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c"] +// CIR: %[[INIT:.*]] = cir.alloca !cir.double, !cir.ptr, ["imag", 
init] +// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[COMPLEX]] : !cir.ptr>, !cir.complex +// CIR: %[[IMAG:.*]] = cir.complex.imag %[[TMP]] : !cir.complex -> !cir.double +// CIR: cir.store{{.*}} %[[IMAG]], %[[INIT]] : !cir.double, !cir.ptr + +// LLVM: %[[COMPLEX:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[INIT:.*]] = alloca double, i64 1, align 8 +// LLVM: %[[TMP:.*]] = load { double, double }, ptr %[[COMPLEX]], align 8 +// LLVM: %[[IMAG:.*]] = extractvalue { double, double } %[[TMP]], 1 +// LLVM: store double %[[IMAG]], ptr %[[INIT]], align 8 + +// OGCG: %[[COMPLEX:.*]] = alloca { double, double }, align 8 +// OGCG: %[[INIT:.*]] = alloca double, align 8 +// OGCG: %[[IMAG:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 1 +// OGCG: %[[TMP:.*]] = load double, ptr %[[IMAG]], align 8 +// OGCG: store double %[[TMP]], ptr %[[INIT]], align 8 + +void foo13() { + double _Complex c; + double real = __real__ c; +} + +// CIR: %[[COMPLEX:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c"] +// CIR: %[[INIT:.*]] = cir.alloca !cir.double, !cir.ptr, ["real", init] +// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[COMPLEX]] : !cir.ptr>, !cir.complex +// CIR: %[[REAL:.*]] = cir.complex.real %[[TMP]] : !cir.complex -> !cir.double +// CIR: cir.store{{.*}} %[[REAL]], %[[INIT]] : !cir.double, !cir.ptr + +// LLVM: %[[COMPLEX:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[INIT:.*]] = alloca double, i64 1, align 8 +// LLVM: %[[TMP:.*]] = load { double, double }, ptr %[[COMPLEX]], align 8 +// LLVM: %[[REAL:.*]] = extractvalue { double, double } %[[TMP]], 0 +// LLVM: store double %[[REAL]], ptr %[[INIT]], align 8 + +// OGCG: %[[COMPLEX:.*]] = alloca { double, double }, align 8 +// OGCG: %[[INIT:.*]] = alloca double, align 8 +// OGCG: %[[REAL:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 0 +// OGCG: %[[TMP:.*]] = load double, ptr %[[REAL]], align 8 +// OGCG: store double %[[TMP]], ptr %[[INIT]], align 8 + + 
void foo14() { int _Complex c = 2i; } @@ -256,3 +303,69 @@ void foo15() { // OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 1 // OGCG: store i32 %[[A_REAL]], ptr %[[B_REAL_PTR]], align 4 // OGCG: store i32 %[[A_IMAG]], ptr %[[B_IMAG_PTR]], align 4 + +int foo16(int _Complex a, int _Complex b) { + return __imag__ a + __imag__ b; +} + +// CIR: %[[RET:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[COMPLEX_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr>, !cir.complex +// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[COMPLEX_A]] : !cir.complex -> !s32i +// CIR: %[[COMPLEX_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr>, !cir.complex +// CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[COMPLEX_B]] : !cir.complex -> !s32i +// CIR: %[[ADD:.*]] = cir.binop(add, %[[A_IMAG]], %[[B_IMAG]]) nsw : !s32i +// CIR: cir.store %[[ADD]], %[[RET]] : !s32i, !cir.ptr +// CIR: %[[TMP:.*]] = cir.load %[[RET]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP]] : !s32i + +// LLVM: %[[RET:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[COMPLEX_A:.*]] = load { i32, i32 }, ptr {{.*}}, align 4 +// LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[COMPLEX_A]], 1 +// LLVM: %[[COMPLEX_B:.*]] = load { i32, i32 }, ptr {{.*}}, align 4 +// LLVM: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[COMPLEX_B]], 1 +// LLVM: %[[ADD:.*]] = add nsw i32 %[[A_IMAG]], %[[B_IMAG]] +// LLVM: store i32 %[[ADD]], ptr %[[RET]], align 4 +// LLVM: %[[TMP:.*]] = load i32, ptr %[[RET]], align 4 +// LLVM: ret i32 %[[TMP]] + +// OGCG: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[A_IMAG:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 1 +// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_IMAG]], align 4 +// OGCG: %[[B_IMAG:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 1 +// OGCG: %[[TMP_B:.*]] = load i32, ptr %[[B_IMAG]], align 4 +// OGCG: %[[ADD:.*]] = add 
nsw i32 %[[TMP_A]], %[[TMP_B]] +// OGCG: ret i32 %[[ADD]] + +int foo17(int _Complex a, int _Complex b) { + return __real__ a + __real__ b; +} + +// CIR: %[[RET:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] +// CIR: %[[COMPLEX_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr>, !cir.complex +// CIR: %[[A_REAL:.*]] = cir.complex.real %[[COMPLEX_A]] : !cir.complex -> !s32i +// CIR: %[[COMPLEX_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr>, !cir.complex +// CIR: %[[B_REAL:.*]] = cir.complex.real %[[COMPLEX_B]] : !cir.complex -> !s32i +// CIR: %[[ADD:.*]] = cir.binop(add, %[[A_REAL]], %[[B_REAL]]) nsw : !s32i +// CIR: cir.store %[[ADD]], %[[RET]] : !s32i, !cir.ptr +// CIR: %[[TMP:.*]] = cir.load %[[RET]] : !cir.ptr, !s32i +// CIR: cir.return %[[TMP]] : !s32i + +// LLVM: %[[RET:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[COMPLEX_A:.*]] = load { i32, i32 }, ptr {{.*}}, align 4 +// LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[COMPLEX_A]], 0 +// LLVM: %[[COMPLEX_B:.*]] = load { i32, i32 }, ptr {{.*}}, align 4 +// LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[COMPLEX_B]], 0 +// LLVM: %[[ADD:.*]] = add nsw i32 %[[A_REAL]], %[[B_REAL]] +// LLVM: store i32 %[[ADD]], ptr %[[RET]], align 4 +// LLVM: %[[TMP:.*]] = load i32, ptr %[[RET]], align 4 +// LLVM: ret i32 %[[TMP]] + +// OGCG: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, align 4 +// OGCG: %[[A_REAL:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 0 +// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_REAL]], align 4 +// OGCG: %[[B_REAL:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 0 +// OGCG: %[[TMP_B:.*]] = load i32, ptr %[[B_REAL]], align 4 +// OGCG: %[[ADD:.*]] = add nsw i32 %[[TMP_A]], %[[TMP_B]] +// OGCG: ret i32 %[[ADD]] \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/compound_assign.cpp b/clang/test/CIR/CodeGen/compound_assign.cpp index 60442bcdf912e..04bf406d6dd2a 100644 --- 
a/clang/test/CIR/CodeGen/compound_assign.cpp +++ b/clang/test/CIR/CodeGen/compound_assign.cpp @@ -20,7 +20,7 @@ int compound_assign(int b) { return x; } -// CIR: cir.func @_Z15compound_assigni +// CIR: cir.func{{.*}} @_Z15compound_assigni // CIR: %[[MUL:.*]] = cir.binop(mul, %{{.*}}, %{{.*}}) nsw : !s32i // CIR: cir.store{{.*}} %[[MUL]], %{{.*}} : !s32i, !cir.ptr // CIR: %[[DIV:.*]] = cir.binop(div, %{{.*}}, %{{.*}}) : !s32i diff --git a/clang/test/CIR/CodeGen/ctor.cpp b/clang/test/CIR/CodeGen/ctor.cpp index 0b009442b2f87..4c2877f8460d0 100644 --- a/clang/test/CIR/CodeGen/ctor.cpp +++ b/clang/test/CIR/CodeGen/ctor.cpp @@ -16,20 +16,20 @@ void baz() { // constructors here. The handling of constructor aliases is currently // NYI, but when it is added this test should be updated to add a RUN // line that passes '-mconstructor-aliases' to clang_cc1. -// CHECK: cir.func @_ZN5StrukC2Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN5StrukC2Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] : !cir.ptr, !cir.ptr> // CHECK-NEXT: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] : !cir.ptr>, !cir.ptr // CHECK-NEXT: cir.return -// CHECK: cir.func @_ZN5StrukC1Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN5StrukC1Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] : !cir.ptr, !cir.ptr> // CHECK-NEXT: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] : !cir.ptr>, !cir.ptr // CHECK-NEXT: cir.call @_ZN5StrukC2Ev(%[[THIS]]) : (!cir.ptr) -> () // CHECK-NEXT: cir.return -// CHECK: cir.func @_Z3bazv() +// CHECK: cir.func{{.*}} @_Z3bazv() // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca !rec_Struk, !cir.ptr, ["s", init] {alignment = 4 : i64} // CHECK-NEXT: cir.call @_ZN5StrukC1Ev(%[[S_ADDR]]) : (!cir.ptr) -> () // CHECK-NEXT: cir.return @@ -45,9 +45,9 @@ void bar() { // When a 
variadic constructor is present, we call the C2 constructor directly. -// CHECK-NOT: cir.func @_ZN13VariadicStrukC2Eiz +// CHECK-NOT: cir.func{{.*}} @_ZN13VariadicStrukC2Eiz -// CHECK: cir.func @_ZN13VariadicStrukC1Eiz(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN13VariadicStrukC1Eiz(%arg0: !cir.ptr // CHECK-SAME: %arg1: !s32i // CHECK-SAME: ...) { // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] @@ -60,7 +60,7 @@ void bar() { // CHECK-NEXT: cir.store{{.*}} %[[N]], %[[A_ADDR]] // CHECK-NEXT: cir.return -// CHECK: cir.func @_Z3barv +// CHECK: cir.func{{.*}} @_Z3barv // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca !rec_VariadicStruk, !cir.ptr, ["s", init] // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i @@ -78,7 +78,7 @@ void bam() { DelegatingStruk s; } -// CHECK: cir.func @_ZN15DelegatingStrukC2Ei(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN15DelegatingStrukC2Ei(%arg0: !cir.ptr // CHECK-SAME: %arg1: !s32i // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: %[[N_ADDR:.*]] = cir.alloca {{.*}} ["n", init] @@ -90,7 +90,7 @@ void bam() { // CHECK-NEXT: cir.store{{.*}} %[[N]], %[[A_ADDR]] // CHECK-NEXT: cir.return -// CHECK: cir.func @_ZN15DelegatingStrukC1Ei(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN15DelegatingStrukC1Ei(%arg0: !cir.ptr // CHECK-SAME: %arg1: !s32i // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: %[[N_ADDR:.*]] = cir.alloca {{.*}} ["n", init] @@ -101,7 +101,7 @@ void bam() { // CHECK-NEXT: cir.call @_ZN15DelegatingStrukC2Ei(%[[THIS]], %[[N]]) // CHECK-NEXT: cir.return -// CHECK: cir.func @_ZN15DelegatingStrukC1Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN15DelegatingStrukC1Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] // CHECK-NEXT: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] @@ -109,7 +109,7 @@ void bam() { // 
CHECK-NEXT: cir.call @_ZN15DelegatingStrukC1Ei(%[[THIS]], %[[ZERO]]) // CHECK-NEXT: cir.return -// CHECK: cir.func @_Z3bamv +// CHECK: cir.func{{.*}} @_Z3bamv // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca {{.*}} ["s", init] // CHECK-NEXT: cir.call @_ZN15DelegatingStrukC1Ev(%[[S_ADDR]]) // CHECK-NEXT: cir.return @@ -123,7 +123,7 @@ void init_member() { MemberInitStruk s; } -// CHECK: cir.func @_ZN15MemberInitStrukC2Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN15MemberInitStrukC2Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] // CHECK-NEXT: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] @@ -132,14 +132,14 @@ void init_member() { // CHECK-NEXT: cir.store align(4) %[[ZERO]], %[[A_ADDR]] // CHECK-NEXT: cir.return -// CHECK: cir.func @_ZN15MemberInitStrukC1Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN15MemberInitStrukC1Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] // CHECK-NEXT: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] // CHECK-NEXT: cir.call @_ZN15MemberInitStrukC2Ev(%[[THIS]]) // CHECK-NEXT: cir.return -// CHECK: cir.func @_Z11init_memberv +// CHECK: cir.func{{.*}} @_Z11init_memberv // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca {{.*}} ["s", init] // CHECK-NEXT: cir.call @_ZN15MemberInitStrukC1Ev(%[[S_ADDR]]) // CHECK-NEXT: cir.return @@ -153,7 +153,7 @@ void init_param_member() { ParamMemberInitStruk s(0); } -// CHECK: cir.func @_ZN20ParamMemberInitStrukC2Ei(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN20ParamMemberInitStrukC2Ei(%arg0: !cir.ptr // CHECK-SAME: %arg1: !s32i // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: %[[N_ADDR:.*]] = cir.alloca {{.*}} ["n", init] @@ -165,7 +165,7 @@ void init_param_member() { // CHECK-NEXT: cir.store{{.*}} %[[N]], %[[A_ADDR]] // CHECK-NEXT: cir.return -// CHECK: cir.func @_ZN20ParamMemberInitStrukC1Ei(%arg0: !cir.ptr +// CHECK: 
cir.func{{.*}} @_ZN20ParamMemberInitStrukC1Ei(%arg0: !cir.ptr // CHECK-SAME: %arg1: !s32i // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: %[[N_ADDR:.*]] = cir.alloca {{.*}} ["n", init] @@ -176,7 +176,7 @@ void init_param_member() { // CHECK-NEXT: cir.call @_ZN20ParamMemberInitStrukC2Ei(%[[THIS]], %[[N]]) // CHECK-NEXT: cir.return -// CHECK: cir.func @_Z17init_param_memberv +// CHECK: cir.func{{.*}} @_Z17init_param_memberv // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca {{.*}} ["s", init] // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> // CHECK-NEXT: cir.call @_ZN20ParamMemberInitStrukC1Ei(%[[S_ADDR]], %[[ZERO]]) @@ -197,7 +197,7 @@ void init_union() { UnionInitStruk s; } -// CHECK: cir.func @_ZN14UnionInitStrukC2Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN14UnionInitStrukC2Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] // CHECK-NEXT: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] @@ -208,14 +208,14 @@ void init_union() { // CHECK-NEXT: cir.store{{.*}} %[[ZERO]], %[[C_ADDR]] // CHECK-NEXT: cir.return -// CHECK: cir.func @_ZN14UnionInitStrukC1Ev(%arg0: !cir.ptr +// CHECK: cir.func{{.*}} @_ZN14UnionInitStrukC1Ev(%arg0: !cir.ptr // CHECK-NEXT: %[[THIS_ADDR:.*]] = cir.alloca {{.*}} ["this", init] // CHECK-NEXT: cir.store %arg0, %[[THIS_ADDR]] // CHECK-NEXT: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] // CHECK-NEXT: cir.call @_ZN14UnionInitStrukC2Ev // CHECK-NEXT: cir.return -// CHECK: cir.func @_Z10init_unionv +// CHECK: cir.func{{.*}} @_Z10init_unionv // CHECK-NEXT: %[[S_ADDR:.*]] = cir.alloca {{.*}} ["s", init] // CHECK-NEXT: cir.call @_ZN14UnionInitStrukC1Ev(%[[S_ADDR]]) // CHECK-NEXT: cir.return diff --git a/clang/test/CIR/CodeGen/dso-local.c b/clang/test/CIR/CodeGen/dso-local.c index 07c833d2fbc94..01c93cbd81ee1 100644 --- a/clang/test/CIR/CodeGen/dso-local.c +++ b/clang/test/CIR/CodeGen/dso-local.c @@ -3,7 +3,9 @@ // These are here so we find this test 
when grepping for missing features. // cir::MissingFeatures::opGlobalThreadLocal() -// cir::MissingFeatures::opFuncDsoLocal() + +// Note: Unlike classic codegen, CIR doesn't set dso_local on function declarations. This is +// a difference from classic codegen in the STATIC checks. /// Static relocation model defaults to -fdirect-access-external-data and sets /// dso_local on most global objects. @@ -13,6 +15,9 @@ // STATIC-NEXT: @import_var = external dso_local global i32 // STATIC-NEXT: @weak_bar = extern_weak dso_local global i32 // STATIC-NEXT: @bar = external dso_local global i32 +// STATIC-DAG: declare void @foo() +// STATIC-DAG: define dso_local ptr @zed() +// STATIC-DAG: declare void @import_func() /// If -fno-direct-access-external-data is set, drop dso_local from global variable /// declarations. @@ -21,30 +26,45 @@ // STATIC-INDIRECT-NEXT: @import_var = external global i32 // STATIC-INDIRECT-NEXT: @weak_bar = extern_weak global i32 // STATIC-INDIRECT-NEXT: @bar = external global i32 +// STATIC-INDIRECT-DAG: declare void @import_func() +// STATIC-INDIRECT-DAG: define dso_local ptr @zed() +// STATIC-INDIRECT-DAG: declare void @foo() // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -pic-level 1 -pic-is-pie %s -o - | FileCheck --check-prefix=PIE %s // PIE: @baz = dso_local global i32 42 // PIE-NEXT: @import_var = external global i32 // PIE-NEXT: @weak_bar = extern_weak global i32 // PIE-NEXT: @bar = external global i32 +// PIE-DAG: declare void @foo() +// PIE-DAG: define dso_local ptr @zed() +// PIE-DAG: declare void @import_func() // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -pic-level 1 -pic-is-pie -fdirect-access-external-data %s -o - | FileCheck --check-prefix=PIE-DIRECT %s // PIE-DIRECT: @baz = dso_local global i32 42 // PIE-DIRECT-NEXT: @import_var = external dso_local global i32 // PIE-DIRECT-NEXT: @weak_bar = extern_weak global i32 // PIE-DIRECT-NEXT: @bar = external dso_local global i32 +// PIE-DIRECT-DAG: declare
void @foo() +// PIE-DIRECT-DAG: define dso_local ptr @zed() +// PIE-DIRECT-DAG: declare void @import_func() // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -mrelocation-model static -fno-plt %s -o - | FileCheck --check-prefix=NOPLT %s // NOPLT: @baz = dso_local global i32 42 // NOPLT-NEXT: @import_var = external dso_local global i32 // NOPLT-NEXT: @weak_bar = extern_weak dso_local global i32 // NOPLT-NEXT: @bar = external dso_local global i32 +// NOPLT-DAG: declare void @foo() +// NOPLT-DAG: define dso_local ptr @zed() +// NOPLT-DAG: declare void @import_func() // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-plt -pic-level 1 -pic-is-pie -fdirect-access-external-data %s -o - | FileCheck --check-prefix=PIE-DIRECT-NOPLT %s // PIE-DIRECT-NOPLT: @baz = dso_local global i32 42 // PIE-DIRECT-NOPLT-NEXT: @import_var = external dso_local global i32 // PIE-DIRECT-NOPLT-NEXT: @weak_bar = extern_weak global i32 // PIE-DIRECT-NOPLT-NEXT: @bar = external dso_local global i32 +// PIE-DIRECT-NOPLT-DAG: declare void @foo() +// PIE-DIRECT-NOPLT-DAG: define dso_local ptr @zed() +// PIE-DIRECT-NOPLT-DAG: declare void @import_func() // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -pic-level 1 -pic-is-pie -fno-plt %s -o - | FileCheck --check-prefix=PIE-NO-PLT %s // RUN: %clang_cc1 -triple powerpc64le -fclangir -emit-llvm -mrelocation-model static %s -o - | FileCheck --check-prefix=PIE-NO-PLT %s @@ -52,24 +72,34 @@ // PIE-NO-PLT-NEXT: @import_var = external global i32 // PIE-NO-PLT-NEXT: @weak_bar = extern_weak global i32 // PIE-NO-PLT-NEXT: @bar = external global i32 +// PIE-NO-PLT-DAG: declare void @import_func() +// PIE-NO-PLT-DAG: define dso_local ptr @zed() +// PIE-NO-PLT-DAG: declare void @foo() /// -fdirect-access-external-data is currently ignored for -fPIC. 
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -pic-level 2 %s -o - | FileCheck --check-prefix=SHARED %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -pic-level 2 -fdirect-access-external-data %s -o - | FileCheck --check-prefix=SHARED %s // SHARED-DAG: @bar = external global i32 // SHARED-DAG: @weak_bar = extern_weak global i32 +// SHARED-DAG: declare void @foo() // SHARED-DAG: @baz ={{.*}} global i32 42 +// SHARED-DAG: define{{.*}} ptr @zed() int baz = 42; __attribute__((dllimport)) extern int import_var; __attribute__((weak)) extern int weak_bar; extern int bar; +__attribute__((dllimport)) void import_func(void); int *use_import(void) { + import_func(); return &import_var; } +void foo(void); + int *zed(void) { + foo(); if (baz) return &weak_bar; return &bar; diff --git a/clang/test/CIR/CodeGen/forrange.cpp b/clang/test/CIR/CodeGen/forrange.cpp index 45e146e9091d0..485e9c331417b 100644 --- a/clang/test/CIR/CodeGen/forrange.cpp +++ b/clang/test/CIR/CodeGen/forrange.cpp @@ -13,10 +13,10 @@ void for_range() { ; } -// CIR: cir.func @_Z5beginR9Container(!cir.ptr) -> !cir.ptr -// CIR: cir.func @_Z3endR9Container(!cir.ptr) -> !cir.ptr) -> !cir.ptr +// CIR: cir.func{{.*}} @_Z3endR9Container(!cir.ptr) -> !cir.ptr{{.*}} ["__range1", init, const] @@ -59,7 +59,7 @@ void for_range2() { ; } -// CIR: cir.func @_Z10for_range2v() +// CIR: cir.func{{.*}} @_Z10for_range2v() // CIR: %[[C_ADDR:.*]] = cir.alloca !rec_C2{{.*}} ["c"] // CIR: cir.scope { // CIR: %[[RANGE_ADDR:.*]] = cir.alloca !cir.ptr{{.*}} ["__range1", init, const] @@ -111,7 +111,7 @@ void for_range3() { ; } -// CIR: cir.func @_Z10for_range3v() +// CIR: cir.func{{.*}} @_Z10for_range3v() // CIR: %[[C_ADDR:.*]] = cir.alloca !rec_C3{{.*}} ["c"] // CIR: cir.scope { // CIR: %[[RANGE_ADDR:.*]] = cir.alloca !cir.ptr{{.*}} ["__range1", init, const] diff --git a/clang/test/CIR/CodeGen/if.cpp b/clang/test/CIR/CodeGen/if.cpp index c78ca103de63b..daaec8a61484d 100644 --- 
a/clang/test/CIR/CodeGen/if.cpp +++ b/clang/test/CIR/CodeGen/if.cpp @@ -14,7 +14,7 @@ int if0(bool a) { } -// CIR: cir.func @_Z3if0b(%arg0: !cir.bool loc({{.*}})) -> !s32i +// CIR: cir.func{{.*}} @_Z3if0b(%arg0: !cir.bool loc({{.*}})) -> !s32i // CIR: cir.scope { // CIR: %4 = cir.load{{.*}} %0 : !cir.ptr, !cir.bool // CIR-NEXT: cir.if %4 { @@ -26,7 +26,7 @@ int if0(bool a) { // CIR-NEXT: } -// LLVM: define i32 @_Z3if0b(i1 %0) +// LLVM: define{{.*}} i32 @_Z3if0b(i1 %0) // LLVM: br label %[[ENTRY:.*]] // LLVM: [[ENTRY]]: // LLVM: %6 = load i8, ptr %2, align 1 @@ -43,7 +43,7 @@ int if0(bool a) { // LLVM: %12 = load i32, ptr %3, align 4 // LLVM: ret i32 %12 -// OGCG: define dso_local noundef i32 @_Z3if0b(i1 noundef zeroext %a) +// OGCG: define{{.*}} i32 @_Z3if0b(i1 noundef zeroext %a) // OGCG: entry: // OGCG: %[[RETVAL:.*]] = alloca i32, align 4 // OGCG: %[[A_ADDR:.*]] = alloca i8, align 1 @@ -71,7 +71,7 @@ void if1(int a) { } } -// CIR: cir.func @_Z3if1i(%arg0: !s32i loc({{.*}})) +// CIR: cir.func{{.*}} @_Z3if1i(%arg0: !s32i loc({{.*}})) // CIR: cir.scope { // CIR: %3 = cir.load{{.*}} %0 : !cir.ptr, !s32i // CIR: %4 = cir.cast(int_to_bool, %3 : !s32i), !cir.bool @@ -84,7 +84,7 @@ void if1(int a) { // CIR-NEXT: } // CIR: } -// LLVM: define void @_Z3if1i(i32 %0) +// LLVM: define{{.*}} void @_Z3if1i(i32 %0) // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[X:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 %0, ptr %[[A]], align 4 @@ -105,7 +105,7 @@ void if1(int a) { // LLVM: [[EXIT]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3if1i(i32 noundef %[[A:.*]]) +// OGCG: define{{.*}} void @_Z3if1i(i32 noundef %[[A:.*]]) // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[X:.*]] = alloca i32, align 4 @@ -138,7 +138,7 @@ void if2(int a, bool b, bool c) { } } -// CIR: cir.func @_Z3if2ibb(%arg0: !s32i loc({{.*}}), %arg1: !cir.bool loc({{.*}}), %arg2: !cir.bool loc({{.*}})) +// CIR: cir.func{{.*}} @_Z3if2ibb(%arg0: !s32i loc({{.*}}), 
%arg1: !cir.bool loc({{.*}}), %arg2: !cir.bool loc({{.*}})) // CIR: cir.scope { // CIR: %5 = cir.load{{.*}} %0 : !cir.ptr, !s32i // CIR: %6 = cir.cast(int_to_bool, %5 : !s32i), !cir.bool @@ -165,7 +165,7 @@ void if2(int a, bool b, bool c) { // CIR: } // CIR: } -// LLVM: define void @_Z3if2ibb(i32 %[[A:.*]], i1 %[[B:.*]], i1 %[[C:.*]]) +// LLVM: define{{.*}} void @_Z3if2ibb(i32 %[[A:.*]], i1 %[[B:.*]], i1 %[[C:.*]]) // LLVM: %[[VARA:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[VARB:.*]] = alloca i8, i64 1, align 1 // LLVM: %[[VARC:.*]] = alloca i8, i64 1, align 1 @@ -214,7 +214,7 @@ void if2(int a, bool b, bool c) { // LLVM: [[LABEL28]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3if2ibb(i32 noundef %[[A:.*]], i1 noundef zeroext %[[B:.*]], i1 noundef zeroext %[[C:.*]]) +// OGCG: define{{.*}} void @_Z3if2ibb(i32 noundef %[[A:.*]], i1 noundef zeroext %[[B:.*]], i1 noundef zeroext %[[C:.*]]) // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[B_ADDR:.*]] = alloca i8, align 1 @@ -260,7 +260,7 @@ int if_init() { } } -// CIR: cir.func @_Z7if_initv() -> !s32i +// CIR: cir.func{{.*}} @_Z7if_initv() -> !s32i // CIR: %[[RETVAL:.*]] = cir.alloca !s32i, !cir.ptr // CIR: cir.scope { // CIR: %[[X:.*]] = cir.alloca !s32i, !cir.ptr, @@ -285,7 +285,7 @@ int if_init() { // CIR: } // CIR: } -// LLVM: define i32 @_Z7if_initv() +// LLVM: define{{.*}} i32 @_Z7if_initv() // LLVM: %[[X:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[RETVAL:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 42, ptr %[[X]], align 4 @@ -305,7 +305,7 @@ int if_init() { // LLVM: %[[RETVAL_LOAD2:.*]] = load i32, ptr %[[RETVAL]], align 4 // LLVM: ret i32 %[[RETVAL_LOAD2]] -// OGCG: define dso_local noundef i32 @_Z7if_initv() +// OGCG: define{{.*}} i32 @_Z7if_initv() // OGCG: entry: // OGCG: %[[RETVAL:.*]] = alloca i32, align 4 // OGCG: %[[X:.*]] = alloca i32, align 4 diff --git a/clang/test/CIR/CodeGen/inline-cxx-func.cpp b/clang/test/CIR/CodeGen/inline-cxx-func.cpp index 
31d0255f18df9..d121daf816173 100644 --- a/clang/test/CIR/CodeGen/inline-cxx-func.cpp +++ b/clang/test/CIR/CodeGen/inline-cxx-func.cpp @@ -17,7 +17,7 @@ struct S { // LLVM: %struct.S = type { i32 } // OGCG: %struct.S = type { i32 } -// CIR: cir.func @_ZN1S10InlineFuncEv(%arg0: !cir.ptr {{.*}}) -> !s32i +// CIR: cir.func{{.*}} @_ZN1S10InlineFuncEv(%arg0: !cir.ptr {{.*}}) -> !s32i // CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] // CIR: %[[RET_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] // CIR: cir.store %arg0, %[[THIS_ADDR]] : !cir.ptr, !cir.ptr> @@ -46,7 +46,7 @@ void use() { s.InlineFunc(); } -// CIR: cir.func @_Z3usev() +// CIR: cir.func{{.*}} @_Z3usev() // CIR: %[[S_ADDR:.*]] = cir.alloca !rec_S, !cir.ptr, ["s"] // CIR: %[[RET_VAL:.*]] = cir.call @_ZN1S10InlineFuncEv(%[[S_ADDR]]) : (!cir.ptr) -> !s32i // CIR: cir.return diff --git a/clang/test/CIR/CodeGen/int-to-bool.cpp b/clang/test/CIR/CodeGen/int-to-bool.cpp index 1dd15edbbf55a..ad36af4552c2f 100644 --- a/clang/test/CIR/CodeGen/int-to-bool.cpp +++ b/clang/test/CIR/CodeGen/int-to-bool.cpp @@ -9,14 +9,14 @@ bool f1(unsigned char c) { return c; } -// CIR: cir.func @_Z2f1h +// CIR: cir.func{{.*}} @_Z2f1h // CIR: cir.cast(int_to_bool, %{{.*}} : !u8i), !cir.bool // Note: The full zext/store/load/trunc sequence is checked here to show what // CIR is being lowered to. There's no need to check it for every function since // the lowering is the same for all of them. 
-// LLVM: define i1 @_Z2f1h +// LLVM: define{{.*}} i1 @_Z2f1h // LLVM: %[[CMP:.*]] = icmp ne i8 %4, 0 // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i8 // LLVM: store i8 %[[ZEXT]], ptr %{{.*}} @@ -32,10 +32,10 @@ bool f2(short s) { return s; } -// CIR: cir.func @_Z2f2s +// CIR: cir.func{{.*}} @_Z2f2s // CIR: cir.cast(int_to_bool, %{{.*}} : !s16i), !cir.bool -// LLVM: define i1 @_Z2f2s +// LLVM: define{{.*}} i1 @_Z2f2s // LLVM: %[[CMP:.*]] = icmp ne i16 %4, 0 // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i8 @@ -47,10 +47,10 @@ bool f3(unsigned u) { return u; } -// CIR: cir.func @_Z2f3j +// CIR: cir.func{{.*}} @_Z2f3j // CIR: cir.cast(int_to_bool, %{{.*}} : !u32i), !cir.bool -// LLVM: define i1 @_Z2f3j +// LLVM: define{{.*}} i1 @_Z2f3j // LLVM: %[[CMP:.*]] = icmp ne i32 %4, 0 // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i8 @@ -62,10 +62,10 @@ bool f4(long l) { return l; } -// CIR: cir.func @_Z2f4l +// CIR: cir.func{{.*}} @_Z2f4l // CIR: cir.cast(int_to_bool, %{{.*}} : !s64i), !cir.bool -// LLVM: define i1 @_Z2f4l +// LLVM: define{{.*}} i1 @_Z2f4l // LLVM: %[[CMP:.*]] = icmp ne i64 %4, 0 // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i8 diff --git a/clang/test/CIR/CodeGen/linkage-spec.cpp b/clang/test/CIR/CodeGen/linkage-spec.cpp index 01c4e3fbe181d..eb6c7b0a546a9 100644 --- a/clang/test/CIR/CodeGen/linkage-spec.cpp +++ b/clang/test/CIR/CodeGen/linkage-spec.cpp @@ -1,42 +1,42 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s extern "C" void TopLevelC(){} -// CHECK: cir.func @TopLevelC() { +// CHECK: cir.func{{.*}} @TopLevelC() { extern "C++" void TopLevelCpp(){} -// CHECK: cir.func @_Z11TopLevelCppv() { +// CHECK: cir.func{{.*}} @_Z11TopLevelCppv() { extern "C++" { void ExternCppEmpty(){} - // CHECK: cir.func @_Z14ExternCppEmptyv() { + // CHECK: cir.func{{.*}} @_Z14ExternCppEmptyv() { extern "C" void ExternCpp_C(){} - // CHECK: cir.func @ExternCpp_C() { + // CHECK: cir.func{{.*}} @ExternCpp_C() { extern "C++" void 
ExternCpp_Cpp(){} - // CHECK: cir.func @_Z13ExternCpp_Cppv() { + // CHECK: cir.func{{.*}} @_Z13ExternCpp_Cppv() { extern "C" { void ExternCpp_CEmpty(){} - // CHECK: cir.func @ExternCpp_CEmpty() { + // CHECK: cir.func{{.*}} @ExternCpp_CEmpty() { extern "C" void ExternCpp_C_C(){} - // CHECK: cir.func @ExternCpp_C_C() { + // CHECK: cir.func{{.*}} @ExternCpp_C_C() { extern "C++" void ExternCpp_C_Cpp(){} - // CHECK: cir.func @_Z15ExternCpp_C_Cppv() { + // CHECK: cir.func{{.*}} @_Z15ExternCpp_C_Cppv() { } } extern "C" { void ExternCEmpty(){} - // CHECK: cir.func @ExternCEmpty() { + // CHECK: cir.func{{.*}} @ExternCEmpty() { extern "C" void ExternC_C(){} - // CHECK: cir.func @ExternC_C() { + // CHECK: cir.func{{.*}} @ExternC_C() { extern "C++" void ExternC_Cpp(){} - // CHECK: cir.func @_Z11ExternC_Cppv() { + // CHECK: cir.func{{.*}} @_Z11ExternC_Cppv() { extern "C++" { void ExternC_CppEmpty(){} - // CHECK: cir.func @_Z16ExternC_CppEmptyv() { + // CHECK: cir.func{{.*}} @_Z16ExternC_CppEmptyv() { extern "C" void ExternC_Cpp_C(){} - // CHECK: cir.func @ExternC_Cpp_C() { + // CHECK: cir.func{{.*}} @ExternC_Cpp_C() { extern "C++" void ExternC_Cpp_Cpp(){} - // CHECK: cir.func @_Z15ExternC_Cpp_Cppv() { + // CHECK: cir.func{{.*}} @_Z15ExternC_Cpp_Cppv() { } } diff --git a/clang/test/CIR/CodeGen/local-vars.cpp b/clang/test/CIR/CodeGen/local-vars.cpp index 42d6433645354..9385fdfa65601 100644 --- a/clang/test/CIR/CodeGen/local-vars.cpp +++ b/clang/test/CIR/CodeGen/local-vars.cpp @@ -22,7 +22,7 @@ void test() { } // CHECK: module -// CHECK: cir.func @_Z4testv() +// CHECK: cir.func{{.*}} @_Z4testv() // CHECK: %[[I_PTR:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CHECK: %[[L_PTR:.*]] = cir.alloca !s64i, !cir.ptr, ["l", init] {alignment = 8 : i64} // CHECK: %[[F_PTR:.*]] = cir.alloca !cir.float, !cir.ptr, ["f", init] {alignment = 4 : i64} diff --git a/clang/test/CIR/CodeGen/loop.cpp b/clang/test/CIR/CodeGen/loop.cpp index ba117c54b743d..0eba0bbc97c15 100644 
--- a/clang/test/CIR/CodeGen/loop.cpp +++ b/clang/test/CIR/CodeGen/loop.cpp @@ -10,7 +10,7 @@ void l0() { } } -// CIR: cir.func @_Z2l0v +// CIR: cir.func{{.*}} @_Z2l0v // CIR: cir.scope { // CIR: cir.for : cond { // CIR: %[[TRUE:.*]] = cir.const #true @@ -24,7 +24,7 @@ void l0() { // CIR: cir.return // CIR: } -// LLVM: define void @_Z2l0v() +// LLVM: define{{.*}} void @_Z2l0v() // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: // LLVM: br label %[[LABEL2:.*]] @@ -50,7 +50,7 @@ void l1() { } } -// CIR: cir.func @_Z2l1v +// CIR: cir.func{{.*}} @_Z2l1v // CIR-NEXT: cir.scope { // CIR-NEXT: %[[I:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i @@ -67,7 +67,7 @@ void l1() { // CIR-NEXT: cir.return // CIR-NEXT: } -// LLVM: define void @_Z2l1v() +// LLVM: define{{.*}} void @_Z2l1v() // LLVM: %[[I:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: @@ -98,7 +98,7 @@ void l2() { } } -// CIR: cir.func @_Z2l2v +// CIR: cir.func{{.*}} @_Z2l2v // CIR-NEXT: cir.scope { // CIR-NEXT: cir.for : cond { // CIR-NEXT: %[[TRUE:.*]] = cir.const #true @@ -117,7 +117,7 @@ void l2() { // CIR-NEXT: cir.return // CIR-NEXT: } -// LLVM: define void @_Z2l2v() +// LLVM: define{{.*}} void @_Z2l2v() // LLVM: %[[I:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: @@ -148,7 +148,7 @@ void l3() { int i = 0; } -// CIR: cir.func @_Z2l3v +// CIR: cir.func{{.*}} @_Z2l3v // CIR-NEXT: cir.scope { // CIR-NEXT: %[[I:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CIR-NEXT: cir.for : cond { @@ -165,7 +165,7 @@ void l3() { // CIR-NEXT: cir.return // CIR-NEXT: } -// LLVM: define void @_Z2l3v() +// LLVM: define{{.*}} void @_Z2l3v() // LLVM: %[[I:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: @@ -196,7 +196,7 @@ void l4() { ; } -// CIR: cir.func @_Z2l4v +// CIR: cir.func{{.*}} @_Z2l4v // CIR: 
%[[A_ADDR:.*]] = cir.alloca {{.*}} ["a"] // CIR: cir.scope { // CIR: %[[RANGE_ADDR:.*]] = cir.alloca {{.*}} ["__range1", init, const] @@ -231,7 +231,7 @@ void l4() { // CIR: } // CIR: } -// LLVM: define void @_Z2l4v() { +// LLVM: define{{.*}} void @_Z2l4v() { // LLVM: %[[RANGE_ADDR:.*]] = alloca ptr // LLVM: %[[BEGIN_ADDR:.*]] = alloca ptr // LLVM: %[[END_ADDR:.*]] = alloca ptr @@ -305,7 +305,7 @@ void l5() { for (int arr[]{1,2,3,4}; auto x : arr) {} } -// CIR: cir.func @_Z2l5v +// CIR: cir.func{{.*}} @_Z2l5v // CIR: cir.scope { // CIR: %[[ARR_ADDR:.*]] = cir.alloca {{.*}} ["arr", init] // CIR: %[[RANGE_ADDR:.*]] = cir.alloca {{.*}} ["__range1", init, const] @@ -355,7 +355,7 @@ void l5() { // CIR: } // CIR: } -// LLVM: define void @_Z2l5v() { +// LLVM: define{{.*}} void @_Z2l5v() { // LLVM: %[[ARR_ADDR:.*]] = alloca [4 x i32] // LLVM: %[[RANGE_ADDR:.*]] = alloca ptr // LLVM: %[[BEGIN_ADDR:.*]] = alloca ptr @@ -439,7 +439,7 @@ void test_do_while_false() { } while (0); } -// CIR: cir.func @_Z19test_do_while_falsev() +// CIR: cir.func{{.*}} @_Z19test_do_while_falsev() // CIR-NEXT: cir.scope { // CIR-NEXT: cir.do { // CIR-NEXT: cir.yield @@ -448,7 +448,7 @@ void test_do_while_false() { // CIR-NEXT: %[[FALSE:.*]] = cir.cast(int_to_bool, %[[ZERO]] : !s32i), !cir.bool // CIR-NEXT: cir.condition(%[[FALSE]]) -// LLVM: define void @_Z19test_do_while_falsev() +// LLVM: define{{.*}} void @_Z19test_do_while_falsev() // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: // LLVM: br label %[[LABEL3:.*]] @@ -475,7 +475,7 @@ void test_empty_while_true() { } } -// CIR: cir.func @_Z21test_empty_while_truev() +// CIR: cir.func{{.*}} @_Z21test_empty_while_truev() // CIR-NEXT: cir.scope { // CIR-NEXT: cir.while { // CIR-NEXT: %[[TRUE:.*]] = cir.const #true @@ -486,7 +486,7 @@ void test_empty_while_true() { // CIR-NEXT: } // CIR-NEXT: cir.yield -// LLVM: define void @_Z21test_empty_while_truev() +// LLVM: define{{.*}} void @_Z21test_empty_while_truev() // LLVM: br label %[[LABEL1:.*]] // 
LLVM: [[LABEL1]]: // LLVM: br label %[[LABEL2:.*]] @@ -517,7 +517,7 @@ void unreachable_after_continue() { } } -// CIR: cir.func @_Z26unreachable_after_continuev() +// CIR: cir.func{{.*}} @_Z26unreachable_after_continuev() // CIR: cir.scope { // CIR: cir.for : cond { // CIR: %[[TRUE:.*]] = cir.const #true @@ -539,7 +539,7 @@ void unreachable_after_continue() { // CIR: cir.return // CIR: } -// LLVM: define void @_Z26unreachable_after_continuev() +// LLVM: define{{.*}} void @_Z26unreachable_after_continuev() // LLVM: %[[X:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: @@ -577,7 +577,7 @@ void unreachable_after_break() { } } -// CIR: cir.func @_Z23unreachable_after_breakv() +// CIR: cir.func{{.*}} @_Z23unreachable_after_breakv() // CIR: cir.scope { // CIR: cir.for : cond { // CIR: %[[TRUE:.*]] = cir.const #true @@ -599,7 +599,7 @@ void unreachable_after_break() { // CIR: cir.return // CIR: } -// LLVM: define void @_Z23unreachable_after_breakv() +// LLVM: define{{.*}} void @_Z23unreachable_after_breakv() // LLVM: %[[X:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[LABEL1:.*]] // LLVM: [[LABEL1]]: diff --git a/clang/test/CIR/CodeGen/member-functions.cpp b/clang/test/CIR/CodeGen/member-functions.cpp index c1d49ac4d8f3a..8be2c7fc2edbe 100644 --- a/clang/test/CIR/CodeGen/member-functions.cpp +++ b/clang/test/CIR/CodeGen/member-functions.cpp @@ -10,7 +10,7 @@ struct C { void C::f() {} -// CIR: cir.func @_ZN1C1fEv(%[[THIS_ARG:.*]]: !cir.ptr +// CIR: cir.func{{.*}} @_ZN1C1fEv(%[[THIS_ARG:.*]]: !cir.ptr // CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] // CIR: cir.store %[[THIS_ARG]], %[[THIS_ADDR]] : !cir.ptr, !cir.ptr> // CIR: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] : !cir.ptr>, !cir.ptr @@ -19,7 +19,7 @@ void C::f() {} void C::f2(int a, int b) {} -// CIR: cir.func @_ZN1C2f2Eii(%[[THIS_ARG:.*]]: !cir.ptr {{.*}}, %[[A_ARG:.*]]: !s32i {{.*}}, %[[B_ARG:.*]]: !s32i {{.*}}) { +// CIR: cir.func{{.*}} 
@_ZN1C2f2Eii(%[[THIS_ARG:.*]]: !cir.ptr {{.*}}, %[[A_ARG:.*]]: !s32i {{.*}}, %[[B_ARG:.*]]: !s32i {{.*}}) { // CIR-NEXT: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] // CIR-NEXT: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CIR-NEXT: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] @@ -36,7 +36,7 @@ void test1() { c.f2(1, 2); } -// CIR: cir.func @_Z5test1v() { +// CIR: cir.func{{.*}} @_Z5test1v() { // CIR-NEXT: %[[C_ADDR:.*]] = cir.alloca !rec_C, !cir.ptr, ["c"] // CIR-NEXT: cir.call @_ZN1C1fEv(%[[C_ADDR]]) : (!cir.ptr) -> () // CIR-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i diff --git a/clang/test/CIR/CodeGen/namespace.cpp b/clang/test/CIR/CodeGen/namespace.cpp index cf02673c07787..efae1f2f2f236 100644 --- a/clang/test/CIR/CodeGen/namespace.cpp +++ b/clang/test/CIR/CodeGen/namespace.cpp @@ -23,9 +23,9 @@ namespace test { // CHECK-DAG: cir.global "private" internal dso_local @_ZN12_GLOBAL__N_12g1E = #cir.int<1> : !s32i // CHECK-DAG: cir.global external @_ZN4test2g2E = #cir.int<2> : !s32i // CHECK-DAG: cir.global external @_ZN4test5test22g3E = #cir.int<3> : !s32i -// CHECK-DAG: cir.func @_ZN12_GLOBAL__N_12f1Ev() -// CHECK-DAG: cir.func @_ZN4test2f2Ev() -// CHECK-DAG: cir.func @_ZN4test5test22f3Ev() +// CHECK-DAG: cir.func{{.*}} @_ZN12_GLOBAL__N_12f1Ev() +// CHECK-DAG: cir.func{{.*}} @_ZN4test2f2Ev() +// CHECK-DAG: cir.func{{.*}} @_ZN4test5test22f3Ev() using namespace test; @@ -38,7 +38,7 @@ int f4(void) { } // The namespace gets added during name mangling, so this is wrong but expected. 
-// CHECK: cir.func @_Z2f4v() +// CHECK: cir.func{{.*}} @_Z2f4v() // CHECK: cir.call @_ZN12_GLOBAL__N_12f1Ev() // CHECK: cir.call @_ZN4test2f2Ev() // CHECK: cir.call @_ZN4test5test22f3Ev() @@ -59,7 +59,7 @@ int f5() { return g3; } -// CHECK: cir.func @_Z2f5v() +// CHECK: cir.func{{.*}} @_Z2f5v() // CHECK: cir.call @_ZN4test5test22f3Ev() // CHECK: %[[G3_ADDR:.*]] = cir.get_global @_ZN4test5test22g3E : !cir.ptr // CHECK: %[[G3_VAL:.*]] = cir.load{{.*}} %[[G3_ADDR]] : !cir.ptr, !s32i @@ -76,7 +76,7 @@ int f6() { return s.a; } -// CHECK: cir.func @_Z2f6v() +// CHECK: cir.func{{.*}} @_Z2f6v() // CHECK: cir.get_global @_ZN5test31sE : !cir.ptr // CHECK: cir.get_member %{{.*}}[0] {name = "a"} @@ -92,4 +92,4 @@ void f7() { shadow::shadowedFunc(); } -// CHECK: cir.func @_Z2f7v() +// CHECK: cir.func{{.*}} @_Z2f7v() diff --git a/clang/test/CIR/CodeGen/nullptr-init.cpp b/clang/test/CIR/CodeGen/nullptr-init.cpp index 76965ce78469e..091269d09c985 100644 --- a/clang/test/CIR/CodeGen/nullptr-init.cpp +++ b/clang/test/CIR/CodeGen/nullptr-init.cpp @@ -11,7 +11,7 @@ void t1() { int *p3 = (int*)0; } -// CIR: cir.func @_Z2t1v() +// CIR: cir.func{{.*}} @_Z2t1v() // CIR-NEXT: %[[P1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["p1", init] {alignment = 8 : i64} // CIR-NEXT: %[[P2:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["p2", init] {alignment = 8 : i64} // CIR-NEXT: %[[P3:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["p3", init] {alignment = 8 : i64} diff --git a/clang/test/CIR/CodeGen/string-literals.c b/clang/test/CIR/CodeGen/string-literals.c index be9622f9abe27..44fd191173c33 100644 --- a/clang/test/CIR/CodeGen/string-literals.c +++ b/clang/test/CIR/CodeGen/string-literals.c @@ -33,10 +33,10 @@ char *f1() { return "1"; } -// CIR: cir.func @f1() +// CIR: cir.func{{.*}} @f1() // CIR: %[[STR:.*]] = cir.get_global @[[STR1_GLOBAL]] : !cir.ptr> -// LLVM: define ptr @f1() +// LLVM: define{{.*}} ptr @f1() // LLVM: store ptr @[[STR1_GLOBAL]], ptr {{.*}} // OGCG: define {{.*}}ptr @f1() @@ -46,24 +46,24 @@ 
char *f2() { return ""; } -// CIR: cir.func @f2() +// CIR: cir.func{{.*}} @f2() // CIR: %[[STR2:.*]] = cir.get_global @[[STR2_GLOBAL]] : !cir.ptr> -// LLVM: define ptr @f2() +// LLVM: define{{.*}} ptr @f2() // LLVM: store ptr @[[STR2_GLOBAL]], ptr {{.*}} -// OGCG: define {{.*}}ptr @f2() +// OGCG: define{{.*}} ptr @f2() // OGCG: ret ptr @[[STR2_GLOBAL]] char *f3() { return "\00"; } -// CIR: cir.func @f3() +// CIR: cir.func{{.*}} @f3() // CIR: %[[STR3:.*]] = cir.get_global @[[STR3_GLOBAL]] : !cir.ptr> -// LLVM: define ptr @f3() +// LLVM: define{{.*}} ptr @f3() // LLVM: store ptr @[[STR3_GLOBAL]], ptr {{.*}} -// OGCG: define {{.*}}ptr @f3() +// OGCG: define{{.*}} ptr @f3() // OGCG: ret ptr @[[STR3_GLOBAL]] diff --git a/clang/test/CIR/CodeGen/struct.c b/clang/test/CIR/CodeGen/struct.c index b722b64eeb583..aa7c4cf1c295a 100644 --- a/clang/test/CIR/CodeGen/struct.c +++ b/clang/test/CIR/CodeGen/struct.c @@ -166,11 +166,11 @@ void f(void) { struct IncompleteS *p; } -// CIR: cir.func @f() +// CIR: cir.func{{.*}} @f() // CIR-NEXT: cir.alloca !cir.ptr, !cir.ptr>, ["p"] {alignment = 8 : i64} // CIR-NEXT: cir.return -// LLVM: define void @f() +// LLVM: define{{.*}} void @f() // LLVM-NEXT: %[[P:.*]] = alloca ptr, i64 1, align 8 // LLVM-NEXT: ret void @@ -183,11 +183,11 @@ void f2(void) { struct CompleteS s; } -// CIR: cir.func @f2() +// CIR: cir.func{{.*}} @f2() // CIR-NEXT: cir.alloca !rec_CompleteS, !cir.ptr, ["s"] {alignment = 4 : i64} // CIR-NEXT: cir.return -// LLVM: define void @f2() +// LLVM: define{{.*}} void @f2() // LLVM-NEXT: %[[S:.*]] = alloca %struct.CompleteS, i64 1, align 4 // LLVM-NEXT: ret void @@ -201,7 +201,7 @@ char f3(int a) { return cs.b; } -// CIR: cir.func @f3(%[[ARG_A:.*]]: !s32i +// CIR: cir.func{{.*}} @f3(%[[ARG_A:.*]]: !s32i // CIR-NEXT: %[[A_ADDR:.*]] = cir.alloca {{.*}} ["a", init] {alignment = 4 : i64} // CIR-NEXT: %[[RETVAL_ADDR:.*]] = cir.alloca {{.*}} ["__retval"] {alignment = 1 : i64} // CIR-NEXT: cir.store{{.*}} %[[ARG_A]], %[[A_ADDR]] @@ 
-216,7 +216,7 @@ char f3(int a) { // CIR-NEXT: %[[RETVAL:.*]] = cir.load{{.*}} %[[RETVAL_ADDR]] // CIR-NEXT: cir.return %[[RETVAL]] -// LLVM: define i8 @f3(i32 %[[ARG_A:.*]]) +// LLVM: define{{.*}} i8 @f3(i32 %[[ARG_A:.*]]) // LLVM-NEXT: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[RETVAL_ADDR:.*]] = alloca i8, i64 1, align 1 // LLVM-NEXT: store i32 %[[ARG_A]], ptr %[[A_ADDR]], align 4 @@ -241,7 +241,7 @@ char f4(int a, struct CompleteS *p) { return p->b; } -// CIR: cir.func @f4(%[[ARG_A:.*]]: !s32i {{.*}}, %[[ARG_P:.*]]: !cir.ptr +// CIR: cir.func{{.*}} @f4(%[[ARG_A:.*]]: !s32i {{.*}}, %[[ARG_P:.*]]: !cir.ptr // CIR-NEXT: %[[A_ADDR:.*]] = cir.alloca {{.*}} ["a", init] {alignment = 4 : i64} // CIR-NEXT: %[[P_ADDR:.*]] = cir.alloca {{.*}} ["p", init] {alignment = 8 : i64} // CIR-NEXT: %[[RETVAL_ADDR:.*]] = cir.alloca {{.*}} ["__retval"] {alignment = 1 : i64} @@ -258,7 +258,7 @@ char f4(int a, struct CompleteS *p) { // CIR-NEXT: %[[RETVAL:.*]] = cir.load{{.*}} %[[RETVAL_ADDR]] // CIR-NEXT: cir.return %[[RETVAL]] -// LLVM: define i8 @f4(i32 %[[ARG_A:.*]], ptr %[[ARG_P:.*]]) +// LLVM: define{{.*}} i8 @f4(i32 %[[ARG_A:.*]], ptr %[[ARG_P:.*]]) // LLVM-NEXT: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[P_ADDR:.*]] = alloca ptr, i64 1, align 8 // LLVM-NEXT: %[[RETVAL_ADDR:.*]] = alloca i8, i64 1, align 1 @@ -294,7 +294,7 @@ void f5(struct NodeS* a) { a->next = 0; } -// CIR: cir.func @f5 +// CIR: cir.func{{.*}} @f5 // CIR: %[[NEXT:.*]] = cir.get_member {{%.}}[0] {name = "next"} : !cir.ptr -> !cir.ptr> // CIR: cir.store {{.*}}, %[[NEXT]] @@ -312,7 +312,7 @@ void f6(struct CycleStart *start) { struct CycleStart *start2 = end->start; } -// CIR: cir.func @f6 +// CIR: cir.func{{.*}} @f6 // CIR: %[[MIDDLE:.*]] = cir.get_member {{.*}}[0] {name = "middle"} : !cir.ptr -> !cir.ptr> // CIR: %[[END:.*]] = cir.get_member %{{.*}}[0] {name = "end"} : !cir.ptr -> !cir.ptr> // CIR: %[[START2:.*]] = cir.get_member %{{.*}}[0] {name = "start"} : !cir.ptr -> 
!cir.ptr> diff --git a/clang/test/CIR/CodeGen/struct.cpp b/clang/test/CIR/CodeGen/struct.cpp index c8406f811a462..ee6c4cab7341f 100644 --- a/clang/test/CIR/CodeGen/struct.cpp +++ b/clang/test/CIR/CodeGen/struct.cpp @@ -27,11 +27,11 @@ void f(void) { IncompleteS *p; } -// CIR: cir.func @_Z1fv() +// CIR: cir.func{{.*}} @_Z1fv() // CIR-NEXT: cir.alloca !cir.ptr, !cir.ptr>, ["p"] // CIR-NEXT: cir.return -// LLVM: define void @_Z1fv() +// LLVM: define{{.*}} void @_Z1fv() // LLVM-NEXT: %[[P:.*]] = alloca ptr, i64 1, align 8 // LLVM-NEXT: ret void @@ -44,14 +44,14 @@ char f2(CompleteS &s) { return s.b; } -// CIR: cir.func @_Z2f2R9CompleteS(%[[ARG_S:.*]]: !cir.ptr{{.*}}) +// CIR: cir.func{{.*}} @_Z2f2R9CompleteS(%[[ARG_S:.*]]: !cir.ptr{{.*}}) // CIR: %[[S_ADDR:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["s", init, const] // CIR: cir.store %[[ARG_S]], %[[S_ADDR]] // CIR: %[[S_REF:.*]] = cir.load{{.*}} %[[S_ADDR]] // CIR: %[[S_ADDR2:.*]] = cir.get_member %[[S_REF]][1] {name = "b"} // CIR: %[[S_B:.*]] = cir.load{{.*}} %[[S_ADDR2]] -// LLVM: define i8 @_Z2f2R9CompleteS(ptr %[[ARG_S:.*]]) +// LLVM: define{{.*}} i8 @_Z2f2R9CompleteS(ptr %[[ARG_S:.*]]) // LLVM: %[[S_ADDR:.*]] = alloca ptr // LLVM: store ptr %[[ARG_S]], ptr %[[S_ADDR]] // LLVM: %[[S_REF:.*]] = load ptr, ptr %[[S_ADDR]], align 8 @@ -79,7 +79,7 @@ void f3() { o.i.n; } -// CIR: cir.func @_Z2f3v() +// CIR: cir.func{{.*}} @_Z2f3v() // CIR: %[[O:.*]] = cir.alloca !rec_Outer, !cir.ptr, ["o"] // CIR: %[[O_I:.*]] = cir.get_member %[[O]][0] {name = "i"} // CIR: %[[O_I_N:.*]] = cir.get_member %[[O_I]][0] {name = "n"} diff --git a/clang/test/CIR/CodeGen/switch.cpp b/clang/test/CIR/CodeGen/switch.cpp index 8786c2350c192..e13aa8f4f4953 100644 --- a/clang/test/CIR/CodeGen/switch.cpp +++ b/clang/test/CIR/CodeGen/switch.cpp @@ -19,7 +19,7 @@ void sw1(int a) { } } -// CIR: cir.func @_Z3sw1i +// CIR: cir.func{{.*}} @_Z3sw1i // CIR: cir.switch (%[[COND:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR: 
cir.break @@ -30,7 +30,7 @@ void sw1(int a) { // CIR: cir.alloca !s32i, !cir.ptr, ["yolo", init] // CIR: cir.break -// LLVM: define void @_Z3sw1i +// LLVM: define{{.*}} void @_Z3sw1i // LLVM: store i32 1, ptr %[[B_ADDR:.*]], align 4 // LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR:.*]], align 4 // LLVM: br label %[[BB7:.*]] @@ -60,7 +60,7 @@ void sw1(int a) { // LLVM: [[DEFAULT]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw1i +// OGCG: define{{.*}} void @_Z3sw1i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[B:.*]] = alloca i32, align 4 @@ -97,7 +97,7 @@ void sw2(int a) { } } -// CIR: cir.func @_Z3sw2i +// CIR: cir.func{{.*}} @_Z3sw2i // CIR: cir.scope { // CIR-NEXT: %[[YOLO:.*]] = cir.alloca !s32i, !cir.ptr, ["yolo", init] // CIR-NEXT: %[[FOMO:.*]] = cir.alloca !s32i, !cir.ptr, ["fomo", init] @@ -106,7 +106,7 @@ void sw2(int a) { // CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i // CIR-NEXT: cir.store{{.*}} %[[ZERO]], %[[FOMO]] : !s32i, !cir.ptr -// LLVM: define void @_Z3sw2i +// LLVM: define{{.*}} void @_Z3sw2i // LLVM: store i32 2, ptr %[[YOLO_ADDR:.*]], align 4 // LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR:.*]], align 4 // LLVM: br label %[[SWITCH:.*]] @@ -126,7 +126,7 @@ void sw2(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw2i +// OGCG: define{{.*}} void @_Z3sw2i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[YOLO:.*]] = alloca i32, align 4 @@ -151,7 +151,7 @@ void sw3(int a) { } } -// CIR: cir.func @_Z3sw3i +// CIR: cir.func{{.*}} @_Z3sw3i // CIR: cir.scope { // CIR-NEXT: %[[COND:.*]] = cir.load{{.*}} %[[A:.*]] : !cir.ptr, !s32i // CIR-NEXT: cir.switch (%[[COND]] : !s32i) { @@ -161,7 +161,7 @@ void sw3(int a) { // CIR-NEXT: cir.yield // CIR-NEXT: } -// LLVM-LABEL: define void @_Z3sw3i +// LLVM-LABEL: define{{.*}} void @_Z3sw3i // LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR:.*]], align 4 // LLVM: br label %[[SWITCH:.*]] // LLVM: [[SWITCH]]: @@ 
-174,7 +174,7 @@ void sw3(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw3i +// OGCG: define{{.*}} void @_Z3sw3i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -195,7 +195,7 @@ int sw4(int a) { return 0; } -// CIR: cir.func @_Z3sw4i +// CIR: cir.func{{.*}} @_Z3sw4i // CIR: cir.switch (%[[COND:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<42> : !s32i]) { // CIR-NEXT: cir.scope { @@ -215,7 +215,7 @@ int sw4(int a) { // CIR-NEXT: cir.yield // CIR-NEXT: } -// LLVM: define i32 @_Z3sw4i +// LLVM: define{{.*}} i32 @_Z3sw4i // LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[RET_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[ENTRY:.*]] @@ -241,7 +241,7 @@ int sw4(int a) { // LLVM: %[[RET0:.*]] = load i32, ptr %[[RET_ADDR]], align 4 // LLVM: ret i32 %[[RET0]] -// OGCG: define dso_local noundef i32 @_Z3sw4i +// OGCG: define{{.*}} i32 @_Z3sw4i // OGCG: entry: // OGCG: %[[RETVAL:.*]] = alloca i32, align 4 // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 @@ -263,7 +263,7 @@ void sw5(int a) { } } -// CIR: cir.func @_Z3sw5i +// CIR: cir.func{{.*}} @_Z3sw5i // CIR: cir.switch (%[[A:.*]] : !s32i) { // CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) { // CIR-NEXT: cir.yield @@ -271,7 +271,7 @@ void sw5(int a) { // CIR-NEXT: cir.yield // CIR-NEXT: } -// LLVM-LABEL: define void @_Z3sw5i +// LLVM-LABEL: define{{.*}} void @_Z3sw5i // LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[ENTRY:.*]] // LLVM: [[ENTRY]]: @@ -288,7 +288,7 @@ void sw5(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw5i +// OGCG: define{{.*}} void @_Z3sw5i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -313,7 +313,7 @@ void sw6(int a) { } } -// CIR: cir.func @_Z3sw6i +// CIR: cir.func{{.*}} @_Z3sw6i // CIR: cir.switch (%[[A:.*]] : !s32i) { 
// CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { // CIR-NEXT: cir.yield @@ -334,7 +334,7 @@ void sw6(int a) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z3sw6i +// LLVM: define{{.*}} void @_Z3sw6i // LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR:.*]], align 4 // LLVM: br label %[[SWITCH:.*]] // LLVM: [[SWITCH]]: @@ -371,7 +371,7 @@ void sw6(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw6i +// OGCG: define{{.*}} void @_Z3sw6i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: store i32 %a, ptr %[[A_ADDR]], align 4 @@ -404,7 +404,7 @@ void sw7(int a) { } } -// CIR: cir.func @_Z3sw7i +// CIR: cir.func{{.*}} @_Z3sw7i // CIR: %[[X:.*]] = cir.alloca !s32i, !cir.ptr, ["x"] // CIR: cir.switch (%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) { @@ -428,7 +428,7 @@ void sw7(int a) { // CIR-NEXT: cir.yield // CIR: } -// LLVM: define void @_Z3sw7i +// LLVM: define{{.*}} void @_Z3sw7i // LLVM: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR:.*]], align 4 // LLVM: br label %[[SWITCH:.*]] // LLVM: [[SWITCH]]: @@ -467,7 +467,7 @@ void sw7(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw7i +// OGCG: define{{.*}} void @_Z3sw7i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -498,7 +498,7 @@ void sw8(int a) { } } -// CIR: cir.func @_Z3sw8i +// CIR: cir.func{{.*}} @_Z3sw8i // CIR: cir.switch (%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break @@ -510,7 +510,7 @@ void sw8(int a) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z3sw8i +// LLVM: define{{.*}} void @_Z3sw8i // LLVM: switch i32 %[[COND:.*]], label %[[DEFAULT:.*]] [ // LLVM-DAG: i32 3, label %[[CASE3:.*]] // LLVM-DAG: i32 4, label %[[CASE4:.*]] @@ -528,7 +528,7 @@ void sw8(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw8i +// 
OGCG: define{{.*}} void @_Z3sw8i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -556,7 +556,7 @@ void sw9(int a) { } } -// CIR: cir.func @_Z3sw9i +// CIR: cir.func{{.*}} @_Z3sw9i // CIR: cir.switch (%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break @@ -568,7 +568,7 @@ void sw9(int a) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z3sw9i +// LLVM: define{{.*}} void @_Z3sw9i // LLVM: switch i32 %[[COND:.*]], label %[[DEFAULT:.*]] [ // LLVM-DAG: i32 3, label %[[CASE3:.*]] // LLVM-DAG: i32 4, label %[[CASE4:.*]] @@ -586,7 +586,7 @@ void sw9(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z3sw9i +// OGCG: define{{.*}} void @_Z3sw9i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -615,7 +615,7 @@ void sw10(int a) { } } -// CIR: cir.func @_Z4sw10i +// CIR: cir.func{{.*}} @_Z4sw10i // CIR: cir.switch (%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break @@ -630,7 +630,7 @@ void sw10(int a) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z4sw10i +// LLVM: define{{.*}} void @_Z4sw10i // LLVM: switch i32 %[[COND:.*]], label %[[DEFAULT:.*]] [ // LLVM-DAG: i32 3, label %[[CASE_3:.*]] // LLVM-DAG: i32 4, label %[[CASE_4:.*]] @@ -653,7 +653,7 @@ void sw10(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z4sw10i +// OGCG: define{{.*}} void @_Z4sw10i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -687,7 +687,7 @@ void sw11(int a) { } } -// CIR: cir.func @_Z4sw11i +// CIR: cir.func{{.*}} @_Z4sw11i // CIR: cir.switch (%[[A:.*]] : !s32i) // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { // CIR-NEXT: cir.break @@ -708,7 +708,7 @@ void sw11(int a) { // CIR-NEXT: cir.break // 
CIR-NEXT: } -// LLVM: define void @_Z4sw11i +// LLVM: define{{.*}} void @_Z4sw11i // LLVM: switch i32 %[[COND:.*]], label %[[DEFAULT:.*]] [ // LLVM-DAG: i32 3, label %[[CASE_3:.*]] // LLVM-DAG: i32 4, label %[[CASE_4:.*]] @@ -741,7 +741,7 @@ void sw11(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z4sw11i +// OGCG: define{{.*}} void @_Z4sw11i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -772,7 +772,7 @@ void sw12(int a) { } } -// CIR: cir.func @_Z4sw12i +// CIR: cir.func{{.*}} @_Z4sw12i // CIR: cir.scope { // CIR: cir.switch // CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) { @@ -781,7 +781,7 @@ void sw12(int a) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z4sw12i +// LLVM: define{{.*}} void @_Z4sw12i // LLVM: switch i32 %[[COND:.*]], label %[[EXIT:.*]] [ // LLVM-DAG: i32 3, label %[[CASE_3:.*]] // LLVM: ] @@ -794,7 +794,7 @@ void sw12(int a) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z4sw12i +// OGCG: define{{.*}} void @_Z4sw12i // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4 @@ -816,7 +816,7 @@ void sw13(int a, int b) { } } -// CIR: cir.func @_Z4sw13ii +// CIR: cir.func{{.*}} @_Z4sw13ii // CIR: cir.scope { // CIR: cir.switch // CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) { @@ -832,7 +832,7 @@ void sw13(int a, int b) { // CIR: } // CIR: cir.return -// LLVM: define void @_Z4sw13ii +// LLVM: define{{.*}} void @_Z4sw13ii // LLVM: switch i32 %[[COND:.*]], label %[[OUTER_EXIT:.*]] [ // LLVM-DAG: i32 1, label %[[CASE_A_1:.*]] // LLVM: ] @@ -858,7 +858,7 @@ void sw13(int a, int b) { // LLVM: [[EXIT]]: // LLVM: ret void -// OGCG: define dso_local void @_Z4sw13ii +// OGCG: define{{.*}} void @_Z4sw13ii // OGCG: entry: // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[B_ADDR:.*]] = alloca i32, align 4 @@ -890,7 +890,7 @@ void 
sw14(int x) { } } -// CIR: cir.func @_Z4sw14i +// CIR: cir.func{{.*}} @_Z4sw14i // CIR: cir.switch // CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) { // CIR-NEXT: cir.yield @@ -908,7 +908,7 @@ void sw14(int x) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z4sw14i +// LLVM: define{{.*}} void @_Z4sw14i // LLVM: switch i32 %[[COND:.*]], label %[[DEFAULT:.*]] [ // LLVM-DAG: i32 1, label %[[CASE1:.*]] // LLVM-DAG: i32 2, label %[[CASE2:.*]] @@ -939,7 +939,7 @@ void sw14(int x) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z4sw14i +// OGCG: define{{.*}} void @_Z4sw14i // OGCG: entry: // OGCG: %[[X_ADDR:.*]] = alloca i32, align 4 // OGCG: store i32 %x, ptr %[[X_ADDR]], align 4 @@ -977,7 +977,7 @@ void sw15(int x) { } } -// CIR: cir.func @_Z4sw15i +// CIR: cir.func{{.*}} @_Z4sw15i // CIR: %[[Y:.*]] = cir.alloca !s32i, !cir.ptr, ["y"] // CIR: cir.switch // CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) { @@ -995,7 +995,7 @@ void sw15(int x) { // CIR-NEXT: cir.break // CIR-NEXT: } -// LLVM: define void @_Z4sw15i +// LLVM: define{{.*}} void @_Z4sw15i // LLVM: switch i32 %[[COND:.*]], label %[[DEFAULT:.*]] [ // LLVM-DAG: i32 1, label %[[CASE1:.*]] // LLVM-DAG: i32 2, label %[[CASE2:.*]] @@ -1019,7 +1019,7 @@ void sw15(int x) { // LLVM: [[RET]]: // LLVM: ret void -// OGCG: define dso_local void @_Z4sw15i +// OGCG: define{{.*}} void @_Z4sw15i // OGCG: entry: // OGCG: %[[X_ADDR:.*]] = alloca i32, align 4 // OGCG: %[[Y:.*]] = alloca i32, align 4 @@ -1081,7 +1081,7 @@ int nested_switch(int a) { // CIR: cir.case(equal, [#cir.int<7> : !s32i]) { // CIR: cir.return -// LLVM: define i32 @_Z13nested_switchi +// LLVM: define{{.*}} i32 @_Z13nested_switchi // LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[RES_ADDR:.*]] = alloca i32, i64 1, align 4 @@ -1136,7 +1136,7 @@ int nested_switch(int a) { // LLVM: %[[RET0:.*]] = load i32, ptr %[[RES_ADDR]], align 4 // LLVM: ret i32 
%[[RET0]] -// OGCG: define dso_local noundef i32 @_Z13nested_switchi +// OGCG: define{{.*}} i32 @_Z13nested_switchi // OGCG: entry: // OGCG: %[[RETVAL:.*]] = alloca i32, align 4 // OGCG: %[[A_ADDR:.*]] = alloca i32, align 4 diff --git a/clang/test/CIR/CodeGen/switch_flat_op.cpp b/clang/test/CIR/CodeGen/switch_flat_op.cpp index f917bd59f7ce3..a3ea7e7a15547 100644 --- a/clang/test/CIR/CodeGen/switch_flat_op.cpp +++ b/clang/test/CIR/CodeGen/switch_flat_op.cpp @@ -18,7 +18,7 @@ void swf(int a) { } -// BEFORE: cir.func @_Z3swfi +// BEFORE: cir.func{{.*}} @_Z3swfi // BEFORE: %[[VAR_B:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} // BEFORE: %[[CONST_3:.*]] = cir.const #cir.int<3> : !s32i // BEFORE: cir.switch (%[[COND:.*]] : !s32i) { @@ -44,7 +44,7 @@ void swf(int a) { // BEFORE: } // BEFORE: cir.return -// AFTER: cir.func @_Z3swfi +// AFTER: cir.func{{.*}} @_Z3swfi // AFTER: %[[VAR_A:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} // AFTER: cir.store{{.*}} %arg0, %[[VAR_A]] : !s32i, !cir.ptr // AFTER: %[[VAR_B:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} diff --git a/clang/test/CIR/CodeGen/ternary.cpp b/clang/test/CIR/CodeGen/ternary.cpp index 3b66f7ccdf54f..781286a94cc2e 100644 --- a/clang/test/CIR/CodeGen/ternary.cpp +++ b/clang/test/CIR/CodeGen/ternary.cpp @@ -9,7 +9,7 @@ int x(int y) { return y > 0 ? 
3 : 5; } -// CIR-LABEL: cir.func @_Z1xi( +// CIR-LABEL: cir.func{{.*}} @_Z1xi( // CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}) -> !s32i { // CIR: [[Y:%.+]] = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} // CIR: [[RETVAL:%.+]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} @@ -24,7 +24,7 @@ int x(int y) { // CIR: [[RETVAL_VAL:%.+]] = cir.load [[RETVAL]] : !cir.ptr, !s32i // CIR: cir.return [[RETVAL_VAL]] : !s32i -// LLVM-LABEL: define i32 @_Z1xi( +// LLVM-LABEL: define{{.*}} i32 @_Z1xi( // LLVM-SAME: i32 %[[ARG0:.+]]) // LLVM: %[[Y:.*]] = alloca i32 // LLVM: %[[RETVAL:.*]] = alloca i32 @@ -36,7 +36,7 @@ int x(int y) { // LLVM: %[[RESULT:.*]] = load i32, ptr %[[RETVAL]] // LLVM: ret i32 %[[RESULT]] -// OGCG-LABEL: define dso_local noundef i32 @_Z1xi( +// OGCG-LABEL: define{{.*}} i32 @_Z1xi( // OGCG-SAME: i32 {{.*}} %[[ARG0:.+]]) // OGCG: %[[Y:.*]] = alloca i32 // OGCG: store i32 %[[ARG0]], ptr %[[Y]] @@ -51,7 +51,7 @@ int foo(int a, int b) { return 0; } -// CIR-LABEL: cir.func @_Z3fooii( +// CIR-LABEL: cir.func{{.*}} @_Z3fooii( // CIR-SAME: %[[ARG0:.*]]: !s32i {{.*}}, %[[ARG1:.*]]: !s32i {{.*}}) -> !s32i { // CIR: [[A:%.+]] = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} // CIR: [[B:%.+]] = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} @@ -83,7 +83,7 @@ int foo(int a, int b) { // CIR: [[RETVAL_VAL2:%.+]] = cir.load [[RETVAL]] : !cir.ptr, !s32i // CIR: cir.return [[RETVAL_VAL2]] : !s32i -// LLVM-LABEL: define i32 @_Z3fooii( +// LLVM-LABEL: define{{.*}} i32 @_Z3fooii( // LLVM-SAME: i32 %[[ARG0:.*]], i32 %[[ARG1:.*]]) // LLVM: %[[A:.*]] = alloca i32 // LLVM: %[[B:.*]] = alloca i32 @@ -116,7 +116,7 @@ int foo(int a, int b) { // LLVM: %[[RET2:.*]] = load i32, ptr %[[RETVAL]] // LLVM: ret i32 %[[RET2]] -// OGCG-LABEL: define dso_local noundef i32 @_Z3fooii( +// OGCG-LABEL: define{{.*}} i32 @_Z3fooii( // OGCG-SAME: i32 {{.*}} %[[ARG0:.*]], i32 {{.*}} %[[ARG1:.*]]) // OGCG: %[[RETVAL:.*]] = alloca i32 // OGCG: 
%[[A:.*]] = alloca i32 diff --git a/clang/test/CIR/CodeGen/typedef.c b/clang/test/CIR/CodeGen/typedef.c index a87e6ffb1843a..201df2e08ee2e 100644 --- a/clang/test/CIR/CodeGen/typedef.c +++ b/clang/test/CIR/CodeGen/typedef.c @@ -10,12 +10,12 @@ void local_typedef(void) { Struct s; } -// CIR: cir.func @local_typedef() +// CIR: cir.func{{.*}} @local_typedef() // CIR: cir.alloca !rec_Struct, !cir.ptr, ["s"] {alignment = 4 : i64} // CIR: cir.return // LLVM: %struct.Struct = type { i32 } -// LLVM: define void @local_typedef() +// LLVM: define{{.*}} void @local_typedef() // LLVM: alloca %struct.Struct, i64 1, align 4 // LLVM: ret void diff --git a/clang/test/CIR/CodeGen/unary.cpp b/clang/test/CIR/CodeGen/unary.cpp index 0633cc3fd8e15..a7c946eaffd03 100644 --- a/clang/test/CIR/CodeGen/unary.cpp +++ b/clang/test/CIR/CodeGen/unary.cpp @@ -10,12 +10,12 @@ unsigned up0() { return +a; } -// CHECK: cir.func @_Z3up0v() -> !u32i +// CHECK: cir.func{{.*}} @_Z3up0v() -> !u32i // CHECK: %[[A:.*]] = cir.alloca !u32i, !cir.ptr, ["a", init] // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[OUTPUT:.*]] = cir.unary(plus, %[[INPUT]]) -// LLVM: define i32 @_Z3up0v() +// LLVM: define{{.*}} i32 @_Z3up0v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -31,12 +31,12 @@ unsigned um0() { return -a; } -// CHECK: cir.func @_Z3um0v() -> !u32i +// CHECK: cir.func{{.*}} @_Z3um0v() -> !u32i // CHECK: %[[A:.*]] = cir.alloca !u32i, !cir.ptr, ["a", init] // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[OUTPUT:.*]] = cir.unary(minus, %[[INPUT]]) -// LLVM: define i32 @_Z3um0v() +// LLVM: define{{.*}} i32 @_Z3um0v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -54,12 +54,12 @@ unsigned un0() { return ~a; // a ^ -1 , not } -// CHECK: cir.func @_Z3un0v() -> !u32i +// CHECK: cir.func{{.*}} 
@_Z3un0v() -> !u32i // CHECK: %[[A:.*]] = cir.alloca !u32i, !cir.ptr, ["a", init] // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[OUTPUT:.*]] = cir.unary(not, %[[INPUT]]) -// LLVM: define i32 @_Z3un0v() +// LLVM: define{{.*}} i32 @_Z3un0v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -78,7 +78,7 @@ int inc0() { return a; } -// CHECK: cir.func @_Z4inc0v() -> !s32i +// CHECK: cir.func{{.*}} @_Z4inc0v() -> !s32i // CHECK: %[[A:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.int<1> : !s32i // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !s32i @@ -87,7 +87,7 @@ int inc0() { // CHECK: cir.store{{.*}} %[[INCREMENTED]], %[[A]] // CHECK: %[[A_TO_OUTPUT:.*]] = cir.load{{.*}} %[[A]] -// LLVM: define i32 @_Z4inc0v() +// LLVM: define{{.*}} i32 @_Z4inc0v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -106,7 +106,7 @@ int dec0() { return a; } -// CHECK: cir.func @_Z4dec0v() -> !s32i +// CHECK: cir.func{{.*}} @_Z4dec0v() -> !s32i // CHECK: %[[A:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.int<1> : !s32i // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !s32i @@ -115,7 +115,7 @@ int dec0() { // CHECK: cir.store{{.*}} %[[DECREMENTED]], %[[A]] // CHECK: %[[A_TO_OUTPUT:.*]] = cir.load{{.*}} %[[A]] -// LLVM: define i32 @_Z4dec0v() +// LLVM: define{{.*}} i32 @_Z4dec0v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -134,7 +134,7 @@ int inc1() { return a; } -// CHECK: cir.func @_Z4inc1v() -> !s32i +// CHECK: cir.func{{.*}} @_Z4inc1v() -> !s32i // CHECK: %[[A:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.int<1> : !s32i // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !s32i @@ 
-143,7 +143,7 @@ int inc1() { // CHECK: cir.store{{.*}} %[[INCREMENTED]], %[[A]] // CHECK: %[[A_TO_OUTPUT:.*]] = cir.load{{.*}} %[[A]] -// LLVM: define i32 @_Z4inc1v() +// LLVM: define{{.*}} i32 @_Z4inc1v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -162,7 +162,7 @@ int dec1() { return a; } -// CHECK: cir.func @_Z4dec1v() -> !s32i +// CHECK: cir.func{{.*}} @_Z4dec1v() -> !s32i // CHECK: %[[A:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.int<1> : !s32i // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !s32i @@ -171,7 +171,7 @@ int dec1() { // CHECK: cir.store{{.*}} %[[DECREMENTED]], %[[A]] // CHECK: %[[A_TO_OUTPUT:.*]] = cir.load{{.*}} %[[A]] -// LLVM: define i32 @_Z4dec1v() +// LLVM: define{{.*}} i32 @_Z4dec1v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: store i32 1, ptr %[[A]], align 4 @@ -191,7 +191,7 @@ int inc2() { return b; } -// CHECK: cir.func @_Z4inc2v() -> !s32i +// CHECK: cir.func{{.*}} @_Z4inc2v() -> !s32i // CHECK: %[[A:.*]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CHECK: %[[B:.*]] = cir.alloca !s32i, !cir.ptr, ["b", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.int<1> : !s32i @@ -202,7 +202,7 @@ int inc2() { // CHECK: cir.store{{.*}} %[[ATOB]], %[[B]] // CHECK: %[[B_TO_OUTPUT:.*]] = cir.load{{.*}} %[[B]] -// LLVM: define i32 @_Z4inc2v() +// LLVM: define{{.*}} i32 @_Z4inc2v() // LLVM: %[[RV:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[A:.*]] = alloca i32, i64 1, align 4 // LLVM: %[[B:.*]] = alloca i32, i64 1, align 4 @@ -228,12 +228,12 @@ float fpPlus() { return +a; } -// CHECK: cir.func @_Z6fpPlusv() -> !cir.float +// CHECK: cir.func{{.*}} @_Z6fpPlusv() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[OUTPUT:.*]] = cir.unary(plus, %[[INPUT]]) -// LLVM: define 
float @_Z6fpPlusv() +// LLVM: define{{.*}} float @_Z6fpPlusv() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: store float 1.000000e+00, ptr %[[A]], align 4 @@ -249,12 +249,12 @@ float fpMinus() { return -a; } -// CHECK: cir.func @_Z7fpMinusv() -> !cir.float +// CHECK: cir.func{{.*}} @_Z7fpMinusv() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[OUTPUT:.*]] = cir.unary(minus, %[[INPUT]]) -// LLVM: define float @_Z7fpMinusv() +// LLVM: define{{.*}} float @_Z7fpMinusv() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: store float 1.000000e+00, ptr %[[A]], align 4 @@ -272,14 +272,14 @@ float fpPreInc() { return ++a; } -// CHECK: cir.func @_Z8fpPreIncv() -> !cir.float +// CHECK: cir.func{{.*}} @_Z8fpPreIncv() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !cir.float // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[INCREMENTED:.*]] = cir.unary(inc, %[[INPUT]]) -// LLVM: define float @_Z8fpPreIncv() +// LLVM: define{{.*}} float @_Z8fpPreIncv() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: store float 1.000000e+00, ptr %[[A]], align 4 @@ -297,14 +297,14 @@ float fpPreDec() { return --a; } -// CHECK: cir.func @_Z8fpPreDecv() -> !cir.float +// CHECK: cir.func{{.*}} @_Z8fpPreDecv() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !cir.float // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[DECREMENTED:.*]] = cir.unary(dec, %[[INPUT]]) -// LLVM: define float @_Z8fpPreDecv() +// LLVM: 
define{{.*}} float @_Z8fpPreDecv() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: store float 1.000000e+00, ptr %[[A]], align 4 @@ -322,14 +322,14 @@ float fpPostInc() { return a++; } -// CHECK: cir.func @_Z9fpPostIncv() -> !cir.float +// CHECK: cir.func{{.*}} @_Z9fpPostIncv() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !cir.float // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[INCREMENTED:.*]] = cir.unary(inc, %[[INPUT]]) -// LLVM: define float @_Z9fpPostIncv() +// LLVM: define{{.*}} float @_Z9fpPostIncv() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: store float 1.000000e+00, ptr %[[A]], align 4 @@ -347,14 +347,14 @@ float fpPostDec() { return a--; } -// CHECK: cir.func @_Z9fpPostDecv() -> !cir.float +// CHECK: cir.func{{.*}} @_Z9fpPostDecv() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float // CHECK: cir.store{{.*}} %[[ATMP]], %[[A]] : !cir.float // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[A]] // CHECK: %[[DECREMENTED:.*]] = cir.unary(dec, %[[INPUT]]) -// LLVM: define float @_Z9fpPostDecv() +// LLVM: define{{.*}} float @_Z9fpPostDecv() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: store float 1.000000e+00, ptr %[[A]], align 4 @@ -374,7 +374,7 @@ float fpPostInc2() { return b; } -// CHECK: cir.func @_Z10fpPostInc2v() -> !cir.float +// CHECK: cir.func{{.*}} @_Z10fpPostInc2v() -> !cir.float // CHECK: %[[A:.*]] = cir.alloca !cir.float, !cir.ptr, ["a", init] // CHECK: %[[B:.*]] = cir.alloca !cir.float, !cir.ptr, ["b", init] // CHECK: %[[ATMP:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float @@ -385,7 +385,7 @@ float 
fpPostInc2() { // CHECK: cir.store{{.*}} %[[ATOB]], %[[B]] // CHECK: %[[B_TO_OUTPUT:.*]] = cir.load{{.*}} %[[B]] -// LLVM: define float @_Z10fpPostInc2v() +// LLVM: define{{.*}} float @_Z10fpPostInc2v() // LLVM: %[[RV:.*]] = alloca float, i64 1, align 4 // LLVM: %[[A:.*]] = alloca float, i64 1, align 4 // LLVM: %[[B:.*]] = alloca float, i64 1, align 4 @@ -407,7 +407,7 @@ float fpPostInc2() { // OGCG: %[[B_TO_OUTPUT:.*]] = load float, ptr %[[B]], align 4 void chars(char c) { -// CHECK: cir.func @_Z5charsc +// CHECK: cir.func{{.*}} @_Z5charsc int c1 = +c; // CHECK: %[[PROMO:.*]] = cir.cast(integral, %{{.+}} : !s8i), !s32i @@ -429,13 +429,13 @@ _Float16 fp16UPlus(_Float16 f) { return +f; } -// CHECK: cir.func @_Z9fp16UPlusDF16_({{.*}}) -> !cir.f16 +// CHECK: cir.func{{.*}} @_Z9fp16UPlusDF16_({{.*}}) -> !cir.f16 // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[F:.*]] // CHECK: %[[PROMOTED:.*]] = cir.cast(floating, %[[INPUT]] : !cir.f16), !cir.float // CHECK: %[[RESULT:.*]] = cir.unary(plus, %[[PROMOTED]]) // CHECK: %[[UNPROMOTED:.*]] = cir.cast(floating, %[[RESULT]] : !cir.float), !cir.f16 -// LLVM: define half @_Z9fp16UPlusDF16_({{.*}}) +// LLVM: define{{.*}} half @_Z9fp16UPlusDF16_({{.*}}) // LLVM: %[[F_LOAD:.*]] = load half, ptr %{{.*}}, align 2 // LLVM: %[[PROMOTED:.*]] = fpext half %[[F_LOAD]] to float // LLVM: %[[UNPROMOTED:.*]] = fptrunc float %[[PROMOTED]] to half @@ -449,13 +449,13 @@ _Float16 fp16UMinus(_Float16 f) { return -f; } -// CHECK: cir.func @_Z10fp16UMinusDF16_({{.*}}) -> !cir.f16 +// CHECK: cir.func{{.*}} @_Z10fp16UMinusDF16_({{.*}}) -> !cir.f16 // CHECK: %[[INPUT:.*]] = cir.load{{.*}} %[[F:.*]] // CHECK: %[[PROMOTED:.*]] = cir.cast(floating, %[[INPUT]] : !cir.f16), !cir.float // CHECK: %[[RESULT:.*]] = cir.unary(minus, %[[PROMOTED]]) // CHECK: %[[UNPROMOTED:.*]] = cir.cast(floating, %[[RESULT]] : !cir.float), !cir.f16 -// LLVM: define half @_Z10fp16UMinusDF16_({{.*}}) +// LLVM: define{{.*}} half @_Z10fp16UMinusDF16_({{.*}}) // LLVM: %[[F_LOAD:.*]] = 
load half, ptr %{{.*}}, align 2 // LLVM: %[[PROMOTED:.*]] = fpext half %[[F_LOAD]] to float // LLVM: %[[RESULT:.*]] = fneg float %[[PROMOTED]] @@ -480,7 +480,7 @@ void test_logical_not() { b = !d; } -// CHECK: cir.func @_Z16test_logical_notv() +// CHECK: cir.func{{.*}} @_Z16test_logical_notv() // CHECK: %[[A:.*]] = cir.load{{.*}} %[[A_ADDR:.*]] : !cir.ptr, !s32i // CHECK: %[[A_BOOL:.*]] = cir.cast(int_to_bool, %[[A]] : !s32i), !cir.bool // CHECK: %[[A_NOT:.*]] = cir.unary(not, %[[A_BOOL]]) : !cir.bool, !cir.bool @@ -503,7 +503,7 @@ void test_logical_not() { // CHECK: %[[D_NOT:.*]] = cir.unary(not, %[[D_BOOL]]) : !cir.bool, !cir.bool // CHECK: cir.store{{.*}} %[[D_NOT]], %[[B_ADDR]] : !cir.bool, !cir.ptr -// LLVM: define void @_Z16test_logical_notv() +// LLVM: define{{.*}} void @_Z16test_logical_notv() // LLVM: %[[A:.*]] = load i32, ptr %[[A_ADDR:.*]], align 4 // LLVM: %[[A_BOOL:.*]] = icmp ne i32 %[[A]], 0 // LLVM: %[[A_NOT:.*]] = xor i1 %[[A_BOOL]], true diff --git a/clang/test/CIR/CodeGen/union.c b/clang/test/CIR/CodeGen/union.c index d998b5add41a3..23e862b24517d 100644 --- a/clang/test/CIR/CodeGen/union.c +++ b/clang/test/CIR/CodeGen/union.c @@ -54,11 +54,11 @@ void f1(void) { union IncompleteU *p; } -// CIR: cir.func @f1() +// CIR: cir.func{{.*}} @f1() // CIR-NEXT: cir.alloca !cir.ptr, !cir.ptr>, ["p"] // CIR-NEXT: cir.return -// LLVM: define void @f1() +// LLVM: define{{.*}} void @f1() // LLVM-NEXT: %[[P:.*]] = alloca ptr, i64 1, align 8 // LLVM-NEXT: ret void @@ -73,7 +73,7 @@ int f2(void) { return u.n; } -// CIR: cir.func @f2() -> !s32i +// CIR: cir.func{{.*}} @f2() -> !s32i // CIR-NEXT: %[[RETVAL_ADDR:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CIR-NEXT: %[[U:.*]] = cir.alloca !rec_U1, !cir.ptr, ["u"] {alignment = 4 : i64} // CIR-NEXT: %[[I:.*]] = cir.const #cir.int<42> : !s32i @@ -85,7 +85,7 @@ int f2(void) { // CIR-NEXT: %[[RET:.*]] = cir.load{{.*}} %[[RETVAL_ADDR]] : !cir.ptr, !s32i // CIR-NEXT: cir.return %[[RET]] : !s32i 
-// LLVM: define i32 @f2() +// LLVM: define{{.*}} i32 @f2() // LLVM-NEXT: %[[RETVAL:.*]] = alloca i32, i64 1, align 4 // LLVM-NEXT: %[[U:.*]] = alloca %union.U1, i64 1, align 4 // LLVM-NEXT: store i32 42, ptr %[[U]], align 4 @@ -94,7 +94,7 @@ int f2(void) { // LLVM-NEXT: %[[RET:.*]] = load i32, ptr %[[RETVAL]], align 4 // LLVM-NEXT: ret i32 %[[RET]] -// OGCG: define dso_local i32 @f2() +// OGCG: define{{.*}} i32 @f2() // OGCG-NEXT: entry: // OGCG-NEXT: %[[U:.*]] = alloca %union.U1, align 4 // OGCG-NEXT: store i32 42, ptr %[[U]], align 4 @@ -112,7 +112,7 @@ void shouldGenerateUnionAccess(union U2 u) { u.d; } -// CIR: cir.func @shouldGenerateUnionAccess(%[[ARG:.*]]: !rec_U2 +// CIR: cir.func{{.*}} @shouldGenerateUnionAccess(%[[ARG:.*]]: !rec_U2 // CIR-NEXT: %[[U:.*]] = cir.alloca !rec_U2, !cir.ptr, ["u", init] {alignment = 8 : i64} // CIR-NEXT: cir.store{{.*}} %[[ARG]], %[[U]] : !rec_U2, !cir.ptr // CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i @@ -138,7 +138,7 @@ void shouldGenerateUnionAccess(union U2 u) { // CIR-NEXT: %[[D_VAL:.*]] = cir.load{{.*}} %[[D_PTR2]] : !cir.ptr, !cir.double // CIR-NEXT: cir.return -// LLVM: define void @shouldGenerateUnionAccess(%union.U2 %[[ARG:.*]]) +// LLVM: define{{.*}} void @shouldGenerateUnionAccess(%union.U2 %[[ARG:.*]]) // LLVM-NEXT: %[[U:.*]] = alloca %union.U2, i64 1, align 8 // LLVM-NEXT: store %union.U2 %[[ARG]], ptr %[[U]], align 8 // LLVM-NEXT: store i8 0, ptr %[[U]], align 8 @@ -151,7 +151,7 @@ void shouldGenerateUnionAccess(union U2 u) { // LLVM-NEXT: %[[D_VAL:.*]] = load double, ptr %[[U]], align 8 // LLVM-NEXT: ret void -// OGCG: define dso_local void @shouldGenerateUnionAccess(i64 %[[ARG:.*]]) +// OGCG: define{{.*}} void @shouldGenerateUnionAccess(i64 %[[ARG:.*]]) // OGCG-NEXT: entry: // OGCG-NEXT: %[[U:.*]] = alloca %union.U2, align 8 // OGCG-NEXT: %[[COERCE_DIVE:.*]] = getelementptr inbounds nuw %union.U2, ptr %[[U]], i32 0, i32 0 @@ -170,7 +170,7 @@ void f3(union U3 u) { u.c[2] = 0; } -// CIR: cir.func 
@f3(%[[ARG:.*]]: !rec_U3 +// CIR: cir.func{{.*}} @f3(%[[ARG:.*]]: !rec_U3 // CIR-NEXT: %[[U:.*]] = cir.alloca !rec_U3, !cir.ptr, ["u", init] {alignment = 1 : i64} // CIR-NEXT: cir.store{{.*}} %[[ARG]], %[[U]] : !rec_U3, !cir.ptr // CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i @@ -182,7 +182,7 @@ void f3(union U3 u) { // CIR-NEXT: cir.store{{.*}} %[[ZERO_CHAR]], %[[ELEM_PTR]] : !s8i, !cir.ptr // CIR-NEXT: cir.return -// LLVM: define void @f3(%union.U3 %[[ARG:.*]]) +// LLVM: define{{.*}} void @f3(%union.U3 %[[ARG:.*]]) // LLVM-NEXT: %[[U:.*]] = alloca %union.U3, i64 1, align 1 // LLVM-NEXT: store %union.U3 %[[ARG]], ptr %[[U]], align 1 // LLVM-NEXT: %[[C_PTR:.*]] = getelementptr i8, ptr %[[U]], i32 0 @@ -190,7 +190,7 @@ void f3(union U3 u) { // LLVM-NEXT: store i8 0, ptr %[[ELEM_PTR]], align 1 // LLVM-NEXT: ret void -// OGCG: define dso_local void @f3(i40 %[[ARG:.*]]) +// OGCG: define{{.*}} void @f3(i40 %[[ARG:.*]]) // OGCG-NEXT: entry: // OGCG-NEXT: %[[U:.*]] = alloca %union.U3, align 1 // OGCG-NEXT: store i40 %[[ARG]], ptr %[[U]], align 1 @@ -202,7 +202,7 @@ void f5(union U4 u) { u.c[4] = 65; } -// CIR: cir.func @f5(%[[ARG:.*]]: !rec_U4 +// CIR: cir.func{{.*}} @f5(%[[ARG:.*]]: !rec_U4 // CIR-NEXT: %[[U:.*]] = cir.alloca !rec_U4, !cir.ptr, ["u", init] {alignment = 4 : i64} // CIR-NEXT: cir.store{{.*}} %[[ARG]], %[[U]] : !rec_U4, !cir.ptr // CIR-NEXT: %[[CHAR_VAL:.*]] = cir.const #cir.int<65> : !s32i @@ -214,7 +214,7 @@ void f5(union U4 u) { // CIR-NEXT: cir.store{{.*}} %[[CHAR_CAST]], %[[ELEM_PTR]] : !s8i, !cir.ptr // CIR-NEXT: cir.return -// LLVM: define void @f5(%union.U4 %[[ARG:.*]]) +// LLVM: define{{.*}} void @f5(%union.U4 %[[ARG:.*]]) // LLVM-NEXT: %[[U:.*]] = alloca %union.U4, i64 1, align 4 // LLVM-NEXT: store %union.U4 %[[ARG]], ptr %[[U]], align 4 // LLVM-NEXT: %[[C_PTR:.*]] = getelementptr i8, ptr %[[U]], i32 0 @@ -222,7 +222,7 @@ void f5(union U4 u) { // LLVM-NEXT: store i8 65, ptr %[[ELEM_PTR]], align 4 // LLVM-NEXT: ret void -// OGCG: define 
dso_local void @f5(i64 %[[ARG:.*]]) +// OGCG: define{{.*}} void @f5(i64 %[[ARG:.*]]) // OGCG-NEXT: entry: // OGCG-NEXT: %[[U:.*]] = alloca %union.U4, align 4 // OGCG-NEXT: store i64 %[[ARG]], ptr %[[U]], align 4 diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index fe4919ec0478d..2ee42187a6e94 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -1161,3 +1161,18 @@ void foo20() { // OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[VEC_A]], align 16 // OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[VEC_B]], align 16 // OGCG: %[[SHUF:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[TMP_B]], <4 x i32> + +void foo21() { + vi4 a; + unsigned long size = __builtin_vectorelements(a); +} + +// CIR: %[[INIT:.*]] = cir.alloca !u64i, !cir.ptr, ["size", init] +// CIR: %[[SIZE:.*]] = cir.const #cir.int<4> : !u64i +// CIR: cir.store align(8) %[[SIZE]], %[[INIT]] : !u64i, !cir.ptr + +// LLVM: %[[SIZE:.*]] = alloca i64, i64 1, align 8 +// LLVM: store i64 4, ptr %[[SIZE]], align 8 + +// OGCG: %[[SIZE:.*]] = alloca i64, align 8 +// OGCG: store i64 4, ptr %[[SIZE]], align 8 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index d0c5b83cd5b04..18fa90bd2cb3f 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -1203,3 +1203,18 @@ void foo23() { // OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[VEC_A]], align 16 // OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[VEC_B]], align 16 // OGCG: %[[SHUF:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[TMP_B]], <4 x i32> + +void foo24() { + vi4 a; + unsigned long size = __builtin_vectorelements(a); +} + +// CIR: %[[INIT:.*]] = cir.alloca !u64i, !cir.ptr, ["size", init] +// CIR: %[[SIZE:.*]] = cir.const #cir.int<4> : !u64i +// CIR: cir.store align(8) %[[SIZE]], %[[INIT]] : !u64i, !cir.ptr + +// LLVM: %[[SIZE:.*]] = alloca i64, i64 1, align 8 +// LLVM: store i64 4, ptr %[[SIZE]], align 8 
+ +// OGCG: %[[SIZE:.*]] = alloca i64, align 8 +// OGCG: store i64 4, ptr %[[SIZE]], align 8 diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c index 72471d4ec7874..1c94fa8238ce8 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c +++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c @@ -2,7 +2,7 @@ int global; void acc_compute(int parmVar) { - // CHECK: cir.func @acc_compute(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_compute(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[PARM:.*]] = cir.alloca !s32i, !cir.ptr, ["parmVar", init] int localVar1; short localVar2; @@ -773,7 +773,7 @@ typedef struct StructTy { } Struct ; void acc_compute_members() { - // CHECK: cir.func @acc_compute_members() + // CHECK: cir.func{{.*}} @acc_compute_members() Struct localStruct; // CHECK-NEXT: %[[LOCALSTRUCT:.*]] = cir.alloca !rec_StructTy, !cir.ptr, ["localStruct"] @@ -1082,7 +1082,7 @@ typedef struct OuterTy { } Outer; void copy_member_of_array_element_member() { - // CHECK: cir.func @copy_member_of_array_element_member() { + // CHECK: cir.func{{.*}} @copy_member_of_array_element_member() { Outer outer; // CHECK-NEXT: %[[OUTER:.*]] = cir.alloca !rec_OuterTy, !cir.ptr, ["outer"] diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index 5b83a9cb91898..b8140335f7c29 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s extern "C" void acc_combined(int N, int cond) { - // CHECK: cir.func @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}, %[[ARG_COND:.*]]: !s32i loc{{.*}}) { + // CHECK: cir.func{{.*}} @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}, %[[ARG_COND:.*]]: !s32i loc{{.*}}) { // CHECK-NEXT: %[[ALLOCA_N:.*]] = cir.alloca !s32i, !cir.ptr, ["N", init] // CHECK-NEXT: %[[COND:.*]] = cir.alloca 
!s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG_N]], %[[ALLOCA_N]] : !s32i, !cir.ptr @@ -1012,7 +1012,7 @@ extern "C" void acc_combined(int N, int cond) { // CHECK-NEXT: } loc } extern "C" void acc_combined_data_clauses(int *arg1, int *arg2) { - // CHECK: cir.func @acc_combined_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { + // CHECK: cir.func{{.*}} @acc_combined_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.c b/clang/test/CIR/CodeGenOpenACC/compute-copy.c index 888bad29caa7c..0fb150475bc72 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-copy.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.c @@ -2,7 +2,7 @@ int global; void acc_compute(int parmVar) { - // CHECK: cir.func @acc_compute(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_compute(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[PARM:.*]] = cir.alloca !s32i, !cir.ptr, ["parmVar", init] int localVar1; short localVar2; @@ -646,7 +646,7 @@ typedef struct StructTy { } Struct ; void acc_compute_members() { - // CHECK: cir.func @acc_compute_members() + // CHECK: cir.func{{.*}} @acc_compute_members() Struct localStruct; // CHECK-NEXT: %[[LOCALSTRUCT:.*]] = cir.alloca !rec_StructTy, !cir.ptr, ["localStruct"] diff --git a/clang/test/CIR/CodeGenOpenACC/data.c b/clang/test/CIR/CodeGenOpenACC/data.c index 948119f66e93d..77a373f9c049f 100644 --- a/clang/test/CIR/CodeGenOpenACC/data.c +++ b/clang/test/CIR/CodeGenOpenACC/data.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s void acc_data(int cond) { - // CHECK: cir.func @acc_data(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} 
@acc_data(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr diff --git a/clang/test/CIR/CodeGenOpenACC/host_data.c b/clang/test/CIR/CodeGenOpenACC/host_data.c index 4c3f7dd092a2f..aeaf3d2f047b5 100644 --- a/clang/test/CIR/CodeGenOpenACC/host_data.c +++ b/clang/test/CIR/CodeGenOpenACC/host_data.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s void acc_host_data(int cond, int var1, int var2) { - // CHECK: cir.func @acc_host_data(%[[ARG_COND:.*]]: !s32i {{.*}}, %[[ARG_V1:.*]]: !s32i {{.*}}, %[[ARG_V2:.*]]: !s32i {{.*}}) { + // CHECK: cir.func{{.*}} @acc_host_data(%[[ARG_COND:.*]]: !s32i {{.*}}, %[[ARG_V1:.*]]: !s32i {{.*}}, %[[ARG_V2:.*]]: !s32i {{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: %[[V1:.*]] = cir.alloca !s32i, !cir.ptr, ["var1", init] // CHECK-NEXT: %[[V2:.*]] = cir.alloca !s32i, !cir.ptr, ["var2", init] diff --git a/clang/test/CIR/CodeGenOpenACC/init.c b/clang/test/CIR/CodeGenOpenACC/init.c index 54f686dbe8ebc..177e5a6ea2117 100644 --- a/clang/test/CIR/CodeGenOpenACC/init.c +++ b/clang/test/CIR/CodeGenOpenACC/init.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s void acc_init(int cond) { - // CHECK: cir.func @acc_init(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_init(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr #pragma acc init diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c index d0c6f1134c8d2..9b10b7489e814 100644 --- a/clang/test/CIR/CodeGenOpenACC/kernels.c +++ b/clang/test/CIR/CodeGenOpenACC/kernels.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict 
-emit-cir -fclangir %s -o - | FileCheck %s void acc_kernels(int cond) { - // CHECK: cir.func @acc_kernels(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_kernels(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr #pragma acc kernels @@ -418,7 +418,7 @@ void acc_kernels(int cond) { } void acc_kernels_data_clauses(int *arg1, int *arg2) { - // CHECK: cir.func @acc_kernels_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { + // CHECK: cir.func{{.*}} @acc_kernels_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp index c0bf11e353951..d8707ba78fb5b 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s extern "C" void acc_loop(int *A, int *B, int *C, int N) { - // CHECK: cir.func @acc_loop(%[[ARG_A:.*]]: !cir.ptr loc{{.*}}, %[[ARG_B:.*]]: !cir.ptr loc{{.*}}, %[[ARG_C:.*]]: !cir.ptr loc{{.*}}, %[[ARG_N:.*]]: !s32i loc{{.*}}) { + // CHECK: cir.func{{.*}} @acc_loop(%[[ARG_A:.*]]: !cir.ptr loc{{.*}}, %[[ARG_B:.*]]: !cir.ptr loc{{.*}}, %[[ARG_C:.*]]: !cir.ptr loc{{.*}}, %[[ARG_N:.*]]: !s32i loc{{.*}}) { // CHECK-NEXT: %[[ALLOCA_A:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["A", init] // CHECK-NEXT: %[[ALLOCA_B:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["B", init] // CHECK-NEXT: %[[ALLOCA_C:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["C", init] diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c 
b/clang/test/CIR/CodeGenOpenACC/parallel.c index 0127613233eca..5db174fb6549b 100644 --- a/clang/test/CIR/CodeGenOpenACC/parallel.c +++ b/clang/test/CIR/CodeGenOpenACC/parallel.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s void acc_parallel(int cond) { - // CHECK: cir.func @acc_parallel(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_parallel(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr #pragma acc parallel @@ -445,7 +445,7 @@ void acc_parallel(int cond) { } void acc_parallel_data_clauses(int *arg1, int *arg2) { - // CHECK: cir.func @acc_parallel_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { + // CHECK: cir.func{{.*}} @acc_parallel_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGenOpenACC/serial.c b/clang/test/CIR/CodeGenOpenACC/serial.c index 1c9695b34833f..9e3359141838f 100644 --- a/clang/test/CIR/CodeGenOpenACC/serial.c +++ b/clang/test/CIR/CodeGenOpenACC/serial.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s void acc_serial(int cond) { - // CHECK: cir.func @acc_serial(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_serial(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr #pragma acc serial @@ -268,7 +268,7 @@ void acc_serial(int cond) { } void acc_serial_data_clauses(int *arg1, int *arg2) { - // CHECK: cir.func 
@acc_serial_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { + // CHECK: cir.func{{.*}} @acc_serial_data_clauses(%[[ARG1_PARAM:.*]]: !cir.ptr{{.*}}, %[[ARG2_PARAM:.*]]: !cir.ptr{{.*}}) { // CHECK-NEXT: %[[ARG1:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg1", init] // CHECK-NEXT: %[[ARG2:.*]] = cir.alloca !cir.ptr, !cir.ptr>, ["arg2", init] // CHECK-NEXT: cir.store %[[ARG1_PARAM]], %[[ARG1]] : !cir.ptr, !cir.ptr> diff --git a/clang/test/CIR/CodeGenOpenACC/set.c b/clang/test/CIR/CodeGenOpenACC/set.c index ced581680c037..0b87f42603776 100644 --- a/clang/test/CIR/CodeGenOpenACC/set.c +++ b/clang/test/CIR/CodeGenOpenACC/set.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s void acc_set(int cond) { - // CHECK: cir.func @acc_set(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_set(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr diff --git a/clang/test/CIR/CodeGenOpenACC/shutdown.c b/clang/test/CIR/CodeGenOpenACC/shutdown.c index e8ab6bd75d0e9..52db382df217e 100644 --- a/clang/test/CIR/CodeGenOpenACC/shutdown.c +++ b/clang/test/CIR/CodeGenOpenACC/shutdown.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s void acc_shutdown(int cond) { - // CHECK: cir.func @acc_shutdown(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_shutdown(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr #pragma acc shutdown diff --git a/clang/test/CIR/CodeGenOpenACC/wait.c b/clang/test/CIR/CodeGenOpenACC/wait.c index ec2ab6e9446cc..aeda8b955a6d0 100644 --- a/clang/test/CIR/CodeGenOpenACC/wait.c +++ b/clang/test/CIR/CodeGenOpenACC/wait.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s void acc_wait(int 
cond) { - // CHECK: cir.func @acc_wait(%[[ARG:.*]]: !s32i{{.*}}) { + // CHECK: cir.func{{.*}} @acc_wait(%[[ARG:.*]]: !s32i{{.*}}) { // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr, ["cond", init] // CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr diff --git a/clang/test/CIR/IR/array.cir b/clang/test/CIR/IR/array.cir index 4930fc493c5a7..bba536062d740 100644 --- a/clang/test/CIR/IR/array.cir +++ b/clang/test/CIR/IR/array.cir @@ -33,7 +33,7 @@ cir.func @func() { cir.return } -// CHECK: cir.func @func() { +// CHECK: cir.func{{.*}} @func() { // CHECK: %0 = cir.alloca !cir.array, !cir.ptr>, ["l"] {alignment = 4 : i64} // CHECK: cir.return // CHECK: } @@ -44,7 +44,7 @@ cir.func @func2(%arg0: !cir.ptr) { cir.return } -// CHECK: cir.func @func2(%arg0: !cir.ptr) { +// CHECK: cir.func{{.*}} @func2(%arg0: !cir.ptr) { // CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] {alignment = 8 : i64} // CHECK: cir.store %arg0, %0 : !cir.ptr, !cir.ptr> // CHECK: cir.return @@ -56,7 +56,7 @@ cir.func @func3(%arg0: !cir.ptr>) { cir.return } -// CHECK: cir.func @func3(%arg0: !cir.ptr>) { +// CHECK: cir.func{{.*}} @func3(%arg0: !cir.ptr>) { // CHECK: %0 = cir.alloca !cir.ptr>, !cir.ptr>>, ["pp", init] {alignment = 8 : i64} // CHECK: cir.store %arg0, %0 : !cir.ptr>, !cir.ptr>> // CHECK: cir.return diff --git a/clang/test/CIR/IR/binassign.cir b/clang/test/CIR/IR/binassign.cir index 24ed95d3c29c7..a25729635094e 100644 --- a/clang/test/CIR/IR/binassign.cir +++ b/clang/test/CIR/IR/binassign.cir @@ -26,7 +26,7 @@ module { // CHECK: !s8i = !cir.int // CHECK: #true = #cir.bool : !cir.bool // CHECK: module { -// CHECK: cir.func @binary_assign() { +// CHECK: cir.func{{.*}} @binary_assign() { // CHECK: %0 = cir.alloca !cir.bool, !cir.ptr, ["b"] {alignment = 1 : i64} // CHECK: %1 = cir.alloca !s8i, !cir.ptr, ["c"] {alignment = 1 : i64} // CHECK: %2 = cir.alloca !cir.float, !cir.ptr, ["f"] {alignment = 4 : i64} diff --git a/clang/test/CIR/IR/call.cir 
b/clang/test/CIR/IR/call.cir index 5f0916775479e..9607df7202e0f 100644 --- a/clang/test/CIR/IR/call.cir +++ b/clang/test/CIR/IR/call.cir @@ -4,7 +4,7 @@ module { -cir.func @f1() +cir.func private @f1() cir.func @f2() { cir.call @f1() : () -> () @@ -13,26 +13,26 @@ cir.func @f2() { cir.return } -// CHECK: cir.func @f2() { +// CHECK: cir.func{{.*}} @f2() { // CHECK-NEXT: cir.call @f1() : () -> () // CHECK-NEXT: cir.call @f1() side_effect(pure) : () -> () // CHECK-NEXT: cir.call @f1() side_effect(const) : () -> () // CHECK-NEXT: cir.return // CHECK-NEXT: } -cir.func @f3() -> !s32i +cir.func private @f3() -> !s32i cir.func @f4() -> !s32i { %0 = cir.call @f3() : () -> !s32i cir.return %0 : !s32i } -// CHECK: cir.func @f4() -> !s32i { +// CHECK: cir.func{{.*}} @f4() -> !s32i { // CHECK-NEXT: %[[#x:]] = cir.call @f3() : () -> !s32i // CHECK-NEXT: cir.return %[[#x]] : !s32i // CHECK-NEXT: } -cir.func @f5(!s32i, !s32i) -> !s32i +cir.func private @f5(!s32i, !s32i) -> !s32i cir.func @f6() -> !s32i { %0 = cir.const #cir.int<1> : !s32i %1 = cir.const #cir.int<2> : !s32i @@ -40,7 +40,7 @@ cir.func @f6() -> !s32i { cir.return %2 : !s32i } -// CHECK: cir.func @f6() -> !s32i { +// CHECK: cir.func{{.*}} @f6() -> !s32i { // CHECK-NEXT: %[[#a:]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[#b:]] = cir.const #cir.int<2> : !s32i // CHECK-NEXT: %[[#c:]] = cir.call @f5(%[[#a]], %[[#b]]) : (!s32i, !s32i) -> !s32i @@ -54,7 +54,7 @@ cir.func @f7(%arg0: !cir.ptr !s32i>>) -> !s32i { cir.return %2 : !s32i } -// CHECK: cir.func @f7(%[[ptr:.+]]: !cir.ptr !s32i>>) -> !s32i { +// CHECK: cir.func{{.*}} @f7(%[[ptr:.+]]: !cir.ptr !s32i>>) -> !s32i { // CHECK-NEXT: %[[#a:]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[#b:]] = cir.const #cir.int<2> : !s32i // CHECK-NEXT: %[[#ret:]] = cir.call %[[ptr]](%[[#a]], %[[#b]]) : (!cir.ptr !s32i>>, !s32i, !s32i) -> !s32i diff --git a/clang/test/CIR/IR/cast.cir b/clang/test/CIR/IR/cast.cir index 4881db7fc271f..a335887de7ec7 100644 --- 
a/clang/test/CIR/IR/cast.cir +++ b/clang/test/CIR/IR/cast.cir @@ -15,9 +15,9 @@ module { } } -// CHECK: cir.func @yolo(%arg0: !s32i) +// CHECK: cir.func{{.*}} @yolo(%arg0: !s32i) // CHECK: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool // CHECK: %1 = cir.const #cir.int<0> : !s32i -// CHECK: cir.func @bitcast +// CHECK: cir.func{{.*}} @bitcast // CHECK: %0 = cir.cast(bitcast, %arg0 : !cir.ptr), !cir.ptr diff --git a/clang/test/CIR/IR/cmp.cir b/clang/test/CIR/IR/cmp.cir index a049dc51f1401..818527189af01 100644 --- a/clang/test/CIR/IR/cmp.cir +++ b/clang/test/CIR/IR/cmp.cir @@ -36,7 +36,7 @@ module { cir.return } - // CHECK: cir.func @c0(%arg0: !s32i, %arg1: !s32i) { + // CHECK: cir.func{{.*}} @c0(%arg0: !s32i, %arg1: !s32i) { // CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} // CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr, ["b", init] {alignment = 4 : i64} // CHECK-NEXT: %2 = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} @@ -102,7 +102,7 @@ module { cir.return } - // CHECK: cir.func @c0_unsigned(%arg0: !u32i, %arg1: !u32i) { + // CHECK: cir.func{{.*}} @c0_unsigned(%arg0: !u32i, %arg1: !u32i) { // CHECK-NEXT: %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} // CHECK-NEXT: %1 = cir.alloca !u32i, !cir.ptr, ["b", init] {alignment = 4 : i64} // CHECK-NEXT: %2 = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} @@ -168,7 +168,7 @@ module { cir.return } - // CHECK: cir.func @c0_float(%arg0: !cir.float, %arg1: !cir.float) { + // CHECK: cir.func{{.*}} @c0_float(%arg0: !cir.float, %arg1: !cir.float) { // CHECK-NEXT: %0 = cir.alloca !cir.float, !cir.ptr, ["a", init] {alignment = 4 : i64} // CHECK-NEXT: %1 = cir.alloca !cir.float, !cir.ptr, ["b", init] {alignment = 4 : i64} // CHECK-NEXT: %2 = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} @@ -234,7 +234,7 @@ module { cir.return } - // CHECK: cir.func @pointer_cmp(%arg0: !cir.ptr, %arg1: !cir.ptr) { + // CHECK: 
cir.func{{.*}} @pointer_cmp(%arg0: !cir.ptr, %arg1: !cir.ptr) { // CHECK-NEXT: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["a", init] {alignment = 8 : i64} // CHECK-NEXT: %1 = cir.alloca !cir.ptr, !cir.ptr>, ["b", init] {alignment = 8 : i64} // CHECK-NEXT: %2 = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} @@ -312,7 +312,7 @@ module { cir.return } - // CHECK: cir.func @bool_cmp(%arg0: !cir.bool, %arg1: !cir.bool) { + // CHECK: cir.func{{.*}} @bool_cmp(%arg0: !cir.bool, %arg1: !cir.bool) { // CHECK-NEXT: %0 = cir.alloca !cir.bool, !cir.ptr, ["a", init] {alignment = 1 : i64} // CHECK-NEXT: %1 = cir.alloca !cir.bool, !cir.ptr, ["b", init] {alignment = 1 : i64} // CHECK-NEXT: %2 = cir.alloca !cir.bool, !cir.ptr, ["x", init] {alignment = 1 : i64} diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir index 9ed44611e195d..865ab7ecc39a2 100644 --- a/clang/test/CIR/IR/func.cir +++ b/clang/test/CIR/IR/func.cir @@ -10,7 +10,7 @@ module { cir.func @empty() { cir.return } -// CHECK: cir.func @empty() { +// CHECK: cir.func{{.*}} @empty() { // CHECK: cir.return // CHECK: } @@ -18,7 +18,7 @@ cir.func @empty() { cir.func @voidret() { cir.return } -// CHECK: cir.func @voidret() { +// CHECK: cir.func{{.*}} @voidret() { // CHECK: cir.return // CHECK: } @@ -27,7 +27,7 @@ cir.func @intfunc() -> !s32i { %0 = cir.const #cir.int<42> : !s32i cir.return %0 : !s32i } -// CHECK: cir.func @intfunc() -> !s32i { +// CHECK: cir.func{{.*}} @intfunc() -> !s32i { // CHECK: %[[VAL:.*]] = cir.const #cir.int<42> : !s32i // CHECK: cir.return %[[VAL]] : !s32i // CHECK: } @@ -48,7 +48,7 @@ cir.func @scopes() -> !s32i { } cir.trap } -// CHECK: cir.func @scopes() -> !s32i { +// CHECK: cir.func{{.*}} @scopes() -> !s32i { // CHECK: cir.scope { // CHECK: cir.scope { // CHECK: %[[VAL:.*]] = cir.const #cir.int<99> : !s32i @@ -63,7 +63,7 @@ cir.func @longfunc() -> !s64i { %0 = cir.const #cir.int<42> : !s64i cir.return %0 : !s64i } -// CHECK: cir.func @longfunc() -> !s64i +// CHECK: 
cir.func{{.*}} @longfunc() -> !s64i // CHECK: %0 = cir.const #cir.int<42> : !s64i // CHECK: cir.return %0 : !s64i // CHECK: } @@ -73,7 +73,7 @@ cir.func @unsignedfunc() -> !u32i { %0 = cir.const #cir.int<42> : !u32i cir.return %0 : !u32i } -// CHECK: cir.func @unsignedfunc() -> !u32i +// CHECK: cir.func{{.*}} @unsignedfunc() -> !u32i // CHECK: %[[VAL:.*]] = cir.const #cir.int<42> : !u32i // CHECK: cir.return %[[VAL]] : !u32i // CHECK: } @@ -83,7 +83,7 @@ cir.func @ullfunc() -> !u64i { %0 = cir.const #cir.int<42> : !u64i cir.return %0 : !u64i } -// CHECK: cir.func @ullfunc() -> !u64i +// CHECK: cir.func{{.*}} @ullfunc() -> !u64i // CHECK: %[[VAL:.*]] = cir.const #cir.int<42> : !u64i // CHECK: cir.return %[[VAL:.*]] : !u64i // CHECK: } diff --git a/clang/test/CIR/IR/invalid-call.cir b/clang/test/CIR/IR/invalid-call.cir index 3ebb771ed72e7..a9c7e38f73af6 100644 --- a/clang/test/CIR/IR/invalid-call.cir +++ b/clang/test/CIR/IR/invalid-call.cir @@ -12,7 +12,7 @@ cir.func @f1() { !u32i = !cir.int -cir.func @f2() +cir.func private @f2() cir.func @f3() { // expected-error @below {{callee returns void but call has results}} %0 = cir.call @f2() : () -> !u32i @@ -23,7 +23,7 @@ cir.func @f3() { !u32i = !cir.int -cir.func @f4() -> !u32i +cir.func private @f4() -> !u32i cir.func @f5() { // expected-error @below {{incorrect number of results for callee}} cir.call @f4() : () -> () @@ -35,7 +35,7 @@ cir.func @f5() { !s32i = !cir.int !u32i = !cir.int -cir.func @f6() -> !u32i +cir.func private @f6() -> !u32i cir.func @f7() { // expected-error @below {{result type mismatch}} %0 = cir.call @f6() : () -> !s32i @@ -47,7 +47,7 @@ cir.func @f7() { !s32i = !cir.int !u32i = !cir.int -cir.func @f8(!s32i, !s32i) +cir.func private @f8(!s32i, !s32i) cir.func @f9() { %0 = cir.const #cir.int<1> : !s32i // expected-error @below {{incorrect number of operands for callee}} @@ -60,7 +60,7 @@ cir.func @f9() { !s32i = !cir.int !u32i = !cir.int -cir.func @f10(!s32i, !s32i) +cir.func private @f10(!s32i, 
!s32i) cir.func @f11() { %0 = cir.const #cir.int<1> : !s32i %1 = cir.const #cir.int<2> : !u32i @@ -73,7 +73,7 @@ cir.func @f11() { !s32i = !cir.int -cir.func @f12(!s32i, !s32i, ...) +cir.func private @f12(!s32i, !s32i, ...) cir.func @f13() { %0 = cir.const #cir.int<1> : !s32i // expected-error @below {{too few operands for callee}} diff --git a/clang/test/CIR/IR/invalid-complex.cir b/clang/test/CIR/IR/invalid-complex.cir index 8c6d890579321..2414809f7dbca 100644 --- a/clang/test/CIR/IR/invalid-complex.cir +++ b/clang/test/CIR/IR/invalid-complex.cir @@ -21,3 +21,27 @@ module { cir.global external @ci2 = #cir.const_complex<#cir.int<1> : !s32i, #cir.int<2> : !s64i> : !cir.complex } + +// ----- + +module { + cir.func @complex_real_invalid_result_type() -> !cir.double { + %0 = cir.alloca !cir.complex, !cir.ptr>, ["c"] + %2 = cir.load align(8) %0 : !cir.ptr>, !cir.complex + // expected-error @below {{result type does not match operand type}} + %3 = cir.complex.real %2 : !cir.complex -> !cir.float + cir.return + } +} + +// ----- + +module { + cir.func @complex_imag_invalid_result_type() -> !cir.double { + %0 = cir.alloca !cir.complex, !cir.ptr>, ["c"] + %2 = cir.load align(8) %0 : !cir.ptr>, !cir.complex + // expected-error @below {{result type does not match operand type}} + %3 = cir.complex.imag %2 : !cir.complex -> !cir.float + cir.return + } +} diff --git a/clang/test/CIR/IR/ternary.cir b/clang/test/CIR/IR/ternary.cir index 3827dc77726df..e419c7f5af40c 100644 --- a/clang/test/CIR/IR/ternary.cir +++ b/clang/test/CIR/IR/ternary.cir @@ -16,7 +16,7 @@ module { // CHECK: module { -// CHECK: cir.func @blue(%arg0: !cir.bool) -> !u32i { +// CHECK: cir.func{{.*}} @blue(%arg0: !cir.bool) -> !u32i { // CHECK: %0 = cir.ternary(%arg0, true { // CHECK: %1 = cir.const #cir.int<0> : !u32i // CHECK: cir.yield %1 : !u32i diff --git a/clang/test/CIR/IR/unary.cir b/clang/test/CIR/IR/unary.cir index f01121adc106e..ba3bc20d574f5 100644 --- a/clang/test/CIR/IR/unary.cir +++ 
b/clang/test/CIR/IR/unary.cir @@ -16,7 +16,7 @@ module { %6 = cir.unary(dec, %1) : !u32i, !u32i cir.return } -// CHECK: cir.func @test_unary_unsigned() { +// CHECK: cir.func{{.*}} @test_unary_unsigned() { // CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["a"] {alignment = 4 : i64} // CHECK: %1 = cir.load %0 : !cir.ptr, !u32i // CHECK: %2 = cir.unary(plus, %1) : !u32i, !u32i @@ -37,7 +37,7 @@ module { %6 = cir.unary(dec, %1) nsw : !s32i, !s32i cir.return } -// CHECK: cir.func @test_unary_signed() { +// CHECK: cir.func{{.*}} @test_unary_signed() { // CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["a"] {alignment = 4 : i64} // CHECK: %1 = cir.load %0 : !cir.ptr, !s32i // CHECK: %2 = cir.unary(plus, %1) : !s32i, !s32i diff --git a/clang/test/CIR/IR/vector.cir b/clang/test/CIR/IR/vector.cir index f23f5de9692de..6d8e5beffd63f 100644 --- a/clang/test/CIR/IR/vector.cir +++ b/clang/test/CIR/IR/vector.cir @@ -26,7 +26,7 @@ cir.func @vec_int_test() { cir.return } -// CHECK: cir.func @vec_int_test() { +// CHECK: cir.func{{.*}} @vec_int_test() { // CHECK: %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["a"] // CHECK: %1 = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr>, ["b"] // CHECK: %2 = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr>, ["c"] @@ -38,7 +38,7 @@ cir.func @vec_double_test() { cir.return } -// CHECK: cir.func @vec_double_test() { +// CHECK: cir.func{{.*}} @vec_double_test() { // CHECK: %0 = cir.alloca !cir.vector<2 x !cir.double>, !cir.ptr>, ["a"] // CHECK: cir.return // CHECK: } @@ -54,7 +54,7 @@ cir.func @local_vector_create_test() { cir.return } -// CHECK: cir.func @local_vector_create_test() { +// CHECK: cir.func{{.*}} @local_vector_create_test() { // CHECK: %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["a", init] // CHECK: %1 = cir.const #cir.int<1> : !s32i // CHECK: %2 = cir.const #cir.int<2> : !s32i @@ -81,7 +81,7 @@ cir.func @vector_extract_element_test() { cir.return } -// CHECK: cir.func @vector_extract_element_test() { +// CHECK: cir.func{{.*}} 
@vector_extract_element_test() { // CHECK: %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["d", init] // CHECK: %1 = cir.alloca !s32i, !cir.ptr, ["e", init] // CHECK: %2 = cir.const #cir.int<1> : !s32i @@ -116,7 +116,7 @@ cir.func @vector_insert_element_test() { cir.return } -// CHECK: cir.func @vector_insert_element_test() { +// CHECK: cir.func{{.*}} @vector_insert_element_test() { // CHECK: %0 = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["a", init] // CHECK: %1 = cir.const #cir.int<1> : !s32i // CHECK: %2 = cir.const #cir.int<2> : !s32i @@ -150,7 +150,7 @@ cir.func @vector_compare_test() { cir.return } -// CHECK: cir.func @vector_compare_test() { +// CHECK: cir.func{{.*}} @vector_compare_test() { // CHECK: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["a"] // CHECK: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["b"] // CHECK: %[[INIT:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["o", init] @@ -176,7 +176,7 @@ cir.func @vector_shuffle_dynamic_test() { cir.return } -// CHECK: cir.func @vector_shuffle_dynamic_test() { +// CHECK: cir.func{{.*}} @vector_shuffle_dynamic_test() { // CHECK: %[[VEC_A:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["a"] // CHECK: %[[VEC_B:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["b"] // CHECK: %[[RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["r", init] @@ -204,7 +204,7 @@ cir.func @vector_splat_test() { cir.return } -// CHECK: cir.func @vector_splat_test() { +// CHECK: cir.func{{.*}} @vector_splat_test() { // CHECK-NEXT: %[[VEC:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["a", init] // CHECK-NEXT: %[[SHL_RES:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr>, ["shl", init] // CHECK-NEXT: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i diff --git a/clang/test/CIR/Lowering/array.cpp b/clang/test/CIR/Lowering/array.cpp index 9bd3464dc3145..438d41e2c2c2f 100644 --- a/clang/test/CIR/Lowering/array.cpp +++ b/clang/test/CIR/Lowering/array.cpp @@ -41,7 +41,7 
@@ void func() { int e = arr[0]; int e2 = arr[1]; } -// CHECK: define void @_Z4funcv() +// CHECK: define{{.*}} void @_Z4funcv() // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = alloca [10 x i32], i64 1, align 16 // CHECK-NEXT: %[[INIT:.*]] = alloca i32, i64 1, align 4 // CHECK-NEXT: %[[INIT_2:.*]] = alloca i32, i64 1, align 4 @@ -57,7 +57,7 @@ void func() { void func2() { int arr[2] = {5}; } -// CHECK: define void @_Z5func2v() +// CHECK: define{{.*}} void @_Z5func2v() // CHECK: %[[ARR_ALLOCA:.*]] = alloca [2 x i32], i64 1, align 4 // CHECK: %[[TMP:.*]] = alloca ptr, i64 1, align 8 // CHECK: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR_ALLOCA]], i32 0 @@ -72,7 +72,7 @@ void func2() { void func3() { int arr3[2] = {5, 6}; } -// CHECK: define void @_Z5func3v() +// CHECK: define{{.*}} void @_Z5func3v() // CHECK: %[[ARR_ALLOCA:.*]] = alloca [2 x i32], i64 1, align 4 // CHECK: %[[ARR_PTR:.*]] = getelementptr i32, ptr %[[ARR_ALLOCA]], i32 0 // CHECK: store i32 5, ptr %[[ARR_PTR]], align 4 @@ -83,7 +83,7 @@ void func4() { int arr[2][1] = {{5}, {6}}; int e = arr[1][0]; } -// CHECK: define void @_Z5func4v() +// CHECK: define{{.*}} void @_Z5func4v() // CHECK: %[[ARR_ALLOCA:.*]] = alloca [2 x [1 x i32]], i64 1, align 4 // CHECK: %[[INIT:.*]] = alloca i32, i64 1, align 4 // CHECK: %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR_ALLOCA]], i32 0 @@ -102,7 +102,7 @@ void func4() { void func5() { int arr[2][1] = {{5}}; } -// CHECK: define void @_Z5func5v() +// CHECK: define{{.*}} void @_Z5func5v() // CHECK: %[[ARR_ALLOCA:.*]] = alloca [2 x [1 x i32]], i64 1, align 4 // CHECK: %[[TMP:.*]] = alloca ptr, i64 1, align 8 // CHECK: %[[ARR_PTR:.*]] = getelementptr [1 x i32], ptr %[[ARR_ALLOCA]], i32 0 @@ -119,7 +119,7 @@ void func6() { int x = 4; int arr[2] = { x, 5 }; } -// CHECK: define void @_Z5func6v() +// CHECK: define{{.*}} void @_Z5func6v() // CHECK: %[[VAR:.*]] = alloca i32, i64 1, align 4 // CHECK: %[[ARR:.*]] = alloca [2 x i32], i64 1, align 4 // CHECK: store i32 4, ptr %[[VAR]], 
align 4 @@ -132,7 +132,7 @@ void func6() { void func7() { int* arr[1] = {}; } -// CHECK: define void @_Z5func7v() +// CHECK: define{{.*}} void @_Z5func7v() // CHECK: %[[ARR:.*]] = alloca [1 x ptr], i64 1, align 8 // CHECK: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 // CHECK: %[[ELE_PTR:.*]] = getelementptr ptr, ptr %[[ARR]], i32 0 @@ -143,9 +143,9 @@ void func7() { // CHECK: store ptr %[[ELE]], ptr %[[ALLOCA]], align 8 void func8(int p[10]) {} -// CHECK: define void @_Z5func8Pi(ptr {{%.*}}) +// CHECK: define{{.*}} void @_Z5func8Pi(ptr {{%.*}}) // CHECK-NEXT: alloca ptr, i64 1, align 8 void func9(int pp[10][5]) {} -// CHECK: define void @_Z5func9PA5_i(ptr {{%.*}}) +// CHECK: define{{.*}} void @_Z5func9PA5_i(ptr {{%.*}}) // CHECK-NEXT: alloca ptr, i64 1, align 8 diff --git a/clang/test/CIR/Transforms/canonicalize.cir b/clang/test/CIR/Transforms/canonicalize.cir index 164d231db7bb4..7ba163eb30bb1 100644 --- a/clang/test/CIR/Transforms/canonicalize.cir +++ b/clang/test/CIR/Transforms/canonicalize.cir @@ -15,7 +15,7 @@ module { ^bb2: // pred: ^bb1 cir.return } - // CHECK: cir.func @redundant_br() { + // CHECK: cir.func{{.*}} @redundant_br() { // CHECK-NEXT: %[[A:.*]] = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} // CHECK-NEXT: %[[FOUR:.*]] = cir.const #cir.int<4> : !u32i // CHECK-NEXT: cir.store %[[FOUR]], %[[A]] : !u32i, !cir.ptr @@ -27,7 +27,7 @@ module { } cir.return } - // CHECK: cir.func @empty_scope() { + // CHECK: cir.func{{.*}} @empty_scope() { // CHECK-NEXT: cir.return // CHECK-NEXT: } @@ -36,7 +36,7 @@ module { %1 = cir.unary(not, %0) : !cir.bool, !cir.bool cir.return %1 : !cir.bool } - // CHECK: cir.func @unary_not(%arg0: !cir.bool) -> !cir.bool + // CHECK: cir.func{{.*}} @unary_not(%arg0: !cir.bool) -> !cir.bool // CHECK-NEXT: cir.return %arg0 : !cir.bool cir.func @cast1(%arg0: !cir.bool) -> !cir.bool { @@ -44,7 +44,7 @@ module { %1 = cir.cast(int_to_bool, %0 : !s32i), !cir.bool cir.return %1 : !cir.bool } - // CHECK: cir.func 
@cast1(%[[ARG0:.*]]: !cir.bool) -> !cir.bool + // CHECK: cir.func{{.*}} @cast1(%[[ARG0:.*]]: !cir.bool) -> !cir.bool // CHECK-NEXT: cir.return %[[ARG0]] : !cir.bool cir.func @cast2(%arg0: !s32i) -> !cir.bool { @@ -54,7 +54,7 @@ module { %3 = cir.cast(int_to_bool, %2 : !s64i), !cir.bool cir.return %3 : !cir.bool } - // CHECK: cir.func @cast2(%[[ARG0:.*]]: !s32i) -> !cir.bool + // CHECK: cir.func{{.*}} @cast2(%[[ARG0:.*]]: !s32i) -> !cir.bool // CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[ARG0]] : !s32i), !cir.bool // CHECK-NEXT: cir.return %[[CAST]] : !cir.bool @@ -64,7 +64,7 @@ module { %2 = cir.cast(integral, %1 : !s32i), !s64i cir.return %2 : !s64i } - // CHECK: cir.func @no_fold_cast(%[[ARG0:.*]]: !s32i) -> !s64i + // CHECK: cir.func{{.*}} @no_fold_cast(%[[ARG0:.*]]: !s32i) -> !s64i // CHECK-NEXT: %[[CAST:.*]] = cir.cast(int_to_bool, %[[ARG0]] : !s32i), !cir.bool // CHECK-NEXT: %[[CAST2:.*]] = cir.cast(bool_to_int, %[[CAST]] : !cir.bool), !s32i // CHECK-NEXT: %[[CAST3:.*]] = cir.cast(integral, %[[CAST2]] : !s32i), !s64i diff --git a/clang/test/CIR/Transforms/complex-create-fold.cir b/clang/test/CIR/Transforms/complex-create-fold.cir index 5d9d22112c8b7..370acaecc2222 100644 --- a/clang/test/CIR/Transforms/complex-create-fold.cir +++ b/clang/test/CIR/Transforms/complex-create-fold.cir @@ -16,7 +16,7 @@ module { cir.return %6 : !cir.complex } -// CHECK: cir.func @fold_complex_create_test() -> !cir.complex { +// CHECK: cir.func{{.*}} @fold_complex_create_test() -> !cir.complex { // CHECK: %[[RET:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["__retval"] // CHECK: %[[INIT:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c", init] // CHECK: %[[COMPLEX:.*]] = cir.const #cir.const_complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex diff --git a/clang/test/CIR/Transforms/complex-imag-fold.cir b/clang/test/CIR/Transforms/complex-imag-fold.cir new file mode 100644 index 0000000000000..0d9a4e43142a3 --- /dev/null +++ 
b/clang/test/CIR/Transforms/complex-imag-fold.cir @@ -0,0 +1,23 @@ +// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s + +!s32i = !cir.int + +module { + cir.func @fold_complex_imag_test() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] + %2 = cir.const #cir.const_complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex + %4 = cir.complex.imag %2 : !cir.complex -> !s32i + cir.store %4, %0 : !s32i, !cir.ptr + %5 = cir.load %0 : !cir.ptr, !s32i + cir.return %5 : !s32i + } + + // CHECK: cir.func @fold_complex_imag_test() -> !s32i { + // CHECK: %[[RET:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] + // CHECK: %[[IMAG:.*]] = cir.const #cir.int<2> : !s32i + // CHECK: cir.store %[[IMAG]], %[[RET]] : !s32i, !cir.ptr + // CHECK: %[[TMP:.*]] = cir.load %[[RET]] : !cir.ptr, !s32i + // CHECK: cir.return %[[TMP]] : !s32i + // CHECK: } + +} diff --git a/clang/test/CIR/Transforms/complex-real-fold.cir b/clang/test/CIR/Transforms/complex-real-fold.cir new file mode 100644 index 0000000000000..1cab9be616af0 --- /dev/null +++ b/clang/test/CIR/Transforms/complex-real-fold.cir @@ -0,0 +1,23 @@ +// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s + +!s32i = !cir.int + +module { + cir.func @fold_complex_real_test() -> !s32i { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] + %2 = cir.const #cir.const_complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex + %4 = cir.complex.real %2 : !cir.complex -> !s32i + cir.store %4, %0 : !s32i, !cir.ptr + %5 = cir.load %0 : !cir.ptr, !s32i + cir.return %5 : !s32i + } + + // CHECK: cir.func @fold_complex_real_test() -> !s32i { + // CHECK: %[[RET:.*]] = cir.alloca !s32i, !cir.ptr, ["__retval"] + // CHECK: %[[REAL:.*]] = cir.const #cir.int<1> : !s32i + // CHECK: cir.store %[[REAL]], %[[RET]] : !s32i, !cir.ptr + // CHECK: %[[TMP:.*]] = cir.load %[[RET]] : !cir.ptr, !s32i + // CHECK: cir.return %[[TMP]] : !s32i + // CHECK: } + +} diff --git a/clang/test/CIR/Transforms/hoist-allocas.cir 
b/clang/test/CIR/Transforms/hoist-allocas.cir index df7b9f48be9dc..04724f3073e57 100644 --- a/clang/test/CIR/Transforms/hoist-allocas.cir +++ b/clang/test/CIR/Transforms/hoist-allocas.cir @@ -20,7 +20,7 @@ module { } cir.return } - // CHECK: cir.func @l1 + // CHECK: cir.func{{.*}} @l1 // CHECK-NEXT: %[[I:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i @@ -55,7 +55,7 @@ module { } cir.return } - // CHECK: cir.func @l2 + // CHECK: cir.func{{.*}} @l2 // CHECK-NEXT: %[[I:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CHECK-NEXT: cir.scope { // CHECK-NEXT: cir.for : cond { @@ -92,7 +92,7 @@ module { } cir.return } - // CHECK: cir.func @l3 + // CHECK: cir.func{{.*}} @l3 // CHECK-NEXT: %[[I:.*]] = cir.alloca !s32i, !cir.ptr, ["i", init] {alignment = 4 : i64} // CHECK-NEXT: cir.scope { // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/Transforms/if.cir b/clang/test/CIR/Transforms/if.cir index 03848bf8d0633..3f817c793643f 100644 --- a/clang/test/CIR/Transforms/if.cir +++ b/clang/test/CIR/Transforms/if.cir @@ -14,7 +14,7 @@ module { } cir.return %arg0 : !s32i } -// CHECK: cir.func @foo(%arg0: !s32i) -> !s32i { +// CHECK: cir.func{{.*}} @foo(%arg0: !s32i) -> !s32i { // CHECK-NEXT: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool // CHECK-NEXT: cir.brcond %0 ^bb1, ^bb2 // CHECK-NEXT: ^bb1: // pred: ^bb0 @@ -35,7 +35,7 @@ module { } cir.return %arg0 : !s32i } -// CHECK: cir.func @onlyIf(%arg0: !s32i) -> !s32i { +// CHECK: cir.func{{.*}} @onlyIf(%arg0: !s32i) -> !s32i { // CHECK-NEXT: %0 = cir.cast(int_to_bool, %arg0 : !s32i), !cir.bool // CHECK-NEXT: cir.brcond %0 ^bb1, ^bb2 // CHECK-NEXT: ^bb1: // pred: ^bb0 diff --git a/clang/test/CIR/Transforms/loop.cir b/clang/test/CIR/Transforms/loop.cir index d02412d049158..9c76092bb8e76 100644 --- a/clang/test/CIR/Transforms/loop.cir +++ b/clang/test/CIR/Transforms/loop.cir @@ -16,7 +16,7 @@ 
module { } } -// CHECK: cir.func @testFor(%arg0: !cir.bool) { +// CHECK: cir.func{{.*}} @testFor(%arg0: !cir.bool) { // CHECK: cir.br ^bb[[#COND:]] // CHECK: ^bb[[#COND]]: // CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] @@ -38,7 +38,7 @@ module { cir.return } -// CHECK: cir.func @testWhile(%arg0: !cir.bool) { +// CHECK: cir.func{{.*}} @testWhile(%arg0: !cir.bool) { // CHECK: cir.br ^bb[[#COND:]] // CHECK: ^bb[[#COND]]: // CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] @@ -59,7 +59,7 @@ module { cir.return } -// CHECK: cir.func @testDoWhile(%arg0: !cir.bool) { +// CHECK: cir.func{{.*}} @testDoWhile(%arg0: !cir.bool) { // CHECK: cir.br ^bb[[#BODY:]] // CHECK: ^bb[[#COND]]: // CHECK: cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]] diff --git a/clang/test/CIR/Transforms/scope.cir b/clang/test/CIR/Transforms/scope.cir index 2d14784c33f87..757428cd019ac 100644 --- a/clang/test/CIR/Transforms/scope.cir +++ b/clang/test/CIR/Transforms/scope.cir @@ -11,7 +11,7 @@ module { } cir.return } -// CHECK: cir.func @foo() { +// CHECK: cir.func{{.*}} @foo() { // CHECK: cir.br ^bb1 // CHECK: ^bb1: // pred: ^bb0 // CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["a", init] {alignment = 4 : i64} @@ -28,7 +28,7 @@ module { } cir.return } -// CHECK: cir.func @empty_scope() { +// CHECK: cir.func{{.*}} @empty_scope() { // CHECK: cir.return // CHECK: } @@ -44,7 +44,7 @@ module { cir.return %1 : !u32i } -// CHECK: cir.func @scope_with_return() -> !u32i { +// CHECK: cir.func{{.*}} @scope_with_return() -> !u32i { // CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: cir.br ^bb1 // CHECK: ^bb1: // pred: ^bb0 diff --git a/clang/test/CIR/Transforms/select.cir b/clang/test/CIR/Transforms/select.cir index 29a5d1ed1ddeb..0ad5c43178831 100644 --- a/clang/test/CIR/Transforms/select.cir +++ b/clang/test/CIR/Transforms/select.cir @@ -10,7 +10,7 @@ module { cir.return %1 : !s32i } - // CHECK: cir.func @fold_true(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i { + 
// CHECK: cir.func{{.*}} @fold_true(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i { // CHECK-NEXT: cir.return %[[ARG0]] : !s32i // CHECK-NEXT: } @@ -20,7 +20,7 @@ module { cir.return %1 : !s32i } - // CHECK: cir.func @fold_false(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i { + // CHECK: cir.func{{.*}} @fold_false(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i { // CHECK-NEXT: cir.return %[[ARG1]] : !s32i // CHECK-NEXT: } @@ -30,7 +30,7 @@ module { cir.return %1 : !s32i } - // CHECK: cir.func @fold_to_const(%{{.+}}: !cir.bool) -> !s32i { + // CHECK: cir.func{{.*}} @fold_to_const(%{{.+}}: !cir.bool) -> !s32i { // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<42> : !s32i // CHECK-NEXT: cir.return %[[#A]] : !s32i // CHECK-NEXT: } @@ -42,7 +42,7 @@ module { cir.return %2 : !cir.bool } - // CHECK: cir.func @simplify_1(%[[ARG0:.+]]: !cir.bool) -> !cir.bool { + // CHECK: cir.func{{.*}} @simplify_1(%[[ARG0:.+]]: !cir.bool) -> !cir.bool { // CHECK-NEXT: cir.return %[[ARG0]] : !cir.bool // CHECK-NEXT: } @@ -53,7 +53,7 @@ module { cir.return %2 : !cir.bool } - // CHECK: cir.func @simplify_2(%[[ARG0:.+]]: !cir.bool) -> !cir.bool { + // CHECK: cir.func{{.*}} @simplify_2(%[[ARG0:.+]]: !cir.bool) -> !cir.bool { // CHECK-NEXT: %[[#A:]] = cir.unary(not, %[[ARG0]]) : !cir.bool, !cir.bool // CHECK-NEXT: cir.return %[[#A]] : !cir.bool // CHECK-NEXT: } diff --git a/clang/test/CIR/Transforms/switch.cir b/clang/test/CIR/Transforms/switch.cir index 00b462a6075c9..a000d6b70fbcc 100644 --- a/clang/test/CIR/Transforms/switch.cir +++ b/clang/test/CIR/Transforms/switch.cir @@ -17,7 +17,7 @@ module { } cir.return } -// CHECK: cir.func @shouldFlatSwitchWithDefault(%arg0: !s8i) { +// CHECK: cir.func{{.*}} @shouldFlatSwitchWithDefault(%arg0: !s8i) { // CHECK: cir.switch.flat %arg0 : !s8i, ^bb[[#DEFAULT:]] [ // CHECK: 1: ^bb[[#CASE1:]] // CHECK: ] @@ -38,7 +38,7 @@ module { } cir.return } -// CHECK: cir.func @shouldFlatSwitchWithoutDefault(%arg0: !s32i) { +// CHECK: cir.func{{.*}} 
@shouldFlatSwitchWithoutDefault(%arg0: !s32i) { // CHECK: cir.switch.flat %arg0 : !s32i, ^bb[[#EXIT:]] [ // CHECK: 1: ^bb[[#CASE1:]] // CHECK: ] @@ -58,7 +58,7 @@ module { } cir.return } -// CHECK: cir.func @shouldFlatSwitchWithImplicitFallthrough(%arg0: !s64i) { +// CHECK: cir.func{{.*}} @shouldFlatSwitchWithImplicitFallthrough(%arg0: !s64i) { // CHECK: cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [ // CHECK: 1: ^bb[[#CASE1N2:]], // CHECK: 2: ^bb[[#CASE1N2]] @@ -83,7 +83,7 @@ module { } cir.return } -// CHECK: cir.func @shouldFlatSwitchWithExplicitFallthrough(%arg0: !s64i) { +// CHECK: cir.func{{.*}} @shouldFlatSwitchWithExplicitFallthrough(%arg0: !s64i) { // CHECK: cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [ // CHECK: 1: ^bb[[#CASE1:]], // CHECK: 2: ^bb[[#CASE2:]] @@ -105,7 +105,7 @@ module { } cir.return } -// CHECK: cir.func @shouldFlatSwitchWithFallthroughToExit(%arg0: !s64i) { +// CHECK: cir.func{{.*}} @shouldFlatSwitchWithFallthroughToExit(%arg0: !s64i) { // CHECK: cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [ // CHECK: 1: ^bb[[#CASE1:]] // CHECK: ] @@ -122,7 +122,7 @@ module { // CHECK-NOT: llvm.switch cir.return } -// CHECK: cir.func @shouldDropEmptySwitch(%arg0: !s64i) +// CHECK: cir.func{{.*}} @shouldDropEmptySwitch(%arg0: !s64i) // CHECK-NOT: cir.switch.flat @@ -143,7 +143,7 @@ module { cir.return } -// CHECK: cir.func @shouldFlatMultiBlockCase(%arg0: !s32i) { +// CHECK: cir.func{{.*}} @shouldFlatMultiBlockCase(%arg0: !s32i) { // CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["a", init] {alignment = 4 : i64} // CHECK: cir.store %arg0, %0 : !s32i, !cir.ptr // CHECK: cir.br ^bb1 @@ -189,7 +189,7 @@ module { %4 = cir.load %2 : !cir.ptr, !s32i cir.return %4 : !s32i } -// CHECK: cir.func @shouldFlatNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i { +// CHECK: cir.func{{.*}} @shouldFlatNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i { // CHECK: cir.switch.flat %[[COND:.*]] : !s32i, ^bb[[#DEFAULT_BB:]] [ // CHECK: 0: ^bb[[#BB1:]] // CHECK: ] @@ -243,7 +243,7 @@ 
module { %5 = cir.load %1 : !cir.ptr, !s32i cir.return %5 : !s32i } -// CHECK: cir.func @flatCaseRange(%arg0: !s32i) -> !s32i { +// CHECK: cir.func{{.*}} @flatCaseRange(%arg0: !s32i) -> !s32i { // CHECK: cir.switch.flat %[[X:[0-9]+]] : !s32i, ^[[JUDGE_RANGE:bb[0-9]+]] [ // CHECK-NEXT: -100: ^[[CASE_EQUAL:bb[0-9]+]] // CHECK-NEXT: ] @@ -293,7 +293,7 @@ module { cir.return } -// CHECK: cir.func @_Z8bigRangei(%arg0: !s32i) { +// CHECK: cir.func{{.*}} @_Z8bigRangei(%arg0: !s32i) { // CHECK: cir.switch.flat %[[COND:.*]] : !s32i, ^bb[[#RANGE_BR:]] [ // CHECK: ] // CHECK: ^bb[[#NO_PRED_BB:]]: // no predecessors diff --git a/clang/test/CIR/Transforms/ternary-fold.cir b/clang/test/CIR/Transforms/ternary-fold.cir index 1192a0ce29424..718906f5c6ee5 100644 --- a/clang/test/CIR/Transforms/ternary-fold.cir +++ b/clang/test/CIR/Transforms/ternary-fold.cir @@ -14,7 +14,7 @@ module { cir.return %1 : !s32i } - // CHECK: cir.func @fold_ternary(%{{.+}}: !s32i, %[[ARG:.+]]: !s32i) -> !s32i { + // CHECK: cir.func{{.*}} @fold_ternary(%{{.+}}: !s32i, %[[ARG:.+]]: !s32i) -> !s32i { // CHECK-NEXT: cir.return %[[ARG]] : !s32i // CHECK-NEXT: } @@ -28,7 +28,7 @@ module { cir.return %0 : !s32i } - // CHECK: cir.func @simplify_ternary(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i { + // CHECK: cir.func{{.*}} @simplify_ternary(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i { // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<42> : !s32i // CHECK-NEXT: %[[#B:]] = cir.select if %[[ARG0]] then %[[#A]] else %[[ARG1]] : (!cir.bool, !s32i, !s32i) -> !s32i // CHECK-NEXT: cir.return %[[#B]] : !s32i @@ -44,7 +44,7 @@ module { cir.return %0 : !s32i } - // CHECK: cir.func @simplify_ternary_false_const(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i { + // CHECK: cir.func{{.*}} @simplify_ternary_false_const(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i { // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<24> : !s32i // CHECK-NEXT: %[[#B:]] = cir.select if %[[ARG0]] then %[[ARG1]] else 
%[[#A]] : (!cir.bool, !s32i, !s32i) -> !s32i // CHECK-NEXT: cir.return %[[#B]] : !s32i @@ -62,7 +62,7 @@ module { cir.return %1 : !s32i } - // CHECK: cir.func @non_simplifiable_ternary(%[[ARG0:.+]]: !cir.bool) -> !s32i { + // CHECK: cir.func{{.*}} @non_simplifiable_ternary(%[[ARG0:.+]]: !cir.bool) -> !s32i { // CHECK-NEXT: %[[#A:]] = cir.alloca !s32i, !cir.ptr, ["a", init] // CHECK-NEXT: %[[#B:]] = cir.ternary(%[[ARG0]], true { // CHECK-NEXT: %[[#C:]] = cir.const #cir.int<42> : !s32i diff --git a/clang/test/CIR/Transforms/ternary.cir b/clang/test/CIR/Transforms/ternary.cir index 67ef7f95a6b52..fffafa9ff8e4c 100644 --- a/clang/test/CIR/Transforms/ternary.cir +++ b/clang/test/CIR/Transforms/ternary.cir @@ -22,7 +22,7 @@ module { cir.return %6 : !s32i } -// CHECK: cir.func @foo(%arg0: !s32i) -> !s32i { +// CHECK: cir.func{{.*}} @foo(%arg0: !s32i) -> !s32i { // CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["y", init] {alignment = 4 : i64} // CHECK: %1 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: cir.store %arg0, %0 : !s32i, !cir.ptr @@ -53,7 +53,7 @@ module { cir.return } -// CHECK: cir.func @foo2(%arg0: !cir.bool) { +// CHECK: cir.func{{.*}} @foo2(%arg0: !cir.bool) { // CHECK: cir.brcond %arg0 ^bb1, ^bb2 // CHECK: ^bb1: // pred: ^bb0 // CHECK: cir.br ^bb3 diff --git a/clang/test/CIR/Transforms/vector-cmp-fold.cir b/clang/test/CIR/Transforms/vector-cmp-fold.cir index b207fc08748e2..f3486bd26fe1b 100644 --- a/clang/test/CIR/Transforms/vector-cmp-fold.cir +++ b/clang/test/CIR/Transforms/vector-cmp-fold.cir @@ -10,7 +10,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return 
%[[RES]] : !cir.vector<4 x !s32i> @@ -28,7 +28,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -46,7 +46,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -64,7 +64,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -82,7 +82,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -100,7 +100,7 @@ module { cir.return %new_vec : !cir.vector<4 
x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -120,7 +120,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -140,7 +140,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -160,7 +160,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -180,7 +180,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // 
CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -200,7 +200,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -220,7 +220,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_cmp_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> diff --git a/clang/test/CIR/Transforms/vector-create-fold.cir b/clang/test/CIR/Transforms/vector-create-fold.cir index fb8f66dc4debc..fb8c39e4dda40 100644 --- a/clang/test/CIR/Transforms/vector-create-fold.cir +++ b/clang/test/CIR/Transforms/vector-create-fold.cir @@ -12,7 +12,7 @@ module { cir.return %vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_create_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_create_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[VEC:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, // CHECK-SAME: #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[VEC]] : 
!cir.vector<4 x !s32i> diff --git a/clang/test/CIR/Transforms/vector-shuffle-dynamic-fold.cir b/clang/test/CIR/Transforms/vector-shuffle-dynamic-fold.cir index 46ab04502afec..6584df3d7050b 100644 --- a/clang/test/CIR/Transforms/vector-shuffle-dynamic-fold.cir +++ b/clang/test/CIR/Transforms/vector-shuffle-dynamic-fold.cir @@ -11,7 +11,7 @@ module { } // Masking indices [8, 7, 6, 5] AND 3 = [0, 3, 2, 1] - // CHECK: cir.func @fold_shuffle_dynamic_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_shuffle_dynamic_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[NEW_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[NEW_VEC:.*]] : !cir.vector<4 x !s32i> @@ -23,7 +23,7 @@ module { } // Masking indices [3, 2, 1, 0] AND 3 = [3, 2, 1, 0] - // CHECK: cir.func @fold_shuffle_dynamic_vector_op_test_2() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_shuffle_dynamic_vector_op_test_2() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[NEW_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<4> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[NEW_VEC:.*]] : !cir.vector<4 x !s32i> } diff --git a/clang/test/CIR/Transforms/vector-shuffle-fold.cir b/clang/test/CIR/Transforms/vector-shuffle-fold.cir index 87d409728989b..7aaddc051a75b 100644 --- a/clang/test/CIR/Transforms/vector-shuffle-fold.cir +++ b/clang/test/CIR/Transforms/vector-shuffle-fold.cir @@ -12,7 +12,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_shuffle_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, // CHECK-SAME: #cir.int<4> : 
!s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> @@ -32,7 +32,7 @@ module { cir.return %new_vec : !cir.vector<6 x !s32i> } - // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<6 x !s32i> { + // CHECK: cir.func{{.*}} @fold_shuffle_vector_op_test() -> !cir.vector<6 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, // CHECK-SAME: #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i]> : !cir.vector<6 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<6 x !s32i> @@ -52,7 +52,7 @@ module { cir.return %new_vec : !cir.vector<4 x !s32i> } - // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @fold_shuffle_vector_op_test() -> !cir.vector<4 x !s32i> { // CHECK: cir.const #cir.const_vector<[#cir.undef : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, // CHECK-SAME: #cir.int<4> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> diff --git a/clang/test/CIR/Transforms/vector-ternary-fold.cir b/clang/test/CIR/Transforms/vector-ternary-fold.cir index f2e18576da74b..3de614a39264f 100644 --- a/clang/test/CIR/Transforms/vector-ternary-fold.cir +++ b/clang/test/CIR/Transforms/vector-ternary-fold.cir @@ -12,7 +12,7 @@ module { } // [1, 0, 1, 0] ? 
[1, 2, 3, 4] : [5, 6, 7, 8] Will be fold to [1, 6, 3, 8] - // CHECK: cir.func @vector_ternary_fold_test() -> !cir.vector<4 x !s32i> { + // CHECK: cir.func{{.*}} @vector_ternary_fold_test() -> !cir.vector<4 x !s32i> { // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<4 x !s32i> // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<4 x !s32i> } diff --git a/clang/test/CIR/func-linkage.cpp b/clang/test/CIR/func-linkage.cpp new file mode 100644 index 0000000000000..d43f7ed273063 --- /dev/null +++ b/clang/test/CIR/func-linkage.cpp @@ -0,0 +1,51 @@ +// Linkage types of global variables +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck %s -check-prefix=CIR --input-file %t.cir +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck %s -check-prefix=LLVM --input-file %t-cir.ll +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck %s -check-prefix=OGCG --input-file %t.ll + +void a() {} + +// CIR: cir.func dso_local @_Z1av() +// LLVM: define dso_local void @_Z1av() +// OGCG: define dso_local void @_Z1av() + +extern void b(); +// CIR: cir.func private @_Z1bv() +// LLVM: declare void @_Z1bv() +// OGCG: declare void @_Z1bv() + +static void c() {} +// CIR: cir.func internal private dso_local @_ZL1cv() +// LLVM: define internal void @_ZL1cv() +// OGCG: define internal void @_ZL1cv() + +inline void d() {} +// CIR: cir.func comdat linkonce_odr @_Z1dv() +// LLVM: define linkonce_odr void @_Z1dv() +// OGCG: define linkonce_odr void @_Z1dv(){{.*}} comdat + +namespace { + void e() {} +} + +// CIR: cir.func internal private dso_local @_ZN12_GLOBAL__N_11eEv() +// LLVM: define internal void @_ZN12_GLOBAL__N_11eEv() +// OGCG: define internal void @_ZN12_GLOBAL__N_11eEv() + +void f(); +// CIR: cir.func private 
@_Z1fv() +// LLVM: declare void @_Z1fv() +// OGCG: declare void @_Z1fv() + +// Force the functions to be emitted +void reference_funcs() { + a(); + b(); + c(); + d(); + e(); + f(); +} diff --git a/clang/test/CIR/func-simple.cpp b/clang/test/CIR/func-simple.cpp index 45cf1746de713..c9cb5c5595352 100644 --- a/clang/test/CIR/func-simple.cpp +++ b/clang/test/CIR/func-simple.cpp @@ -2,17 +2,17 @@ // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s void empty() { } -// CHECK: cir.func @_Z5emptyv() { +// CHECK: cir.func{{.*}} @_Z5emptyv() { // CHECK: cir.return // CHECK: } void voidret() { return; } -// CHECK: cir.func @_Z7voidretv() { +// CHECK: cir.func{{.*}} @_Z7voidretv() { // CHECK: cir.return // CHECK: } int intfunc() { return 42; } -// CHECK: cir.func @_Z7intfuncv() -> !s32i { +// CHECK: cir.func{{.*}} @_Z7intfuncv() -> !s32i { // CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: %1 = cir.const #cir.int<42> : !s32i // CHECK: cir.store %1, %0 : !s32i, !cir.ptr @@ -27,7 +27,7 @@ int scopes() { } } } -// CHECK: cir.func @_Z6scopesv() -> !s32i { +// CHECK: cir.func{{.*}} @_Z6scopesv() -> !s32i { // CHECK: %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: cir.scope { // CHECK: cir.scope { @@ -41,7 +41,7 @@ int scopes() { // CHECK: } long longfunc() { return 42l; } -// CHECK: cir.func @_Z8longfuncv() -> !s64i +// CHECK: cir.func{{.*}} @_Z8longfuncv() -> !s64i // CHECK: %0 = cir.alloca !s64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} // CHECK: %1 = cir.const #cir.int<42> : !s64i // CHECK: cir.store %1, %0 : !s64i, !cir.ptr @@ -50,7 +50,7 @@ long longfunc() { return 42l; } // CHECK: } unsigned unsignedfunc() { return 42u; } -// CHECK: cir.func @_Z12unsignedfuncv() -> !u32i +// CHECK: cir.func{{.*}} @_Z12unsignedfuncv() -> !u32i // CHECK: %0 = cir.alloca !u32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: %1 = cir.const #cir.int<42> : !u32i // 
CHECK: cir.store %1, %0 : !u32i, !cir.ptr @@ -59,7 +59,7 @@ unsigned unsignedfunc() { return 42u; } // CHECK: } unsigned long long ullfunc() { return 42ull; } -// CHECK: cir.func @_Z7ullfuncv() -> !u64i +// CHECK: cir.func{{.*}} @_Z7ullfuncv() -> !u64i // CHECK: %0 = cir.alloca !u64i, !cir.ptr, ["__retval"] {alignment = 8 : i64} // CHECK: %1 = cir.const #cir.int<42> : !u64i // CHECK: cir.store %1, %0 : !u64i, !cir.ptr @@ -68,7 +68,7 @@ unsigned long long ullfunc() { return 42ull; } // CHECK: } bool boolfunc() { return true; } -// CHECK: cir.func @_Z8boolfuncv() -> !cir.bool { +// CHECK: cir.func{{.*}} @_Z8boolfuncv() -> !cir.bool { // CHECK: %0 = cir.alloca !cir.bool, !cir.ptr, ["__retval"] {alignment = 1 : i64} // CHECK: %1 = cir.const #true // CHECK: cir.store %1, %0 : !cir.bool, !cir.ptr @@ -77,7 +77,7 @@ bool boolfunc() { return true; } // CHECK: } float floatfunc() { return 42.42f; } -// CHECK: cir.func @_Z9floatfuncv() -> !cir.float { +// CHECK: cir.func{{.*}} @_Z9floatfuncv() -> !cir.float { // CHECK: %0 = cir.alloca !cir.float, !cir.ptr, ["__retval"] {alignment = 4 : i64} // CHECK: %1 = cir.const #cir.fp<4.242 // CHECK: cir.store %1, %0 : !cir.float, !cir.ptr @@ -86,7 +86,7 @@ float floatfunc() { return 42.42f; } // CHECK: } double doublefunc() { return 42.42; } -// CHECK: cir.func @_Z10doublefuncv() -> !cir.double { +// CHECK: cir.func{{.*}} @_Z10doublefuncv() -> !cir.double { // CHECK: %0 = cir.alloca !cir.double, !cir.ptr, ["__retval"] {alignment = 8 : i64} // CHECK: %1 = cir.const #cir.fp<4.242 // CHECK: cir.store %1, %0 : !cir.double, !cir.ptr diff --git a/clang/test/CIR/mlprint.c b/clang/test/CIR/mlprint.c index 755a6cb47855e..1630bc1e3ce9b 100644 --- a/clang/test/CIR/mlprint.c +++ b/clang/test/CIR/mlprint.c @@ -7,7 +7,7 @@ int foo(void) { } // CIR: IR Dump After CIRCanonicalize (cir-canonicalize) -// CIR: cir.func @foo() -> !s32i +// CIR: cir.func{{.*}} @foo() -> !s32i // LLVM: IR Dump After cir::direct::ConvertCIRToLLVMPass (cir-flat-to-llvm) // 
LLVM: llvm.func @foo() -> i32 // LLVM: IR Dump After diff --git a/clang/test/CodeGen/PowerPC/builtins-bcd-transform.c b/clang/test/CodeGen/PowerPC/builtins-bcd-transform.c new file mode 100644 index 0000000000000..74a8500da6dab --- /dev/null +++ b/clang/test/CodeGen/PowerPC/builtins-bcd-transform.c @@ -0,0 +1,79 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// Testfile that verifies positive cases (0 or 1 only) for BCD builtins national2packed, packed2zoned and zoned2packed. +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -O2 -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -O2 -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -O2 -target-cpu pwr9 \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_National2packed_imm1( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.national2packed(<16 x i8> [[A]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_National2packed_imm1(vector unsigned char a) { + return __builtin_ppc_national2packed (a,'\1'); +} + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_National2packed_imm0( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.national2packed(<16 x i8> [[A]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_National2packed_imm0(vector unsigned char a) { + return __builtin_ppc_national2packed (a,'\0'); +} + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_Packed2national( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.packed2national(<16 x i8> [[A]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_Packed2national(vector unsigned char a){ + return __builtin_ppc_packed2national(a); +} + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_Packed2zoned_imm0( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.packed2zoned(<16 x i8> [[A]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_Packed2zoned_imm0(vector unsigned char a){ + return __builtin_ppc_packed2zoned(a,'\0'); +} + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_Packed2zoned_imm1( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.packed2zoned(<16 x i8> [[A]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_Packed2zoned_imm1(vector unsigned char a){ + return __builtin_ppc_packed2zoned(a,'\1'); +} + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_Zoned2packed_imm0( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.zoned2packed(<16 x i8> [[A]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_Zoned2packed_imm0(vector unsigned char a){ + return __builtin_ppc_zoned2packed(a,'\0'); +} + +// CHECK-LABEL: define dso_local <16 x i8> @tBcd_Zoned2packed_imm1( +// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.ppc.zoned2packed(<16 x i8> [[A]], i32 1) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +vector unsigned char tBcd_Zoned2packed_imm1(vector unsigned char a){ + return 
__builtin_ppc_zoned2packed(a,'\1'); +} diff --git a/clang/test/CodeGen/X86/ms-secure-hotpatch-bad-file.c b/clang/test/CodeGen/X86/ms-secure-hotpatch-bad-file.c index 839dd44f7ff61..7c8c7d590060d 100644 --- a/clang/test/CodeGen/X86/ms-secure-hotpatch-bad-file.c +++ b/clang/test/CodeGen/X86/ms-secure-hotpatch-bad-file.c @@ -3,7 +3,7 @@ // This verifies that we correctly handle a -fms-secure-hotpatch-functions-file argument that points // to a missing file. // -// RUN: not %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-file=%S/this-file-is-intentionally-missing-do-not-create-it.txt /Fo%t.obj %s 2>&1 | FileCheck %s +// RUN: not %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-file=%S/this-file-is-intentionally-missing-do-not-create-it.txt /Fo%t.obj -- %s 2>&1 | FileCheck %s // CHECK: failed to open hotpatch functions file void this_might_have_side_effects(); diff --git a/clang/test/CodeGen/X86/ms-secure-hotpatch-cpp.cpp b/clang/test/CodeGen/X86/ms-secure-hotpatch-cpp.cpp index 3dc75c95d76f7..24e1c2937baac 100644 --- a/clang/test/CodeGen/X86/ms-secure-hotpatch-cpp.cpp +++ b/clang/test/CodeGen/X86/ms-secure-hotpatch-cpp.cpp @@ -3,7 +3,7 @@ // This verifies that hotpatch function attributes are correctly propagated when compiling directly to OBJ, // and that name mangling works as expected. 
// -// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-list=?this_gets_hotpatched@@YAHXZ /Fo%t.obj %s +// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-list=?this_gets_hotpatched@@YAHXZ /Fo%t.obj -- %s // RUN: llvm-readobj --codeview %t.obj | FileCheck %s void this_might_have_side_effects(); diff --git a/clang/test/CodeGen/X86/ms-secure-hotpatch-eh.cpp b/clang/test/CodeGen/X86/ms-secure-hotpatch-eh.cpp index 69704626c8cb6..66fbc3a950bbf 100644 --- a/clang/test/CodeGen/X86/ms-secure-hotpatch-eh.cpp +++ b/clang/test/CodeGen/X86/ms-secure-hotpatch-eh.cpp @@ -2,7 +2,7 @@ // Global constant data such as exception handler tables should not be redirected by Windows Secure Hot-Patching // -// RUN: %clang_cl -c --target=x86_64-windows-msvc /EHsc -O2 -fms-secure-hotpatch-functions-list=this_gets_hotpatched /Fo%t.obj /clang:-S /clang:-o- %s 2>& 1 | FileCheck %s +// RUN: %clang_cl -c --target=x86_64-windows-msvc /EHsc -O2 -fms-secure-hotpatch-functions-list=this_gets_hotpatched /Fo%t.obj /clang:-S /clang:-o- -- %s 2>& 1 | FileCheck %s class Foo { public: diff --git a/clang/test/CodeGen/X86/ms-secure-hotpatch-globals.c b/clang/test/CodeGen/X86/ms-secure-hotpatch-globals.c index d76d2aa6d8acc..ff3a1a47288a6 100644 --- a/clang/test/CodeGen/X86/ms-secure-hotpatch-globals.c +++ b/clang/test/CodeGen/X86/ms-secure-hotpatch-globals.c @@ -4,7 +4,7 @@ // // RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 \ // RUN: -fms-secure-hotpatch-functions-list=hp1,hp2,hp3,hp4,hp5_phi_ptr_mixed,hp_phi_ptr_both,hp_const_ptr_sub \ -// RUN: /clang:-S /clang:-o- %s | FileCheck %s +// RUN: /clang:-S /clang:-o- -- %s | FileCheck %s #ifdef __clang__ #define NO_TAIL __attribute__((disable_tail_calls)) diff --git a/clang/test/CodeGen/X86/ms-secure-hotpatch-lto.c b/clang/test/CodeGen/X86/ms-secure-hotpatch-lto.c index 6adb0b1818e31..cbf19adb4739f 100644 --- a/clang/test/CodeGen/X86/ms-secure-hotpatch-lto.c +++ 
b/clang/test/CodeGen/X86/ms-secure-hotpatch-lto.c @@ -2,7 +2,7 @@ // This verifies that hotpatch function attributes are correctly propagated through LLVM IR when compiling with LTO. // -// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-list=this_gets_hotpatched -flto /Fo%t.bc %s +// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-list=this_gets_hotpatched -flto /Fo%t.bc -- %s // RUN: llvm-dis %t.bc -o - | FileCheck %s // // CHECK-LABEL: define dso_local noundef i32 @this_gets_hotpatched() diff --git a/clang/test/CodeGen/X86/ms-secure-hotpatch.c b/clang/test/CodeGen/X86/ms-secure-hotpatch.c index b829e5acc5c83..9bc8c2cf364bf 100644 --- a/clang/test/CodeGen/X86/ms-secure-hotpatch.c +++ b/clang/test/CodeGen/X86/ms-secure-hotpatch.c @@ -3,7 +3,7 @@ // This verifies that hotpatch function attributes are correctly propagated when compiling directly to OBJ. // // RUN: echo this_gets_hotpatched > %t.patch-functions.txt -// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-file=%t.patch-functions.txt /Fo%t.obj %s +// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 /Z7 -fms-secure-hotpatch-functions-file=%t.patch-functions.txt /Fo%t.obj -- %s // RUN: llvm-readobj --codeview %t.obj | FileCheck %s void this_might_have_side_effects(); diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index e18977a4559b1..51e0038b64cde 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -21,6 +21,36 @@ long test_InterlockedAdd_constant(long volatile *Addend) { // CHECK-MSVC: ret i32 %[[NEWVAL:[0-9]+]] // CHECK-LINUX: error: call to undeclared function '_InterlockedAdd' +long test_InterlockedAdd_acq(long volatile *Addend, long Value) { + return _InterlockedAdd_acq(Addend, Value); +} + +// CHECK-LABEL: define {{.*}} i32 @test_InterlockedAdd_acq(ptr %Addend, 
i32 %Value) {{.*}} { +// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add ptr %1, i32 %2 acquire, align 4 +// CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i32 %[[OLDVAL:[0-9]+]], %2 +// CHECK-MSVC: ret i32 %[[NEWVAL:[0-9]+]] +// CHECK-LINUX: error: call to undeclared function '_InterlockedAdd_acq' + +long test_InterlockedAdd_nf(long volatile *Addend, long Value) { + return _InterlockedAdd_nf(Addend, Value); +} + +// CHECK-LABEL: define {{.*}} i32 @test_InterlockedAdd_nf(ptr %Addend, i32 %Value) {{.*}} { +// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add ptr %1, i32 %2 monotonic, align 4 +// CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i32 %[[OLDVAL:[0-9]+]], %2 +// CHECK-MSVC: ret i32 %[[NEWVAL:[0-9]+]] +// CHECK-LINUX: error: call to undeclared function '_InterlockedAdd_nf' + +long test_InterlockedAdd_rel(long volatile *Addend, long Value) { + return _InterlockedAdd_rel(Addend, Value); +} + +// CHECK-LABEL: define {{.*}} i32 @test_InterlockedAdd_rel(ptr %Addend, i32 %Value) {{.*}} { +// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add ptr %1, i32 %2 release, align 4 +// CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i32 %[[OLDVAL:[0-9]+]], %2 +// CHECK-MSVC: ret i32 %[[NEWVAL:[0-9]+]] +// CHECK-LINUX: error: call to undeclared function '_InterlockedAdd_rel' + __int64 test_InterlockedAdd64(__int64 volatile *Addend, __int64 Value) { return _InterlockedAdd64(Addend, Value); } @@ -35,6 +65,36 @@ __int64 test_InterlockedAdd64_constant(__int64 volatile *Addend) { // CHECK-MSVC: ret i64 %[[NEWVAL:[0-9]+]] // CHECK-LINUX: error: call to undeclared function '_InterlockedAdd64' +__int64 test_InterlockedAdd64_acq(__int64 volatile *Addend, __int64 Value) { + return _InterlockedAdd64_acq(Addend, Value); +} + +// CHECK-LABEL: define {{.*}} i64 @test_InterlockedAdd64_acq(ptr %Addend, i64 %Value) {{.*}} { +// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add ptr %1, i64 %2 acquire, align 8 +// CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i64 %[[OLDVAL:[0-9]+]], %2 +// CHECK-MSVC: ret i64 %[[NEWVAL:[0-9]+]] +// 
CHECK-LINUX: error: call to undeclared function '_InterlockedAdd64_acq' + +__int64 test_InterlockedAdd64_nf(__int64 volatile *Addend, __int64 Value) { + return _InterlockedAdd64_nf(Addend, Value); +} + +// CHECK-LABEL: define {{.*}} i64 @test_InterlockedAdd64_nf(ptr %Addend, i64 %Value) {{.*}} { +// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add ptr %1, i64 %2 monotonic, align 8 +// CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i64 %[[OLDVAL:[0-9]+]], %2 +// CHECK-MSVC: ret i64 %[[NEWVAL:[0-9]+]] +// CHECK-LINUX: error: call to undeclared function '_InterlockedAdd64_nf' + +__int64 test_InterlockedAdd64_rel(__int64 volatile *Addend, __int64 Value) { + return _InterlockedAdd64_rel(Addend, Value); +} + +// CHECK-LABEL: define {{.*}} i64 @test_InterlockedAdd64_rel(ptr %Addend, i64 %Value) {{.*}} { +// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add ptr %1, i64 %2 release, align 8 +// CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i64 %[[OLDVAL:[0-9]+]], %2 +// CHECK-MSVC: ret i64 %[[NEWVAL:[0-9]+]] +// CHECK-LINUX: error: call to undeclared function '_InterlockedAdd64_rel' + void check_ReadWriteBarrier(void) { _ReadWriteBarrier(); } diff --git a/clang/test/CodeGen/builtins-overflow.c b/clang/test/CodeGen/builtins-overflow.c index 7c524723f76e8..0e04191b9e2ac 100644 --- a/clang/test/CodeGen/builtins-overflow.c +++ b/clang/test/CodeGen/builtins-overflow.c @@ -604,3 +604,15 @@ long long test_mixed_sign_mul_overflow_extend_unsigned(int x, unsigned y) { return LongLongErrorCode; return result; } + +_BitInt(65) test_mixed_sign_mul_overflow_bitint(unsigned _BitInt(65) y, _BitInt(119) a) { +// CHECK: call { i119, i1 } @llvm.umul.with.overflow.i119 +// CHECK: select i1 %{{.*}}, i119 %{{.*}}, i119 %{{.*}} +// CHECK: trunc i119 +// CHECK: zext i65 +// CHECK: store + unsigned _BitInt(65) result; + if (__builtin_mul_overflow(a, y, &result)) + return LongLongErrorCode; + return result; +} diff --git a/clang/test/CodeGen/logb_scalbn.c b/clang/test/CodeGen/logb_scalbn.c index be5e68b5fd4b0..52c52bcb292be 
100644 --- a/clang/test/CodeGen/logb_scalbn.c +++ b/clang/test/CodeGen/logb_scalbn.c @@ -4,6 +4,11 @@ // RUN: %clang -cc1 -triple amdgcn-amd-amdhsa -o - -ffp-exception-behavior=strict -emit-llvm %s | FileCheck %s -check-prefixes=STRICT // RUN: %clang -cc1 -triple amdgcn-amd-amdhsa -o - -ffp-exception-behavior=maytrap -emit-llvm %s | FileCheck %s -check-prefixes=MAYTRAP // RUN: %clang -cc1 -triple amdgcn-amd-amdhsa -o - -fmath-errno -emit-llvm %s | FileCheck %s -check-prefixes=ERRNO +// RUN: %clang -cc1 -triple spirv64-amd-amdhsa -o - -emit-llvm %s | FileCheck %s -check-prefixes=AMDGCNSPIRV-DEFAULT +// RUN: %clang -cc1 -triple spirv64-amd-amdhsa -o - -ffp-exception-behavior=ignore -emit-llvm %s | FileCheck %s -check-prefixes=AMDGCNSPIRV-IGNORE +// RUN: %clang -cc1 -triple spirv64-amd-amdhsa -o - -ffp-exception-behavior=strict -emit-llvm %s | FileCheck %s -check-prefixes=AMDGCNSPIRV-STRICT +// RUN: %clang -cc1 -triple spirv64-amd-amdhsa -o - -ffp-exception-behavior=maytrap -emit-llvm %s | FileCheck %s -check-prefixes=AMDGCNSPIRV-MAYTRAP +// RUN: %clang -cc1 -triple spirv64-amd-amdhsa -o - -fmath-errno -emit-llvm %s | FileCheck %s -check-prefixes=AMDGCNSPIRV-ERRNO // DEFAULT-LABEL: define dso_local void @test_logbf( // DEFAULT-SAME: ) #[[ATTR0:[0-9]+]] { @@ -78,6 +83,79 @@ // ERRNO-NEXT: store float [[CALL]], ptr [[D1_ASCAST]], align 4 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_logbf( +// AMDGCNSPIRV-DEFAULT-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float 0x40301999A0000000) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// 
AMDGCNSPIRV-DEFAULT-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP4:%.*]] = call addrspace(4) float @llvm.fabs.f32(float 0x40301999A0000000) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP5:%.*]] = fcmp one float [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP7:%.*]] = select i1 false, float 0xFFF0000000000000, float [[TMP6]] +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_logbf( +// AMDGCNSPIRV-IGNORE-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float 0x40301999A0000000) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP4:%.*]] = call addrspace(4) float @llvm.fabs.f32(float 0x40301999A0000000) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP5:%.*]] = fcmp one float [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]] +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP7:%.*]] = select i1 false, float 0xFFF0000000000000, float [[TMP6]] +// AMDGCNSPIRV-IGNORE-NEXT: store float [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_logbf( +// AMDGCNSPIRV-STRICT-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// 
AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float 0x40301999A0000000) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float +// AMDGCNSPIRV-STRICT-NEXT: [[TMP4:%.*]] = call addrspace(4) float @llvm.fabs.f32(float 0x40301999A0000000) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP5:%.*]] = fcmp one float [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP4]] +// AMDGCNSPIRV-STRICT-NEXT: [[TMP7:%.*]] = select i1 false, float 0xFFF0000000000000, float [[TMP6]] +// AMDGCNSPIRV-STRICT-NEXT: store float [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_logbf( +// AMDGCNSPIRV-MAYTRAP-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float 0x40301999A0000000) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP4:%.*]] = call addrspace(4) float @llvm.fabs.f32(float 0x40301999A0000000) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP5:%.*]] = fcmp one float [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP3]], 
float [[TMP4]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP7:%.*]] = select i1 false, float 0xFFF0000000000000, float [[TMP6]] +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_logbf( +// AMDGCNSPIRV-ERRNO-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) float @logbf(float noundef 0x40301999A0000000) #[[ATTR2:[0-9]+]] +// AMDGCNSPIRV-ERRNO-NEXT: store float [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_logbf() { float D1 = __builtin_logbf(16.1f); } @@ -182,6 +260,107 @@ void test_logbf() { // ERRNO-NEXT: store float [[CALL]], ptr [[D1_ASCAST]], align 4 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_logbf_var( +// AMDGCNSPIRV-DEFAULT-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[TMP0]]) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP3:%.*]] = 
add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to float +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP6:%.*]] = call addrspace(4) float @llvm.fabs.f32(float [[TMP5]]) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP7:%.*]] = fcmp one float [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP6]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP9:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float 0xFFF0000000000000, float [[TMP8]] +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_logbf_var( +// AMDGCNSPIRV-IGNORE-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[TMP0]]) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to float +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: 
[[TMP6:%.*]] = call addrspace(4) float @llvm.fabs.f32(float [[TMP5]]) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP7:%.*]] = fcmp one float [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP6]] +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP9:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float 0xFFF0000000000000, float [[TMP8]] +// AMDGCNSPIRV-IGNORE-NEXT: store float [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_logbf_var( +// AMDGCNSPIRV-STRICT-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[TMP0]]) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to float +// AMDGCNSPIRV-STRICT-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP6:%.*]] = call addrspace(4) float @llvm.fabs.f32(float [[TMP5]]) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP7:%.*]] = fcmp one float [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float 
[[TMP6]] +// AMDGCNSPIRV-STRICT-NEXT: [[TMP9:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float 0xFFF0000000000000, float [[TMP8]] +// AMDGCNSPIRV-STRICT-NEXT: store float [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_logbf_var( +// AMDGCNSPIRV-MAYTRAP-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = call addrspace(4) { float, i32 } @llvm.frexp.f32.i32(float [[TMP0]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to float +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP6:%.*]] = call addrspace(4) float @llvm.fabs.f32(float [[TMP5]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP7:%.*]] = fcmp one float [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP4]], float [[TMP6]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP9:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float 0xFFF0000000000000, float [[TMP8]] +// 
AMDGCNSPIRV-MAYTRAP-NEXT: store float [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_logbf_var( +// AMDGCNSPIRV-ERRNO-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) float @logbf(float noundef [[TMP0]]) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store float [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_logbf_var(float a) { float D1 = __builtin_logbf(a); } @@ -273,6 +452,79 @@ void test_logbf_var(float a) { // ERRNO-NEXT: store double [[CALL]], ptr [[D1_ASCAST]], align 8 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_logb( +// AMDGCNSPIRV-DEFAULT-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double 1.510000e+01) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to double +// 
AMDGCNSPIRV-DEFAULT-NEXT: [[TMP4:%.*]] = call addrspace(4) double @llvm.fabs.f64(double 1.510000e+01) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP5:%.*]] = fcmp one double [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP4]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP7:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP6]] +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_logb( +// AMDGCNSPIRV-IGNORE-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double 1.510000e+01) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to double +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP4:%.*]] = call addrspace(4) double @llvm.fabs.f64(double 1.510000e+01) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP5:%.*]] = fcmp one double [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP4]] +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP7:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP6]] +// AMDGCNSPIRV-IGNORE-NEXT: store double [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_logb( +// AMDGCNSPIRV-STRICT-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: 
[[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double 1.510000e+01) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to double +// AMDGCNSPIRV-STRICT-NEXT: [[TMP4:%.*]] = call addrspace(4) double @llvm.fabs.f64(double 1.510000e+01) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP5:%.*]] = fcmp one double [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP4]] +// AMDGCNSPIRV-STRICT-NEXT: [[TMP7:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP6]] +// AMDGCNSPIRV-STRICT-NEXT: store double [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_logb( +// AMDGCNSPIRV-MAYTRAP-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double 1.510000e+01) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to double +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP4:%.*]] = call addrspace(4) double @llvm.fabs.f64(double 1.510000e+01) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP5:%.*]] = fcmp one double [[TMP4]], 0x7FF0000000000000 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP4]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP7:%.*]] = select i1 false, double 
0xFFF0000000000000, double [[TMP6]] +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[TMP7]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_logb( +// AMDGCNSPIRV-ERRNO-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) double @logb(double noundef 1.510000e+01) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store double [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_logb() { double D1 = __builtin_logb(15.1); } @@ -398,6 +650,107 @@ void test_logb() { // ERRNO-NEXT: store double [[CALL]], ptr [[D1_ASCAST]], align 8 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_logb_var( +// AMDGCNSPIRV-DEFAULT-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[TMP0]]) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to 
double +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP5:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP6:%.*]] = call addrspace(4) double @llvm.fabs.f64(double [[TMP5]]) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP7:%.*]] = fcmp one double [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP6]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP9:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0xFFF0000000000000, double [[TMP8]] +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_logb_var( +// AMDGCNSPIRV-IGNORE-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[TMP0]]) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to double +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP5:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP6:%.*]] = call addrspace(4) double @llvm.fabs.f64(double [[TMP5]]) +// 
AMDGCNSPIRV-IGNORE-NEXT: [[TMP7:%.*]] = fcmp one double [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP6]] +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP9:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0xFFF0000000000000, double [[TMP8]] +// AMDGCNSPIRV-IGNORE-NEXT: store double [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_logb_var( +// AMDGCNSPIRV-STRICT-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[TMP0]]) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to double +// AMDGCNSPIRV-STRICT-NEXT: [[TMP5:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP6:%.*]] = call addrspace(4) double @llvm.fabs.f64(double [[TMP5]]) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP7:%.*]] = fcmp one double [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP6]] +// AMDGCNSPIRV-STRICT-NEXT: [[TMP9:%.*]] = 
fcmp oeq double [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0xFFF0000000000000, double [[TMP8]] +// AMDGCNSPIRV-STRICT-NEXT: store double [[TMP10]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_logb_var( +// AMDGCNSPIRV-MAYTRAP-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = call addrspace(4) { double, i32 } @llvm.frexp.f64.i32(double [[TMP0]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP1]], 1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to double +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP5:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP6:%.*]] = call addrspace(4) double @llvm.fabs.f64(double [[TMP5]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP7:%.*]] = fcmp one double [[TMP6]], 0x7FF0000000000000 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP6]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP9:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0xFFF0000000000000, double [[TMP8]] +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[TMP10]], ptr 
addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_logb_var( +// AMDGCNSPIRV-ERRNO-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) double @logb(double noundef [[TMP0]]) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store double [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_logb_var(double a) { double D1 = __builtin_logb(a); } @@ -455,6 +808,51 @@ void test_logb_var(double a) { // ERRNO-NEXT: store float [[CALL]], ptr [[D1_ASCAST]], align 4 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbnf( +// AMDGCNSPIRV-DEFAULT-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x4030B33340000000, i32 10) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbnf( +// AMDGCNSPIRV-IGNORE-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] 
+// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x4030B33340000000, i32 10) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbnf( +// AMDGCNSPIRV-STRICT-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x4030B33340000000, i32 10) +// AMDGCNSPIRV-STRICT-NEXT: store float [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbnf( +// AMDGCNSPIRV-MAYTRAP-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x4030B33340000000, i32 10) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbnf( +// AMDGCNSPIRV-ERRNO-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) float @scalbnf(float 
noundef 0x4030B33340000000, i32 noundef 10) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store float [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbnf() { float D1 = __builtin_scalbnf(16.7f, 10); } @@ -535,6 +933,71 @@ void test_scalbnf() { // ERRNO-NEXT: store float [[CALL]], ptr [[D1_ASCAST]], align 4 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbnf_var1( +// AMDGCNSPIRV-DEFAULT-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 9) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbnf_var1( +// AMDGCNSPIRV-IGNORE-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: 
[[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 9) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbnf_var1( +// AMDGCNSPIRV-STRICT-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 9) +// AMDGCNSPIRV-STRICT-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbnf_var1( +// AMDGCNSPIRV-MAYTRAP-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 
+// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 9) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbnf_var1( +// AMDGCNSPIRV-ERRNO-SAME: float noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) float @scalbnf(float noundef [[TMP0]], i32 noundef 9) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store float [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbnf_var1(float a) { float D1 = __builtin_scalbnf(a, 9); } @@ -615,6 +1078,71 @@ void test_scalbnf_var1(float a) { // ERRNO-NEXT: store float [[CALL]], ptr [[D1_ASCAST]], align 4 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbnf_var2( +// AMDGCNSPIRV-DEFAULT-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: 
store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x402E666660000000, i32 [[TMP0]]) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbnf_var2( +// AMDGCNSPIRV-IGNORE-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x402E666660000000, i32 [[TMP0]]) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbnf_var2( +// AMDGCNSPIRV-STRICT-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 
4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x402E666660000000, i32 [[TMP0]]) +// AMDGCNSPIRV-STRICT-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbnf_var2( +// AMDGCNSPIRV-MAYTRAP-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float 0x402E666660000000, i32 [[TMP0]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbnf_var2( +// AMDGCNSPIRV-ERRNO-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load i32, ptr 
addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) float @scalbnf(float noundef 0x402E666660000000, i32 noundef [[TMP0]]) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store float [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbnf_var2(int b) { float D1 = __builtin_scalbnf(15.2f, b); } @@ -719,6 +1247,91 @@ void test_scalbnf_var2(int b) { // ERRNO-NEXT: store float [[CALL]], ptr [[D1_ASCAST]], align 4 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbnf_var3( +// AMDGCNSPIRV-DEFAULT-SAME: float noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP2:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-DEFAULT-NEXT: store float [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbnf_var3( +// 
AMDGCNSPIRV-IGNORE-SAME: float noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP2:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-IGNORE-NEXT: store float [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbnf_var3( +// AMDGCNSPIRV-STRICT-SAME: float noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// 
AMDGCNSPIRV-STRICT-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP2:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-STRICT-NEXT: store float [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbnf_var3( +// AMDGCNSPIRV-MAYTRAP-SAME: float noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP2:%.*]] = call addrspace(4) float @llvm.ldexp.f32.i32(float [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: store float [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// 
AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbnf_var3( +// AMDGCNSPIRV-ERRNO-SAME: float noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca float, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store float [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[A_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) float @scalbnf(float noundef [[TMP0]], i32 noundef [[TMP1]]) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store float [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbnf_var3(float a, int b) { float D1 = __builtin_scalbnf(a, b); } @@ -776,6 +1389,51 @@ void test_scalbnf_var3(float a, int b) { // ERRNO-NEXT: store double [[CALL]], ptr [[D1_ASCAST]], align 8 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbn( +// AMDGCNSPIRV-DEFAULT-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = call addrspace(4) double 
@llvm.ldexp.f64.i32(double 1.720000e+01, i32 10) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbn( +// AMDGCNSPIRV-IGNORE-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.720000e+01, i32 10) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbn( +// AMDGCNSPIRV-STRICT-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.720000e+01, i32 10) +// AMDGCNSPIRV-STRICT-NEXT: store double [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbn( +// AMDGCNSPIRV-MAYTRAP-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.720000e+01, i32 10) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[TMP0]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbn( +// 
AMDGCNSPIRV-ERRNO-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) double @scalbn(double noundef 1.720000e+01, i32 noundef 10) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store double [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbn() { double D1 = __builtin_scalbn(17.2, 10); } @@ -856,6 +1514,71 @@ void test_scalbn() { // ERRNO-NEXT: store double [[CALL]], ptr [[D1_ASCAST]], align 8 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbn_var1( +// AMDGCNSPIRV-DEFAULT-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 9) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbn_var1( +// AMDGCNSPIRV-IGNORE-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: 
[[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 9) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbn_var1( +// AMDGCNSPIRV-STRICT-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 9) +// AMDGCNSPIRV-STRICT-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbn_var1( +// AMDGCNSPIRV-MAYTRAP-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca double, align 8 +// 
AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 9) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbn_var1( +// AMDGCNSPIRV-ERRNO-SAME: double noundef [[A:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) double @scalbn(double noundef [[TMP0]], i32 noundef 9) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store double [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbn_var1(double a) { double D1 = __builtin_scalbn(a, 9); } @@ -936,6 +1659,71 @@ void test_scalbn_var1(double a) { // ERRNO-NEXT: store double [[CALL]], ptr [[D1_ASCAST]], align 8 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbn_var2( +// AMDGCNSPIRV-DEFAULT-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// 
AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.540000e+01, i32 [[TMP0]]) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbn_var2( +// AMDGCNSPIRV-IGNORE-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.540000e+01, i32 [[TMP0]]) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbn_var2( +// AMDGCNSPIRV-STRICT-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// 
AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.540000e+01, i32 [[TMP0]]) +// AMDGCNSPIRV-STRICT-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbn_var2( +// AMDGCNSPIRV-MAYTRAP-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double 1.540000e+01, i32 [[TMP0]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[TMP1]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbn_var2( +// AMDGCNSPIRV-ERRNO-SAME: i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 
+// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) double @scalbn(double noundef 1.540000e+01, i32 noundef [[TMP0]]) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store double [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbn_var2(int b) { double D1 = __builtin_scalbn(15.4, b); } @@ -1040,6 +1828,91 @@ void test_scalbn_var2(int b) { // ERRNO-NEXT: store double [[CALL]], ptr [[D1_ASCAST]], align 8 // ERRNO-NEXT: ret void // +// AMDGCNSPIRV-DEFAULT-LABEL: define spir_func void @test_scalbn_var3( +// AMDGCNSPIRV-DEFAULT-SAME: double noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-DEFAULT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP1:%.*]] = 
load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-DEFAULT-NEXT: [[TMP2:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-DEFAULT-NEXT: store double [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-DEFAULT-NEXT: ret void +// +// AMDGCNSPIRV-IGNORE-LABEL: define spir_func void @test_scalbn_var3( +// AMDGCNSPIRV-IGNORE-SAME: double noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-IGNORE-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-IGNORE-NEXT: [[TMP2:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-IGNORE-NEXT: store double [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-IGNORE-NEXT: ret void +// +// AMDGCNSPIRV-STRICT-LABEL: define spir_func void @test_scalbn_var3( +// AMDGCNSPIRV-STRICT-SAME: double noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-STRICT-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR:%.*]] = 
alloca i32, align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-STRICT-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-STRICT-NEXT: [[TMP2:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-STRICT-NEXT: store double [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-STRICT-NEXT: ret void +// +// AMDGCNSPIRV-MAYTRAP-LABEL: define spir_func void @test_scalbn_var3( +// AMDGCNSPIRV-MAYTRAP-SAME: double noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-MAYTRAP-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP0:%.*]] = load double, ptr 
addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-MAYTRAP-NEXT: [[TMP2:%.*]] = call addrspace(4) double @llvm.ldexp.f64.i32(double [[TMP0]], i32 [[TMP1]]) +// AMDGCNSPIRV-MAYTRAP-NEXT: store double [[TMP2]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-MAYTRAP-NEXT: ret void +// +// AMDGCNSPIRV-ERRNO-LABEL: define spir_func void @test_scalbn_var3( +// AMDGCNSPIRV-ERRNO-SAME: double noundef [[A:%.*]], i32 noundef [[B:%.*]]) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-ERRNO-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[D1:%.*]] = alloca double, align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr [[D1]] to ptr addrspace(4) +// AMDGCNSPIRV-ERRNO-NEXT: store double [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: store i32 [[B]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[B_ADDR_ASCAST]], align 4 +// AMDGCNSPIRV-ERRNO-NEXT: [[CALL:%.*]] = call spir_func addrspace(4) double @scalbn(double noundef [[TMP0]], i32 noundef [[TMP1]]) #[[ATTR2]] +// AMDGCNSPIRV-ERRNO-NEXT: store double [[CALL]], ptr addrspace(4) [[D1_ASCAST]], align 8 +// AMDGCNSPIRV-ERRNO-NEXT: ret void +// void test_scalbn_var3(double a, int b) { double D1 = __builtin_scalbn(a, b); } diff --git a/clang/test/CodeGen/pragma-comment.c b/clang/test/CodeGen/pragma-comment.c index 861fba9aece3b..aa3aba18b9b2c 100644 --- 
a/clang/test/CodeGen/pragma-comment.c +++ b/clang/test/CodeGen/pragma-comment.c @@ -34,4 +34,4 @@ // ELF-NOT: foo // This following match prevents the clang version metadata from matching the forbidden 'foo' and 'bar' tokens. // This can happen if the clang version string contains a Git repo URL that includes one of those substrings. -// ELF-LABEL: !"clang version +// ELF-LABEL: {{\!\".*clang version}} diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu index df56ec60c63ae..12474381e718b 100644 --- a/clang/test/CodeGenCUDA/bf16.cu +++ b/clang/test/CodeGenCUDA/bf16.cu @@ -37,11 +37,7 @@ __device__ __bf16 test_call( __bf16 in) { // CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0]; // CHECK: st.param.b16 [param0], %[[R]]; // CHECK: .param .align 2 .b8 retval0[2]; -// CHECK: call.uni (retval0), -// CHECK-NEXT: _Z13external_funcDF16b, -// CHECK-NEXT: ( -// CHECK-NEXT: param0 -// CHECK-NEXT ); +// CHECK: call.uni (retval0), _Z13external_funcDF16b, (param0); // CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0]; return external_func(in); // CHECK: st.param.b16 [func_retval0], %[[RET]] diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 3709b1ff52f35..864e301859682 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -2,6 +2,10 @@ // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s // REQUIRES: amdgpu-registered-target +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef half __attribute__((ext_vector_type(2))) half2; + // CHECK-LABEL: @test_setprio_inc_wg( // CHECK-NEXT: entry: // CHECK-NEXT: call void @llvm.amdgcn.s.setprio.inc.wg(i16 10) @@ -10,3 +14,43 @@ void test_setprio_inc_wg() { __builtin_amdgcn_s_setprio_inc_wg(10); } + +// CHECK-LABEL: @test_cvt_pk_f16_fp8( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0 +// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_pk_f16_fp8(global half2* out, short a) +{ + out[0] = __builtin_amdgcn_cvt_pk_f16_fp8(a); +} + +// CHECK-LABEL: @test_cvt_pk_f16_bf8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[TMP2]], i64 0 +// CHECK-NEXT: store <2 x half> [[TMP1]], ptr addrspace(1) 
[[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_pk_f16_bf8(global half2* out, short a) +{ + out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a); +} diff --git a/clang/test/Index/inline-assembly.c b/clang/test/Index/inline-assembly.c new file mode 100644 index 0000000000000..64a7ce03852c9 --- /dev/null +++ b/clang/test/Index/inline-assembly.c @@ -0,0 +1,46 @@ +static void inline_assembly_template_regardless_of_target_machine() { + int tmp; + asm volatile ( + "nop\n" + "a_value %w[v]\n" + "o_value %w[o]" + : [v] "=&r" (tmp) + : [o] "r" (tmp) + : "cc", "memory" + ); +} + +// RUN: c-index-test -test-inline-assembly %s 2>&1 | FileCheck %s +// CHECK: ===ASM TEMPLATE=== +// CHECK: nop +// CHECK: a_value ${0:w} +// CHECK: o_value ${1:w} +// CHECK: ===ASM TEMPLATE END=== +// CHECK: volatile: true +// CHECK: Output #0 Constraint (=&r): DeclRefExpr=tmp:2:9 +// CHECK: Input #0 Constraint (r): UnexposedExpr=tmp:2:9 +// CHECK: Clobber #0: cc +// CHECK: Clobber #1: memory +// CHECK: ===ASM END=== + +static void inline_assembly_valid_x86_example() { + int tmp; + asm ( + "nop\n" + "mov %w[o], %w[v]" + : [v] "=&r" (tmp) + : [o] "r" (tmp) + : "cc", "memory" + ); +} + +// CHECK: ===ASM TEMPLATE=== +// CHECK: nop +// CHECK: mov ${1:w}, ${0:w} +// CHECK: ===ASM TEMPLATE END=== +// CHECK: volatile: false +// CHECK: Output #0 Constraint (=&r): DeclRefExpr=tmp:27:9 +// CHECK: Input #0 Constraint (r): UnexposedExpr=tmp:27:9 +// CHECK: Clobber #0: cc +// CHECK: Clobber #1: memory +// CHECK: ===ASM END=== diff --git a/clang/test/Modules/pr118137.cppm b/clang/test/Modules/pr118137.cppm new file mode 100644 index 0000000000000..38e35399b05c0 --- /dev/null +++ b/clang/test/Modules/pr118137.cppm @@ -0,0 +1,24 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++23 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++23 %t/a.cppm -emit-reduced-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++23 %t/a.cppm 
-emit-llvm -o - + +//--- a.h +typedef int nghttp2_session_callbacks; + +//--- a.cppm +module; +#include "a.h" +export module g; +template +concept Deleter = requires(T ptr) { ptr; }; +template > struct Handle { + void GetRaw(this auto); +}; +struct SessionCallbacksDeleter + : Handle { +} Server_callbacks; +void Server() { Server_callbacks.GetRaw(); } diff --git a/clang/test/Modules/template-declare.cppm b/clang/test/Modules/template-declare.cppm new file mode 100644 index 0000000000000..01a7cca10e4ee --- /dev/null +++ b/clang/test/Modules/template-declare.cppm @@ -0,0 +1,39 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -dwarf-version=4 -debug-info-kind=constructor \ +// RUN: -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -dwarf-version=4 -debug-info-kind=constructor \ +// RUN: -emit-module-interface -o %t/b.pcm -fmodule-file=a=%t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cpp -dwarf-version=4 -debug-info-kind=constructor \ +// RUN: -emit-llvm -o - -fmodule-file=a=%t/a.pcm -fmodule-file=b=%t/b.pcm | FileCheck %t/b.cpp + +//--- a.cppm +export module a; +export template +class a { +private: + T *data; + +public: + virtual T* getData(); +}; + +extern template class a; + +//--- b.cppm +export module b; +import a; +export struct b { + a v; +}; + +//--- b.cpp +module b; +extern "C" void func() { + b(); +} + +// It is fine enough to check that we won't crash. 
+// CHECK: define {{.*}}void @func() diff --git a/clang/test/OpenMP/declare_mapper_messages.c b/clang/test/OpenMP/declare_mapper_messages.c index 2238689227311..4631016698c7d 100644 --- a/clang/test/OpenMP/declare_mapper_messages.c +++ b/clang/test/OpenMP/declare_mapper_messages.c @@ -1,12 +1,12 @@ -// RUN: %clang_cc1 -verify=omp50,expected -fopenmp -fopenmp-version=50 -ferror-limit 100 -DOMP50 %s -// RUN: %clang_cc1 -verify=omp51,expected -fopenmp -ferror-limit 100 %s -// RUN: %clang_cc1 -verify=expected,omp52 -fopenmp -fopenmp-version=52 -ferror-limit 100 -DOMP52 %s -// RUN: %clang_cc1 -verify=expected,omp60 -fopenmp -fopenmp-version=60 -ferror-limit 100 -DOMP60 %s +// RUN: %clang_cc1 -verify=omp50,omp5x,expected -fopenmp -fopenmp-version=50 -ferror-limit 100 -DOMP50 %s +// RUN: %clang_cc1 -verify=omp51,omp5x,expected -fopenmp -ferror-limit 100 %s +// RUN: %clang_cc1 -verify=expected,omp52,omp5x -fopenmp -fopenmp-version=52 -ferror-limit 100 -DOMP52 %s +// RUN: %clang_cc1 -verify=expected,omp60,omp60-maybe-simd -fopenmp -fopenmp-version=60 -ferror-limit 100 -DOMP60 %s -// RUN: %clang_cc1 -verify=omp50,expected -fopenmp-simd -fopenmp-version=50 -ferror-limit 100 -DOMP50 %s -// RUN: %clang_cc1 -verify=omp51-simd,expected -fopenmp-simd -ferror-limit 100 %s -// RUN: %clang_cc1 -verify=expected,omp52 -fopenmp-simd -fopenmp-version=52 -ferror-limit 100 -DOMP52 %s -// RUN: %clang_cc1 -verify=expected,omp60-simd -fopenmp-simd -fopenmp-version=60 -ferror-limit 100 -DOMP60 %s +// RUN: %clang_cc1 -verify=omp50,omp5x,expected -fopenmp-simd -fopenmp-version=50 -ferror-limit 100 -DOMP50 %s +// RUN: %clang_cc1 -verify=omp51-simd,omp5x,expected -fopenmp-simd -ferror-limit 100 %s +// RUN: %clang_cc1 -verify=expected,omp52,omp5x -fopenmp-simd -fopenmp-version=52 -ferror-limit 100 -DOMP52 %s +// RUN: %clang_cc1 -verify=expected,omp60-simd,omp60-maybe-simd -fopenmp-simd -fopenmp-version=60 -ferror-limit 100 -DOMP60 %s int temp; // expected-note {{'temp' declared here}} @@ -16,17 
+16,38 @@ struct vec { // expec double *data; }; -#pragma omp declare mapper // expected-error {{expected '(' after 'declare mapper'}} -#pragma omp declare mapper { // expected-error {{expected '(' after 'declare mapper'}} -#pragma omp declare mapper( // expected-error {{expected a type}} expected-error {{expected declarator on 'omp declare mapper' directive}} -#pragma omp declare mapper(# // expected-error {{expected a type}} expected-error {{expected declarator on 'omp declare mapper' directive}} -#pragma omp declare mapper(struct v // expected-error {{expected declarator on 'omp declare mapper' directive}} -#pragma omp declare mapper(struct vec // expected-error {{expected declarator on 'omp declare mapper' directive}} -#pragma omp declare mapper(S v // expected-error {{unknown type name 'S'}} -#pragma omp declare mapper(struct vec v // expected-error {{expected ')'}} expected-note {{to match this '('}} -#pragma omp declare mapper(aa:struct vec v) // expected-error {{expected at least one clause on '#pragma omp declare mapper' directive}} -#pragma omp declare mapper(bb:struct vec v) private(v) // expected-error {{expected at least one clause on '#pragma omp declare mapper' directive}} // expected-error {{unexpected OpenMP clause 'private' in directive '#pragma omp declare mapper'}} -#pragma omp declare mapper(cc:struct vec v) map(v) ( // expected-warning {{extra tokens at the end of '#pragma omp declare mapper' are ignored}} +// omp60-maybe-simd-error@+2 {{expected '(' after 'declare_mapper'}} +// omp5x-error@+1 {{expected '(' after 'declare mapper'}} +#pragma omp declare mapper +// omp60-maybe-simd-error@+2 {{expected '(' after 'declare_mapper'}} +// omp5x-error@+1 {{expected '(' after 'declare mapper'}} +#pragma omp declare mapper { +// expected-error@+2 {{expected a type}} +// expected-error@+1 {{expected declarator on 'omp declare mapper' directive}} +#pragma omp declare mapper( +// expected-error@+2 {{expected a type}} +// expected-error@+1 {{expected 
declarator on 'omp declare mapper' directive}} +#pragma omp declare mapper(# +// expected-error@+1 {{expected declarator on 'omp declare mapper' directive}} +#pragma omp declare mapper(struct v +// expected-error@+1 {{expected declarator on 'omp declare mapper' directive}} +#pragma omp declare mapper(struct vec +// expected-error@+1 {{unknown type name 'S'}} +#pragma omp declare mapper(S v +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} +#pragma omp declare mapper(struct vec v +// omp60-maybe-simd-error@+2 {{expected at least one clause on '#pragma omp declare_mapper' directive}} +// omp5x-error@+1 {{expected at least one clause on '#pragma omp declare mapper' directive}} +#pragma omp declare mapper(aa:struct vec v) +// omp60-maybe-simd-error@+4 {{expected at least one clause on '#pragma omp declare_mapper' directive}} +// omp60-maybe-simd-error@+3 {{unexpected OpenMP clause 'private' in directive '#pragma omp declare_mapper'}} +// omp5x-error@+2 {{expected at least one clause on '#pragma omp declare mapper' directive}} +// omp5x-error@+1 {{unexpected OpenMP clause 'private' in directive '#pragma omp declare mapper'}} +#pragma omp declare mapper(bb:struct vec v) private(v) +// omp60-maybe-simd-warning@+2 {{extra tokens at the end of '#pragma omp declare_mapper' are ignored}} +// omp5x-warning@+1 {{extra tokens at the end of '#pragma omp declare mapper' are ignored}} +#pragma omp declare mapper(cc:struct vec v) map(v) ( #pragma omp declare mapper(++: struct vec v) map(v.len) // expected-error {{illegal OpenMP user-defined mapper identifier}} #pragma omp declare mapper(id1: struct vec v) map(v.len, temp) // expected-error {{only variable 'v' is allowed in map clauses of this 'omp declare mapper' directive}} @@ -58,7 +79,9 @@ int fun(int arg) { #pragma omp declare mapper(id: struct vec v) map(v.len) // expected-note {{previous definition is here}} #pragma omp declare mapper(id: struct vec v) map(v.len) // expected-error {{redefinition 
of user-defined mapper for type 'struct vec' with name 'id'}} { -#pragma omp declare mapper(id: struct vec v) map(v.len) allocate(v) // expected-error {{unexpected OpenMP clause 'allocate' in directive '#pragma omp declare mapper'}} +// omp60-maybe-simd-error@+2 {{unexpected OpenMP clause 'allocate' in directive '#pragma omp declare_mapper'}} +// omp5x-error@+1 {{unexpected OpenMP clause 'allocate' in directive '#pragma omp declare mapper'}} +#pragma omp declare mapper(id: struct vec v) map(v.len) allocate(v) struct vec vv, v1; struct vec arr[10]; double d; diff --git a/clang/test/OpenMP/declare_target_messages.cpp b/clang/test/OpenMP/declare_target_messages.cpp index 3c0e766cf72ca..4aa4d686eaaf3 100644 --- a/clang/test/OpenMP/declare_target_messages.cpp +++ b/clang/test/OpenMP/declare_target_messages.cpp @@ -11,14 +11,14 @@ // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp45,omp45-to-51,omp45-to-51-var,omp45-to-51-clause,omp45-to-51-clause %{openmp45} %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp5,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host5,host-5-and-51,no-host5-and-51 %{openmp50} %{target_mac} %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} %{target_mac} %{limit} -o - %s +// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp60,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} %{target_mac} %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp5,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host-5-and-51,no-host5-and-51,dev5 %{openmp50} -fopenmp-is-target-device %{target_mac} %{aux_triple} %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} -fopenmp-is-target-device %{target_mac} %{aux_triple} %{limit} -o - %s +// 
RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp60,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} -fopenmp-is-target-device %{target_mac} %{aux_triple} %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp5,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host5,host-5-and-51,no-host5-and-51 %{openmp50_simd} %{target_mac} %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp60_simd} %{target_mac} %{limit} -o - %s +// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp60,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp60_simd} %{target_mac} %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp5,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host5,host-5-and-51,no-host5-and-51 %{openmp50_simd} -fopenmp-is-target-device %{target_mac} %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp60_simd} -fopenmp-is-target-device %{target_mac} %{limit} -o - %s +// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp60,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp60_simd} -fopenmp-is-target-device %{target_mac} %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp45,omp45-to-51,omp45-to-51-var,omp45-to-51-clause -fopenmp-version=45 -fopenmp-simd %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp51,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host-5-and-51,no-host5-and-51 -fopenmp %{limit} -o - %s @@ -26,12 +26,12 @@ // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp51,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host-5-and-51,no-host5-and-51 -fopenmp %{limit} -I%S/Inputs 
-DTESTENDINC=1 -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp51,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host-5-and-51,no-host5-and-51 -fopenmp-simd %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp52} -DVERBOSE_MODE=1 %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} -DVERBOSE_MODE=1 %{limit} -o - %s +// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp52} -DVERBOSE_MODE=1 %{limit} -o - %s +// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp60,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} -DVERBOSE_MODE=1 %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp5,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host-5-and-51,no-host5-and-51 %{openmp50} %{limit} -o - %s // RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp51,ompvar,omp45-to-51,omp5-and-51,omp5-or-later,omp5-or-later-var,omp45-to-51-var,omp45-to-51-clause,host-5-and-51,no-host5-and-51 -fopenmp %{limit} -o - %s -// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp52,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} %{limit} -o - %s +// RUN: %clang_cc1 %{common_opts_mac} -verify=expected,omp60,omp52-or-later,ompvar,omp5-or-later,omp5-or-later-var %{openmp60} %{limit} -o - %s #pragma omp begin declare target static int gg; @@ -39,7 +39,9 @@ static int gg; int recursive = recursive ^ 3 + gg; #pragma omp end declare target -// expected-error@+1 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp60-error@+3 {{unexpected OpenMP directive '#pragma omp end declare_target'}} +// omp52-error@+2 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp45-to-51-error@+1 
{{unexpected OpenMP directive '#pragma omp end declare target'}} #pragma omp end declare target // ompvar-error@+1 {{variable captured in declare target region must appear in a to clause}} @@ -47,14 +49,20 @@ int a, b, z; // expected-note@+1 {{defined as threadprivate or thread local}} __thread int t; -// expected-error@+1 {{expected '(' after 'declare target'}} +// omp60-error@+3 {{expected '(' after 'declare_target'}} +// omp52-error@+2 {{expected '(' after 'declare target'}} +// omp45-to-51-error@+1 {{expected '(' after 'declare target'}} #pragma omp declare target . #pragma omp declare target void f(); -// expected-warning@+1 {{extra tokens at the end of '#pragma omp end declare target' are ignored}} +// omp60-warning@+3 {{extra tokens at the end of '#pragma omp end declare_target' are ignored}} +// omp52-warning@+2 {{extra tokens at the end of '#pragma omp end declare target' are ignored}} +// omp45-to-51-warning@+1 {{extra tokens at the end of '#pragma omp end declare target' are ignored}} #pragma omp end declare target shared(a) +// omp60-error@+10 {{unexpected 'map' clause, only 'enter', 'link', 'device_type' or 'indirect' clauses expected}} +// omp60-error@+9 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp52-error@+8 {{unexpected 'map' clause, only 'enter', 'link', 'device_type' or 'indirect' clauses expected}} // omp52-error@+7 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp51-error@+6 {{unexpected 'map' clause, only 'to', 'link', 'device_type' or 'indirect' clauses expected}} @@ -65,6 +73,8 @@ void f(); // omp45-error@+1 {{expected at least one 'to' or 'link' clause}} #pragma omp declare target map(a) +// omp60-error@+5 {{unexpected 'to' clause, use 'enter' instead}} +// omp60-error@+4 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp52-error@+3 {{unexpected 'to' clause, use 'enter' instead}} // omp52-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} // 
omp45-to-51-error@+1 {{use of undeclared identifier 'foo1'}} @@ -73,6 +83,8 @@ void f(); // expected-error@+1 {{use of undeclared identifier 'foo2'}} #pragma omp declare target link(foo2) +// omp60-error@+6 {{unexpected 'to' clause, use 'enter' instead}} +// omp60-error@+5 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} // omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} // dev5-note@+2 {{marked as 'device_type(host)' here}} @@ -80,8 +92,8 @@ void f(); #pragma omp declare target to(f) device_type(host) void q(); -// omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+4 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp5-and-51-warning@+2 {{more than one 'device_type' clause is specified}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(q) device_type(any) device_type(any) device_type(host) @@ -121,7 +133,7 @@ void c(); // expected-note@+1 {{'func' defined here}} void func() {} -// omp52-error@+5 {{unexpected 'allocate' clause, only 'enter', 'link', 'device_type' or 'indirect' clauses expected}} +// omp52-or-later-error@+5 {{unexpected 'allocate' clause, only 'enter', 'link', 'device_type' or 'indirect' clauses expected}} // omp51-error@+4 {{unexpected 'allocate' clause, only 'to', 'link', 'device_type' or 'indirect' clauses expected}} // omp5-error@+3 {{unexpected 'allocate' clause, only 'to', 'link' or 'device_type' clauses expected}} // expected-error@+2 {{function name is not allowed in 'link' clause}} @@ -171,7 +183,9 @@ void t2() { void abc(); #pragma omp end declare target void cba(); -// expected-error@+1 {{unexpected OpenMP directive '#pragma omp end declare 
target'}} +// omp60-error@+3 {{unexpected OpenMP directive '#pragma omp end declare_target'}} +// omp52-error@+2 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp45-to-51-error@+1 {{unexpected OpenMP directive '#pragma omp end declare target'}} #pragma omp end declare target #pragma omp declare target @@ -234,7 +248,9 @@ void foo1() { #pragma omp end declare target #pragma omp end declare target -// expected-error@+1 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp60-error@+3 {{unexpected OpenMP directive '#pragma omp end declare_target'}} +// omp52-error@+2 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp45-to-51-error@+1 {{unexpected OpenMP directive '#pragma omp end declare target'}} #pragma omp end declare target int C::method() { @@ -255,18 +271,22 @@ int *y; int **w = &y; int main (int argc, char **argv) { int a = 2; -// expected-error@+1 {{unexpected OpenMP directive '#pragma omp declare target'}} +// omp60-error@+3 {{unexpected OpenMP directive '#pragma omp declare_target'}} +// omp52-error@+2 {{unexpected OpenMP directive '#pragma omp declare target'}} +// omp45-to-51-error@+1 {{unexpected OpenMP directive '#pragma omp declare target'}} #pragma omp declare target int v; -// expected-error@+1 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp60-error@+3 {{unexpected OpenMP directive '#pragma omp end declare_target'}} +// omp52-error@+2 {{unexpected OpenMP directive '#pragma omp end declare target'}} +// omp45-to-51-error@+1 {{unexpected OpenMP directive '#pragma omp end declare target'}} #pragma omp end declare target foo(v); - // omp52-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} - // omp52-error@+1 {{unexpected 'to' clause, use 'enter' instead}} + // omp52-or-later-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} + // omp52-or-later-error@+1 {{unexpected 'to' clause, use 'enter' instead}} #pragma omp declare target 
to(foo3) link(w) - // omp52-error@+3 {{unexpected 'to' clause, use 'enter' instead}} - // omp52-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} + // omp52-or-later-error@+3 {{unexpected 'to' clause, use 'enter' instead}} + // omp52-or-later-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp45-to-51-var-error@+1 {{local variable 'a' should not be used in 'declare target' directive}} #pragma omp declare target to(a) return (0); @@ -283,48 +303,48 @@ namespace { // expected-error@+1 {{'x' appears multiple times in clauses on the same declare target directive}} #pragma omp declare target (x, x) -// omp52-error@+3 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+3 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp45-to-51-clause-error@+1 {{'x' appears multiple times in clauses on the same declare target directive}} #pragma omp declare target to(x) to(x) // expected-error@+1 {{'x' must not appear in both clauses 'to' and 'link'}} #pragma omp declare target link(x) void bazz() {} -// omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+4 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} // host5-note@+2 3 {{marked as 'device_type(nohost)' here}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(bazz) device_type(nohost) void bazzz() {bazz();} -// omp52-error@+3 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+3 {{unexpected 'to' clause, use 
'enter' instead}} +// omp52-or-later-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(bazzz) device_type(nohost) // host5-error@+1 {{function with 'device_type(nohost)' is not available on host}} void any() {bazz();} // host5-error@+1 {{function with 'device_type(nohost)' is not available on host}} void host1() {bazz();} -// omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+4 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} // dev5-note@+2 3 {{marked as 'device_type(host)' here}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(host1) device_type(host) //host5-error@+1 {{function with 'device_type(nohost)' is not available on host}} void host2() {bazz();} -// omp52-error@+2 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+1 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+2 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+1 {{expected at least one 'enter', 'link' or 'indirect' clause}} #pragma omp declare target to(host2) // dev5-error@+1 {{function with 'device_type(host)' is not available on device}} void device() {host1();} -// omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+4 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} // host5-note@+2 2 {{marked as 'device_type(nohost)' here}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 
'link' clauses expected}} #pragma omp declare target to(device) device_type(nohost) void host3() {host1();} // dev5-error {{function with 'device_type(host)' is not available on device}} -// omp52-error@+2 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+1 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+2 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+1 {{expected at least one 'enter', 'link' or 'indirect' clause}} #pragma omp declare target to(host3) #pragma omp declare target @@ -343,17 +363,17 @@ void any7() {device();} void any8() {any2();} int MultiDevTy; -// omp52-error@+3 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+3 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+2 {{expected at least one 'enter', 'link' or 'indirect' clause}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(MultiDevTy) device_type(any) -// omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+4 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} // host-5-and-51-error@+2 {{'device_type(host)' does not match previously specified 'device_type(any)' for the same declaration}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(MultiDevTy) device_type(host) -// omp52-error@+4 {{unexpected 'to' clause, use 'enter' instead}} -// omp52-error@+3 {{expected at least one 'enter', 'link' or 'indirect' clause}} +// omp52-or-later-error@+4 {{unexpected 'to' clause, use 'enter' instead}} +// omp52-or-later-error@+3 {{expected at least one 
'enter', 'link' or 'indirect' clause}} // no-host5-and-51-error@+2 {{'device_type(nohost)' does not match previously specified 'device_type(any)' for the same declaration}} // omp45-error@+1 {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} #pragma omp declare target to(MultiDevTy) device_type(nohost) @@ -414,6 +434,8 @@ target *S1 = &S; // expected-warning@+1 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp declare target'}} #pragma omp declare target #else -// expected-warning@+1 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp begin declare target'}} +// omp60-warning@+3 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp begin declare_target'}} +// omp52-warning@+2 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp begin declare target'}} +// omp45-to-51-warning@+1 {{expected '#pragma omp end declare target' at end of file to match '#pragma omp begin declare target'}} #pragma omp begin declare target #endif diff --git a/clang/test/OpenMP/declare_variant_clauses_messages.cpp b/clang/test/OpenMP/declare_variant_clauses_messages.cpp index aadded7699ea1..bca91481220ff 100644 --- a/clang/test/OpenMP/declare_variant_clauses_messages.cpp +++ b/clang/test/OpenMP/declare_variant_clauses_messages.cpp @@ -152,7 +152,7 @@ void vararg_bar2(const char *fmt) { return; } #pragma omp declare variant(foo_v1) match(construct={dispatch}) \ append_args(foobar(target)) -// expected-error@+2 {{directive '#pragma omp declare variant' cannot contain more than one 'append_args' clause}} +// expected-error@+2 {{directive '#pragma omp declare_variant' cannot contain more than one 'append_args' clause}} #pragma omp declare variant(foo_v1) match(construct={dispatch}) \ append_args(interop(target)) \ append_args(interop(targetsync)) diff --git a/clang/test/OpenMP/target_data_ast_print.cpp b/clang/test/OpenMP/target_data_ast_print.cpp index 
a41c7f1a0da53..3f939549dcb54 100644 --- a/clang/test/OpenMP/target_data_ast_print.cpp +++ b/clang/test/OpenMP/target_data_ast_print.cpp @@ -102,7 +102,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: T i, j, b, c, d, e, x[20]; // CHECK-NEXT: #pragma omp target data map(to: c){{$}} // CHECK-NEXT: i = argc; -// CHECK-NEXT: #pragma omp target data map(to: c) if(target data: j > 0) +// CHECK-NEXT: #pragma omp target data map(to: c) if(target{{[ _]}}data: j > 0) // CHECK-NEXT: foo(); // CHECK-NEXT: #pragma omp target data map(to: c) if(b) // CHECK-NEXT: foo(); @@ -140,7 +140,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: int i, j, b, c, d, e, x[20]; // CHECK-NEXT: #pragma omp target data map(to: c) // CHECK-NEXT: i = argc; -// CHECK-NEXT: #pragma omp target data map(to: c) if(target data: j > 0) +// CHECK-NEXT: #pragma omp target data map(to: c) if(target{{[ _]}}data: j > 0) // CHECK-NEXT: foo(); // CHECK-NEXT: #pragma omp target data map(to: c) if(b) // CHECK-NEXT: foo(); @@ -178,7 +178,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: char i, j, b, c, d, e, x[20]; // CHECK-NEXT: #pragma omp target data map(to: c) // CHECK-NEXT: i = argc; -// CHECK-NEXT: #pragma omp target data map(to: c) if(target data: j > 0) +// CHECK-NEXT: #pragma omp target data map(to: c) if(target{{[ _]}}data: j > 0) // CHECK-NEXT: foo(); // CHECK-NEXT: #pragma omp target data map(to: c) if(b) // CHECK-NEXT: foo(); @@ -225,7 +225,7 @@ int main (int argc, char **argv) { a=2; // CHECK-NEXT: a = 2; #pragma omp target data map(to: c) if (target data: b) -// CHECK: #pragma omp target data map(to: c) if(target data: b) +// CHECK: #pragma omp target data map(to: c) if(target{{[ _]}}data: b) foo(); // CHECK-NEXT: foo(); diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp index 911031d5412a9..4a026584a47cb 100644 --- a/clang/test/OpenMP/target_map_messages.cpp +++ b/clang/test/OpenMP/target_map_messages.cpp @@ -1,35 +1,35 @@ // -fopenmp, -fno-openmp-extensions 
-// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,omp,ge51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=51 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,ge51,ge52,lt60,omp,ge52-omp,omp52 -fopenmp -fno-openmp-extensions -fopenmp-version=52 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,ge52,ge60,omp,ge60-omp,omp60 -fopenmp -fno-openmp-extensions -fopenmp-version=60 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,omp,lt51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,ge51,lt60,omp,ge51-omp -fopenmp -fno-openmp-extensions -fopenmp-version=51 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,ge51,ge52,lt60,omp,ge52-omp,omp52 -fopenmp -fno-openmp-extensions -fopenmp-version=52 
-ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,ge60,omp,ge60-omp,omp60 -fopenmp -fno-openmp-extensions -fopenmp-version=60 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -DCCODE -verify -fopenmp -fno-openmp-extensions -ferror-limit 300 -x c %s -Wno-openmp -Wuninitialized -Wno-vla // -fopenmp-simd, -fno-openmp-extensions -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,omp,ge51-omp -fopenmp-simd -fno-openmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,omp,lt51-omp -fopenmp-simd -fno-openmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,ge51,lt60,omp,ge51-omp -fopenmp-simd -fno-openmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -DCCODE -verify -fopenmp-simd -fno-openmp-extensions -ferror-limit 300 -x c %s 
-Wno-openmp-mapping -Wuninitialized -Wno-vla // -fopenmp -fopenmp-extensions -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,ompx,ge51-ompx -fopenmp -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,ge51,lt60,ompx,ge51-ompx -fopenmp -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -DCCODE -verify -fopenmp -fopenmp-extensions -ferror-limit 300 -x c %s -Wno-openmp -Wuninitialized -Wno-vla // -fopenmp-simd -fopenmp-extensions -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s 
-Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=40 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -verify=expected,lt50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=45 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla -// RUN: %clang_cc1 -verify=expected,ge50,ge51,lt60,ompx,ge51-ompx -fopenmp-simd -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,lt51,lt60,ompx,lt51-ompx -fopenmp-simd -fopenmp-extensions -fopenmp-version=50 -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla +// RUN: %clang_cc1 -verify=expected,omp5x,ge50,ge51,lt60,ompx,ge51-ompx -fopenmp-simd -fopenmp-extensions -ferror-limit 300 %s -Wno-openmp-target -Wuninitialized -Wno-vla // RUN: %clang_cc1 -DCCODE -verify -fopenmp-simd -fopenmp-extensions -ferror-limit 300 -x c %s -Wno-openmp-mapping -Wuninitialized -Wno-vla // Check @@ -87,7 +87,7 @@ struct SA { {} #pragma omp target map(arg[2:2],a,d) // expected-error {{subscripted value is not an array or pointer}} {} - #pragma omp target map(arg,a*2) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error {{expected addressable lvalue in 'map' clause}} + #pragma omp target map(arg,a*2) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error {{expected addressable lvalue in 'map' clause}} ge60-error {{expected addressable lvalue in 'map' clause}} {} #pragma omp target map(arg,(c+1)[2]) // lt50-error {{expected expression containing only member 
accesses and/or array sections based on named variables}} {} @@ -170,27 +170,32 @@ struct SA { // expected-error@+1 {{use of undeclared identifier 'present'}} #pragma omp target map(present) {} + // ge60-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge52-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c,f) {} + // ge60-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge52-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c[1:2],f) {} + // ge60-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge52-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c,f[1:2]) {} + // ge60-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge52-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // expected-error@+3 {{section length 
is unspecified and cannot be inferred because subscripted value is not an array}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, tofrom: c[:],f) {} + // ge60-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge52-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}} // ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} @@ -211,12 +216,14 @@ struct SA { // lt51-error@+1 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(present, present, tofrom: a) {} + // ge60-error@+5 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge52-error@+4 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // ompx-error@+3 {{same map type modifier has been specified more than once}} // ge51-omp-error@+2 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-omp-error@+1 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(ompx_hold, ompx_hold, tofrom: a) {} + // ge60-error@+10 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}} // ge60-error@+9 {{same map type modifier has been specified more than once}} // ge52-error@+8 2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}} // expected-error@+7 2 {{same map type modifier has been specified 
more than once}} @@ -247,10 +254,11 @@ struct SA { // lt60-error@+1 {{missing map type}} #pragma omp target map( , f, : a) {} - #pragma omp target map(always close: a) // lt60-error {{missing map type}} ge52-error{{missing ',' after map type modifier}} + #pragma omp target map(always close: a) // lt60-error {{missing map type}} ge52-error{{missing ',' after map type modifier}} ge60-error {{missing ',' after map type modifier}} {} - #pragma omp target map(always close bf: a) // ge52-error 2 {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}} + #pragma omp target map(always close bf: a) // ge52-error 2 {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}} ge60-error 2 {{missing ',' after map type modifier}} {} + // ge60-error@+5 {{missing ',' after map type modifier}} // ge52-error@+4 {{missing ',' after map type modifier}} // ge51-error@+3 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}} // lt51-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} @@ -263,6 +271,7 @@ struct SA { // lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(tofrom from: a) {} + // ge60-error@+6 {{missing ',' after map type modifier}} // ge60-note@+5 {{map type 'to' is previous specified here}} // ge60-error@+4 {{map type is already specified}} // ge52-error@+3 {{missing ',' after map type modifier}} @@ -270,7 +279,7 @@ struct SA { // lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} #pragma omp target map(to always from: a) {} - #pragma omp target map(close bf: a) // ge52-error {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 
'delete'}} + #pragma omp target map(close bf: a) // ge52-error {{missing ',' after map type modifier}} expected-error {{incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'}} ge60-error {{missing ',' after map type modifier}} {} #pragma omp target map(([b[I]][bf])f) // lt50-error {{expected ',' or ']' in lambda capture list}} lt50-error {{expected ')'}} lt50-note {{to match this '('}} {} @@ -476,7 +485,7 @@ void SAclient(int arg) { {} #pragma omp target map(r.S.Arr[:12]) {} -#pragma omp target map(r.S.foo() [:12]) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error {{expected addressable lvalue in 'map' clause}} +#pragma omp target map(r.S.foo() [:12]) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error {{expected addressable lvalue in 'map' clause}} ge60-error {{expected addressable lvalue in 'map' clause}} {} #pragma omp target map(r.C, r.D) {} @@ -673,7 +682,7 @@ T tmain(T argc) { foo(); #pragma omp target map(T) // expected-error {{'T' does not refer to a value}} foo(); -#pragma omp target map(I) // lt50-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error 2 {{expected addressable lvalue in 'map' clause}} +#pragma omp target map(I) // lt50-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error 2 {{expected addressable lvalue in 'map' clause}} ge60-error 2 {{expected addressable lvalue in 'map' clause}} foo(); #pragma omp target map(S2::S2s) foo(); @@ -690,7 +699,7 @@ T tmain(T argc) { #pragma omp target map(to, x) foo(); #pragma omp target data map(to x) // expected-error {{expected ',' or ')' in 'map' clause}} -#pragma omp target data map(tofrom: argc > 0 ? 
x : y) // lt50-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error 2 {{expected addressable lvalue in 'map' clause}} +#pragma omp target data map(tofrom: argc > 0 ? x : y) // lt50-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error 2 {{expected addressable lvalue in 'map' clause}} ge60-error 2 {{expected addressable lvalue in 'map' clause}} #pragma omp target data map(argc) #pragma omp target data map(S1) // expected-error {{'S1' does not refer to a value}} #pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} warn-warning 2 {{type 'const S2' is not trivially copyable and not guaranteed to be mapped correctly}} warn-warning 2 {{type 'const S3' is not trivially copyable and not guaranteed to be mapped correctly}} @@ -809,7 +818,11 @@ int main(int argc, char **argv) { SC1 s; SC1 *p; int Arr[10]; -#pragma omp target data map // expected-error {{expected '(' after 'map'}} lt50-error {{expected at least one 'map' or 'use_device_ptr' clause for '#pragma omp target data'}} ge50-error {{expected at least one 'map', 'use_device_ptr', or 'use_device_addr' clause for '#pragma omp target data'}} +// expected-error@+4 {{expected '(' after 'map'}} +// lt50-error@+3 {{expected at least one 'map' or 'use_device_ptr' clause for '#pragma omp target data'}} +// omp5x-error@+2 {{expected at least one 'map', 'use_device_ptr', or 'use_device_addr' clause for '#pragma omp target data'}} +// ge60-error@+1 {{expected at least one 'map', 'use_device_ptr', or 'use_device_addr' clause for '#pragma omp target_data'}} +#pragma omp target data map #pragma omp target data map( // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected expression}} #pragma omp target data map() // expected-error {{expected expression}} #pragma omp target data map(alloc) // 
expected-error {{use of undeclared identifier 'alloc'}} @@ -828,7 +841,7 @@ int main(int argc, char **argv) { #pragma omp target map(to, x) foo(); #pragma omp target data map(to x) // expected-error {{expected ',' or ')' in 'map' clause}} -#pragma omp target data map(tofrom: argc > 0 ? argv[1] : argv[2]) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error {{expected addressable lvalue in 'map' clause}} +#pragma omp target data map(tofrom: argc > 0 ? argv[1] : argv[2]) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}} ge50-error {{expected addressable lvalue in 'map' clause}} ge60-error {{expected addressable lvalue in 'map' clause}} #pragma omp target data map(argc) #pragma omp target data map(S1) // expected-error {{'S1' does not refer to a value}} #pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} warn-warning {{type 'const S2' is not trivially copyable and not guaranteed to be mapped correctly}} warn-warning {{type 'const S3' is not trivially copyable and not guaranteed to be mapped correctly}} diff --git a/clang/test/Sema/builtins-bcd-transform.c b/clang/test/Sema/builtins-bcd-transform.c new file mode 100644 index 0000000000000..103a6be6452b5 --- /dev/null +++ b/clang/test/Sema/builtins-bcd-transform.c @@ -0,0 +1,30 @@ +// Testfile to verify the semantics and the error handling for BCD builtins national2packed, packed2zoned and zoned2packed. 
+// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -target-feature +altivec -triple powerpc64-unknown-unknown -fsyntax-only -verify %s +// RUN: %clang_cc1 -target-feature +altivec -triple powerpc64le-unknown-unknown -fsyntax-only -verify %s +// RUN: %clang_cc1 -target-feature +altivec -triple powerpc-unknown-unknown -fsyntax-only -verify %s + +#include +vector unsigned char test_national2packed(void) +{ + vector unsigned char a = {1,2,3,4}; + vector unsigned char res_a = __builtin_ppc_national2packed(a, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}} + vector unsigned char res_b = __builtin_ppc_national2packed(a, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}} + return __builtin_ppc_national2packed(a, 0); +} + +vector unsigned char test_packed2zoned(void) +{ + vector unsigned char a = {1,2,3,4}; + vector unsigned char res_a = __builtin_ppc_packed2zoned(a,2); // expected-error-re {{argument value {{.*}} is outside the valid range}} + vector unsigned char res_b = __builtin_ppc_packed2zoned(a, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}} + return __builtin_ppc_packed2zoned(a,1); +} + +vector unsigned char test_zoned2packed(void) +{ + vector unsigned char a = {1,2,3,4}; + vector unsigned char res_a = __builtin_ppc_zoned2packed(a,2); // expected-error-re {{argument value {{.*}} is outside the valid range}} + vector unsigned char res_b = __builtin_ppc_zoned2packed(a, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}} + return __builtin_ppc_zoned2packed(a,0); +} \ No newline at end of file diff --git a/clang/test/SemaCXX/cxx2c-trivially-relocatable.cpp b/clang/test/SemaCXX/cxx2c-trivially-relocatable.cpp index 7152a5937d9b7..6f4003f525930 100644 --- a/clang/test/SemaCXX/cxx2c-trivially-relocatable.cpp +++ b/clang/test/SemaCXX/cxx2c-trivially-relocatable.cpp @@ -410,3 +410,39 @@ C& C::operator=(const C&) = default; static_assert 
(!__builtin_is_cpp_trivially_relocatable(C)); static_assert (!__builtin_is_replaceable(C)); } + +namespace GH144232 { + +struct E trivially_relocatable_if_eligible replaceable_if_eligible { + E (E &&); + E &operator= (E &&) = default; +}; + +struct F trivially_relocatable_if_eligible replaceable_if_eligible { + F (F &&) = default; + F &operator= (F &&); +}; + +struct G trivially_relocatable_if_eligible replaceable_if_eligible { G (G const &) = default; }; + +struct I trivially_relocatable_if_eligible replaceable_if_eligible { I &operator= (const I &) = default; }; + +struct J trivially_relocatable_if_eligible replaceable_if_eligible { J (J const &); }; +struct K trivially_relocatable_if_eligible replaceable_if_eligible { K (K const &); }; + + + +static_assert (__builtin_is_replaceable (E)); +static_assert (__builtin_is_cpp_trivially_relocatable(E)); +static_assert (__builtin_is_replaceable (F)); +static_assert (__builtin_is_cpp_trivially_relocatable(F)); +static_assert (__builtin_is_replaceable (G)); +static_assert (__builtin_is_cpp_trivially_relocatable(G)); +static_assert (__builtin_is_replaceable (I)); +static_assert (__builtin_is_cpp_trivially_relocatable(I)); +static_assert (__builtin_is_replaceable (J)); +static_assert (__builtin_is_cpp_trivially_relocatable(J)); +static_assert (__builtin_is_replaceable (K)); +static_assert (__builtin_is_cpp_trivially_relocatable(K)); + +} diff --git a/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp b/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp index 329b611110c1d..cf33ac283ab42 100644 --- a/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp +++ b/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp @@ -20,6 +20,28 @@ struct is_trivially_copyable { template constexpr bool is_trivially_copyable_v = __is_trivially_copyable(T); + +template +struct is_assignable { + static constexpr bool value = __is_assignable(T, U); +}; + +template +constexpr bool is_assignable_v = __is_assignable(T, U); + 
+template +struct is_empty { + static constexpr bool value = __is_empty(T); +}; +template +constexpr bool is_empty_v = __is_empty(T); + +template +struct is_standard_layout { +static constexpr bool value = __is_standard_layout(T); +}; +template +constexpr bool is_standard_layout_v = __is_standard_layout(T); #endif #ifdef STD2 @@ -44,6 +66,37 @@ using is_trivially_copyable = __details_is_trivially_copyable; template constexpr bool is_trivially_copyable_v = __is_trivially_copyable(T); + +template +struct __details_is_assignable { + static constexpr bool value = __is_assignable(T, U); +}; + +template +using is_assignable = __details_is_assignable; + +template +constexpr bool is_assignable_v = __is_assignable(T, U); + +template +struct __details_is_empty { + static constexpr bool value = __is_empty(T); +}; +template +using is_empty = __details_is_empty; +template +constexpr bool is_empty_v = __is_empty(T); + +template +struct __details_is_standard_layout { +static constexpr bool value = __is_standard_layout(T); + + +}; +template +using is_standard_layout = __details_is_standard_layout; +template +constexpr bool is_standard_layout_v = __is_standard_layout(T); #endif @@ -73,6 +126,29 @@ using is_trivially_copyable = __details_is_trivially_copyable; template constexpr bool is_trivially_copyable_v = is_trivially_copyable::value; + +template +struct __details_is_assignable : bool_constant<__is_assignable(T, U)> {}; + +template +using is_assignable = __details_is_assignable; + +template +constexpr bool is_assignable_v = is_assignable::value; + +template +struct __details_is_empty : bool_constant<__is_empty(T)> {}; +template +using is_empty = __details_is_empty; +template +constexpr bool is_empty_v = is_empty::value; + +template +struct __details_is_standard_layout : bool_constant<__is_standard_layout(T)> {}; +template +using is_standard_layout = __details_is_standard_layout; +template +constexpr bool is_standard_layout_v = is_standard_layout::value; #endif } @@ -100,6 
+176,41 @@ static_assert(std::is_trivially_copyable_v); // expected-note@-1 {{because it is a reference type}} + // Direct tests + static_assert(std::is_standard_layout::value); + static_assert(std::is_standard_layout_v); + + static_assert(std::is_standard_layout::value); + // expected-error-re@-1 {{static assertion failed due to requirement 'std::{{.*}}is_standard_layout::value'}} \ + // expected-note@-1 {{'int &' is not standard-layout}} \ + // expected-note@-1 {{because it is a reference type}} + + static_assert(std::is_standard_layout_v); + // expected-error@-1 {{static assertion failed due to requirement 'std::is_standard_layout_v'}} \ + // expected-note@-1 {{'int &' is not standard-layout}} \ + // expected-note@-1 {{because it is a reference type}} + +static_assert(!std::is_empty::value); + +static_assert(std::is_empty::value); +// expected-error-re@-1 {{static assertion failed due to requirement 'std::{{.*}}is_empty::value'}} \ +// expected-note@-1 {{'int &' is not empty}} \ +// expected-note@-1 {{because it is a reference type}} +static_assert(std::is_empty_v); +// expected-error@-1 {{static assertion failed due to requirement 'std::is_empty_v'}} \ +// expected-note@-1 {{'int &' is not empty}} \ +// expected-note@-1 {{because it is a reference type}} + + +static_assert(std::is_assignable::value); + +static_assert(std::is_assignable::value); +// expected-error-re@-1 {{static assertion failed due to requirement 'std::{{.*}}is_assignable::value'}} \ +// expected-error@-1 {{assigning to 'int' from incompatible type 'void'}} +static_assert(std::is_assignable_v); +// expected-error@-1 {{static assertion failed due to requirement 'std::is_assignable_v'}} \ +// expected-error@-1 {{assigning to 'int' from incompatible type 'void'}} + namespace test_namespace { using namespace std; static_assert(is_trivially_relocatable::value); @@ -119,6 +230,32 @@ namespace test_namespace { // expected-error@-1 {{static assertion failed due to requirement 
'is_trivially_copyable_v'}} \ // expected-note@-1 {{'int &' is not trivially copyable}} \ // expected-note@-1 {{because it is a reference type}} + + static_assert(is_standard_layout::value); + // expected-error-re@-1 {{static assertion failed due to requirement '{{.*}}is_standard_layout::value'}} \ + // expected-note@-1 {{'int &' is not standard-layout}} \ + // expected-note@-1 {{because it is a reference type}} + + static_assert(is_standard_layout_v); + // expected-error@-1 {{static assertion failed due to requirement 'is_standard_layout_v'}} \ + // expected-note@-1 {{'int &' is not standard-layout}} \ + // expected-note@-1 {{because it is a reference type}} + + static_assert(is_assignable::value); + // expected-error-re@-1 {{static assertion failed due to requirement '{{.*}}is_assignable::value'}} \ + // expected-error@-1 {{assigning to 'int' from incompatible type 'void'}} + static_assert(is_assignable_v); + // expected-error@-1 {{static assertion failed due to requirement 'is_assignable_v'}} \ + // expected-error@-1 {{assigning to 'int' from incompatible type 'void'}} + + static_assert(is_empty::value); + // expected-error-re@-1 {{static assertion failed due to requirement '{{.*}}is_empty::value'}} \ + // expected-note@-1 {{'int &' is not empty}} \ + // expected-note@-1 {{because it is a reference type}} + static_assert(is_empty_v); + // expected-error@-1 {{static assertion failed due to requirement 'is_empty_v'}} \ + // expected-note@-1 {{'int &' is not empty}} \ + // expected-note@-1 {{because it is a reference type}} } @@ -139,6 +276,14 @@ concept C2 = std::is_trivially_copyable_v; // #concept4 template void g2(); // #cand4 +template +requires std::is_assignable::value void f4(); // #cand7 + +template +concept C4 = std::is_assignable_v; // #concept8 + +template T> void g4(); // #cand8 + void test() { f(); // expected-error@-1 {{no matching function for call to 'f'}} \ @@ -169,6 +314,19 @@ void test() { // expected-note@#concept4 {{because 
'std::is_trivially_copyable_v' evaluated to false}} \ // expected-note@#concept4 {{'int &' is not trivially copyable}} \ // expected-note@#concept4 {{because it is a reference type}} + + f4(); + // expected-error@-1 {{no matching function for call to 'f4'}} \ + // expected-note@#cand7 {{candidate template ignored: constraints not satisfied [with T = int &, U = void]}} \ + // expected-note-re@#cand7 {{because '{{.*}}is_assignable::value' evaluated to false}} \ + // expected-error@#cand7 {{assigning to 'int' from incompatible type 'void'}} + + g4(); + // expected-error@-1 {{no matching function for call to 'g4'}} \ + // expected-note@#cand8 {{candidate template ignored: constraints not satisfied [with T = int &]}} \ + // expected-note@#cand8 {{because 'C4' evaluated to false}} \ + // expected-note@#concept8 {{because 'std::is_assignable_v' evaluated to false}} \ + // expected-error@#concept8 {{assigning to 'int' from incompatible type 'void'}} } } diff --git a/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp b/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp index 5210354a66d43..cc923d206ab35 100644 --- a/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp +++ b/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp @@ -488,3 +488,282 @@ static_assert(__is_trivially_copyable(S12)); // expected-note@-1 {{'S12' is not trivially copyable}} \ // expected-note@#tc-S12 {{'S12' defined here}} } + +namespace assignable { +struct S1; +static_assert(__is_assignable(S1&, const S1&)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(assignable::S1 &, const assignable::S1 &)'}} \ +// expected-error@-1 {{no viable overloaded '='}} \ +// expected-note@-1 {{type 'S1' is incomplete}} + +static_assert(__is_assignable(void, int)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(void, int)'}} \ +// expected-error@-1 {{expression is not assignable}} + +static_assert(__is_assignable(int, int)); +// 
expected-error@-1 {{static assertion failed due to requirement '__is_assignable(int, int)'}} \ +// expected-error@-1 {{expression is not assignable}} + +static_assert(__is_assignable(int*, int)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(int *, int)'}} \ +// expected-error@-1 {{expression is not assignable}} + +static_assert(__is_assignable(int[], int)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(int[], int)'}} \ +// expected-error@-1 {{expression is not assignable}} + +static_assert(__is_assignable(int&, void)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(int &, void)'}} \ +// expected-error@-1 {{assigning to 'int' from incompatible type 'void'}} + +static_assert(__is_assignable(int*&, float*)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(int *&, float *)'}} \ +// expected-error@-1 {{incompatible pointer types assigning to 'int *' from 'float *'}} + +static_assert(__is_assignable(const int&, int)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(const int &, int)'}} \ +// expected-error@-1 {{read-only variable is not assignable}} + +struct S2 {}; // #a-S2 +static_assert(__is_assignable(const S2, S2)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(const assignable::S2, assignable::S2)'}} \ +// expected-error@-1 {{no viable overloaded '='}} \ +// expected-note@#a-S2 {{candidate function (the implicit copy assignment operator) not viable: 'this' argument has type 'const S2', but method is not marked const}} \ +// expected-note@#a-S2 {{candidate function (the implicit move assignment operator) not viable: 'this' argument has type 'const S2', but method is not marked const}} \ +// expected-note@#a-S2 {{'S2' defined here}} + +struct S3 { // #a-S3 + S3& operator=(const S3&) = delete; // #aca-S3 + S3& operator=(S3&&) = delete; // #ama-S3 
+}; +static_assert(__is_assignable(S3, const S3&)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(assignable::S3, const assignable::S3 &)'}} \ +// expected-error@-1 {{overload resolution selected deleted operator '='}} \ +// expected-note@#aca-S3 {{candidate function has been explicitly deleted}} \ +// expected-note@#ama-S3 {{candidate function not viable: 1st argument ('const S3') would lose const qualifier}} \ +// expected-note@#a-S3 {{'S3' defined here}} +static_assert(__is_assignable(S3, S3&&)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(assignable::S3, assignable::S3 &&)'}} \ +// expected-error@-1 {{overload resolution selected deleted operator '='}} \ +// expected-note@#aca-S3 {{candidate function has been explicitly deleted}} \ +// expected-note@#ama-S3 {{candidate function has been explicitly deleted}} \ +// expected-note@#a-S3 {{'S3' defined here}} + +class C1 { // #a-C1 + C1& operator=(const C1&) = default; + C1& operator=(C1&&) = default; // #ama-C1 +}; +static_assert(__is_assignable(C1, C1)); +// expected-error@-1 {{static assertion failed due to requirement '__is_assignable(assignable::C1, assignable::C1)'}} \ +// expected-error@-1 {{'operator=' is a private member of 'assignable::C1'}} \ +// expected-note@#ama-C1 {{implicitly declared private here}} \ +// expected-note@#a-C1 {{'C1' defined here}} +} + +namespace is_empty_tests { + // Non-static data member. + struct A { int x; }; // #e-A + static_assert(__is_empty(A)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::A)'}} \ + // expected-note@-1 {{'A' is not empty}} \ + // expected-note@-1 {{because it has a non-static data member 'x' of type 'int'}} \ + // expected-note@#e-A {{'A' defined here}} + + // Reference member. 
+ struct R {int &r; }; // #e-R + static_assert(__is_empty(R)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::R)'}} \ + // expected-note@-1 {{'R' is not empty}} \ + // expected-note@-1 {{because it has a non-static data member 'r' of type 'int &'}} \ + // expected-note@#e-R {{'R' defined here}} + + // Virtual function. + struct VirtualFunc {virtual void f(); }; // #e-VirtualFunc + static_assert(__is_empty(VirtualFunc)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::VirtualFunc)'}} \ + // expected-note@-1 {{'VirtualFunc' is not empty}} \ + // expected-note@-1 {{because it has a virtual function 'f'}} \ + // expected-note@#e-VirtualFunc {{'VirtualFunc' defined here}} + + // Virtual base class. + struct EB {}; + struct VB: virtual EB {}; // #e-VB + static_assert(__is_empty(VB)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::VB)'}} \ + // expected-note@-1 {{'VB' is not empty}} \ + // expected-note@-1 {{because it has a virtual base 'EB'}} \ + // expected-note@#e-VB {{'VB' defined here}} + + // Non-empty base class. + struct Base { int b; }; // #e-Base + struct Derived : Base {}; // #e-Derived + static_assert(__is_empty(Derived)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::Derived)'}} \ + // expected-note@-1 {{'Derived' is not empty}} \ + // expected-note@-1 {{because it has a base class 'Base' that is not empty}} \ + // expected-note@#e-Derived {{'Derived' defined here}} + + // Combination of the above. 
+ struct Multi : Base, virtual EB { // #e-Multi + int z; + virtual void g(); + }; + static_assert(__is_empty(Multi)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::Multi)'}} \ + // expected-note@-1 {{'Multi' is not empty}} \ + // expected-note@-1 {{because it has a non-static data member 'z' of type 'int'}} \ + // expected-note@-1 {{because it has a virtual function 'g'}} \ + // expected-note@-1 {{because it has a base class 'Base' that is not empty}} \ + // expected-note@-1 {{because it has a virtual base 'EB'}} \ + // expected-note@#e-Multi {{'Multi' defined here}} + + // Zero-width bit-field. + struct BitField { int : 0; }; // #e-BitField + static_assert(__is_empty(BitField)); // no diagnostics + + // Dependent bit-field width. + template + struct DependentBitField { int : N; }; // #e-DependentBitField + + static_assert(__is_empty(DependentBitField<0>)); // no diagnostics + + static_assert(__is_empty(DependentBitField<2>)); + // expected-error@-1 {{static assertion failed due to requirement '__is_empty(is_empty_tests::DependentBitField<2>)'}} \ + // expected-note@-1 {{'DependentBitField<2>' is not empty}} \ + // expected-note@-1 {{because it field '' is a non-zero-length bit-field}} \ + // expected-note@#e-DependentBitField {{'DependentBitField<2>' defined here}} + +} + +namespace standard_layout_tests { +struct WithVirtual { // #sl-Virtual + virtual void foo(); // #sl-Virtual-Foo +}; +static_assert(__is_standard_layout(WithVirtual)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::WithVirtual)'}} \ +// expected-note@-1 {{'WithVirtual' is not standard-layout}} \ +// expected-note@-1 {{because it has a virtual function 'foo'}} \ +// expected-note@#sl-Virtual-Foo {{'foo' defined here}} \ +// expected-note@#sl-Virtual {{'WithVirtual' defined here}} + +struct MixedAccess { // #sl-Mixed +public: + int a; // #sl-MixedF1 +private: + int b; // #sl-MixedF2 
+}; +static_assert(__is_standard_layout(MixedAccess)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::MixedAccess)'}} \ +// expected-note@-1 {{'MixedAccess' is not standard-layout}} \ +// expected-note@-1 {{because it has mixed access specifiers}} \ +// expected-note@#sl-MixedF1 {{'a' defined here}} +// expected-note@#sl-MixedF2 {{field 'b' has a different access specifier than field 'a'}} +// expected-note@#sl-Mixed {{'MixedAccess' defined here}} + +struct VirtualBase { virtual ~VirtualBase(); }; // #sl-VirtualBase +struct VB : virtual VirtualBase {}; // #sl-VB +static_assert(__is_standard_layout(VB)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::VB)'}} \ +// expected-note@-1 {{'VB' is not standard-layout}} \ +// expected-note@-1 {{because it has a virtual base 'VirtualBase'}} \ +// expected-note@-1 {{because it has a non-standard-layout base 'VirtualBase'}} \ +// expected-note@-1 {{because it has a virtual function '~VB'}} \ +// expected-note@#sl-VB {{'VB' defined here}} +// expected-note@#sl-VB {{'~VB' defined here}} + +union U { // #sl-U +public: + int x; // #sl-UF1 +private: + int y; // #sl-UF2 +}; +static_assert(__is_standard_layout(U)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::U)'}} \ +// expected-note@-1 {{'U' is not standard-layout}} \ +// expected-note@-1 {{because it has mixed access specifiers}} +// expected-note@#sl-UF1 {{'x' defined here}} +// expected-note@#sl-UF2 {{field 'y' has a different access specifier than field 'x'}} +// expected-note@#sl-U {{'U' defined here}} + +// Single base class is OK +struct BaseClass{ int a; }; // #sl-BaseClass +struct DerivedOK : BaseClass {}; // #sl-DerivedOK +static_assert(__is_standard_layout(DerivedOK)); + +// Primitive types should be standard layout +static_assert(__is_standard_layout(int)); // #sl-Int 
+static_assert(__is_standard_layout(float)); // #sl-Float + +// Multi-level inheritance: Non-standard layout +struct Base1 { int a; }; // #sl-Base1 +struct Base2 { int b; }; // #sl-Base2 +struct DerivedClass : Base1, Base2 {}; // #sl-DerivedClass +static_assert(__is_standard_layout(DerivedClass)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::DerivedClass)'}} \ +// expected-note@-1 {{'DerivedClass' is not standard-layout}} \ +// expected-note@-1 {{because it has multiple base classes with data members}} \ +// expected-note@#sl-DerivedClass {{'DerivedClass' defined here}} + +// Inheritance hierarchy with multiple classes having data members +struct BaseA { int a; }; // #sl-BaseA +struct BaseB : BaseA {}; // inherits BaseA, has no new members +struct BaseC: BaseB { int c; }; // #sl-BaseC +static_assert(__is_standard_layout(BaseC)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::BaseC)'}} \ +// expected-note@-1 {{'BaseC' is not standard-layout}} \ +// expected-note@-1 {{because it has an indirect base 'BaseA' with data members}} \ +// expected-note@#sl-BaseC {{'BaseC' defined here}} \ +// Multiple direct base classes with no data members --> standard layout +struct BaseX {}; // #sl-BaseX +struct BaseY {}; // #sl-BaseY +struct MultiBase : BaseX, BaseY {}; // #sl-MultiBase +static_assert(__is_standard_layout(MultiBase)); + +struct A { + int x; +}; + +struct B : A { +}; +// Indirect base with data members +struct C : B { int y; }; // #sl-C +static_assert(__is_standard_layout(C)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::C)'}} \ +// expected-note@-1 {{'C' is not standard-layout}} \ +// expected-note@-1 {{because it has an indirect base 'A' with data members}} \ +// expected-note@#sl-C {{'C' defined here}} + +struct D { + union { int a; float b; }; + }; // #sl-D 
+static_assert(__is_standard_layout(D)); // no diagnostics + +// E inherits D but adds a new member +struct E : D { int x; }; // #sl-E +static_assert(__is_standard_layout(E)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::E)'}} \ +// expected-note@-1 {{'E' is not standard-layout}} \ +// expected-note@-1 {{because it has an indirect base 'D' with data members}} \ +// expected-note@#sl-E {{'E' defined here}} + +// F inherits D but only an unnamed bitfield +// This should still fail because F ends up with a +// base class with a data member and its own unnamed bitfield +// which is not allowed in standard layout +struct F : D { int : 0; }; // #sl-F +static_assert(__is_standard_layout(F)); +// expected-error@-1 {{static assertion failed due to requirement '__is_standard_layout(standard_layout_tests::F)'}} \ +// expected-note@-1 {{'F' is not standard-layout}} \ +// expected-note@#sl-F {{'F' defined here}} + +struct Empty {}; +struct G { Empty a, b; }; // #sl-G +static_assert(__is_standard_layout(G)); // no diagnostics + +struct H { Empty a; int x; }; // #sl-H +static_assert(__is_standard_layout(H)); // no diagnostics + + struct I { Empty a; int : 0; int x; }; // #sl-I +static_assert(__is_standard_layout(I)); // no diagnostics +} + diff --git a/clang/tools/c-index-test/c-index-test.c b/clang/tools/c-index-test/c-index-test.c index 4a887cd0c1e2e..cb3245756a394 100644 --- a/clang/tools/c-index-test/c-index-test.c +++ b/clang/tools/c-index-test/c-index-test.c @@ -1988,6 +1988,51 @@ static enum CXChildVisitResult PrintDeclAttributes(CXCursor cursor, CXCursor p, return CXChildVisit_Continue; } +/******************************************************************************/ +/* Inline assembly cursor testing */ +/******************************************************************************/ + +static enum CXChildVisitResult +PrintGCCInlineAssembly(CXCursor cursor, CXCursor p, CXClientData d) { + CXString 
Constraint, Template, Clobber; + CXCursor Expr; + unsigned hasGoto, i, e; + if (clang_getCursorKind(cursor) != CXCursor_AsmStmt) + return CXChildVisit_Recurse; + + hasGoto = clang_Cursor_isGCCAssemblyHasGoto(cursor); + printf("===ASM TEMPLATE%s===\n", hasGoto ? " (WITH GOTO)" : ""); + Template = clang_Cursor_getGCCAssemblyTemplate(cursor); + printf("%s", clang_getCString(Template)); + clang_disposeString(Template); + printf("\n===ASM TEMPLATE END===\n"); + + printf("volatile: %s\n", + clang_Cursor_isGCCAssemblyVolatile(cursor) ? "true" : "false"); + + for (i = 0, e = clang_Cursor_getGCCAssemblyNumOutputs(cursor); i < e; ++i) { + clang_Cursor_getGCCAssemblyOutput(cursor, i, &Constraint, &Expr); + printf("Output #%d Constraint (%s): ", i, clang_getCString(Constraint)); + PrintCursor(Expr, NULL); + printf("\n"); + clang_disposeString(Constraint); + } + for (i = 0, e = clang_Cursor_getGCCAssemblyNumInputs(cursor); i < e; ++i) { + clang_Cursor_getGCCAssemblyInput(cursor, i, &Constraint, &Expr); + printf("Input #%d Constraint (%s): ", i, clang_getCString(Constraint)); + PrintCursor(Expr, NULL); + printf("\n"); + clang_disposeString(Constraint); + } + for (i = 0, e = clang_Cursor_getGCCAssemblyNumClobbers(cursor); i < e; ++i) { + Clobber = clang_Cursor_getGCCAssemblyClobber(cursor, i); + printf("Clobber #%d: %s\n", i, clang_getCString(Clobber)); + clang_disposeString(Clobber); + } + printf("===ASM END===\n"); + return CXChildVisit_Recurse; +} + /******************************************************************************/ /* Target information testing. 
*/ /******************************************************************************/ @@ -5010,6 +5055,7 @@ static void print_usage(void) { " c-index-test -test-annotate-tokens= {}*\n" " c-index-test -test-inclusion-stack-source {}*\n" " c-index-test -test-inclusion-stack-tu \n"); + fprintf(stderr, " c-index-test -test-inline-assembly \n"); fprintf(stderr, " c-index-test -test-print-linkage-source {}*\n" " c-index-test -test-print-visibility {}*\n" @@ -5167,6 +5213,10 @@ int cindextest_main(int argc, const char **argv) { else if (argc > 2 && strstr(argv[1], "-single-symbol-sgf-for=") == argv[1]) return perform_test_single_symbol_sgf(argv[1], argc - 2, argv + 2); + if (argc > 2 && strstr(argv[1], "-test-inline-assembly") == argv[1]) + return perform_test_load_source(argc - 2, argv + 2, "all", + PrintGCCInlineAssembly, NULL); + print_usage(); return 1; } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3068621d9c004..e239ffae547aa 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -8648,6 +8648,100 @@ void clang_annotateTokens(CXTranslationUnit TU, CXToken *Tokens, } } +//===----------------------------------------------------------------------===// +// Operations for querying information of a GCC inline assembly block under a +// cursor. 
+//===----------------------------------------------------------------------===// +CXString clang_Cursor_getGCCAssemblyTemplate(CXCursor Cursor) { + if (!clang_isStatement(Cursor.kind)) + return cxstring::createEmpty(); + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor))) { + ASTContext const &C = getCursorContext(Cursor); + std::string AsmTemplate = S->generateAsmString(C); + return cxstring::createDup(AsmTemplate); + } + return cxstring::createEmpty(); +} + +unsigned clang_Cursor_isGCCAssemblyHasGoto(CXCursor Cursor) { + if (!clang_isStatement(Cursor.kind)) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor))) + return S->isAsmGoto(); + return 0; +} + +unsigned clang_Cursor_getGCCAssemblyNumOutputs(CXCursor Cursor) { + if (!clang_isStatement(Cursor.kind)) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor))) + return S->getNumOutputs(); + return 0; +} + +unsigned clang_Cursor_getGCCAssemblyNumInputs(CXCursor Cursor) { + if (!clang_isStatement(Cursor.kind)) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor))) + return S->getNumInputs(); + return 0; +} + +unsigned clang_Cursor_getGCCAssemblyInput(CXCursor Cursor, unsigned Index, + CXString *Constraint, + CXCursor *ExprCursor) { + if (!clang_isStatement(Cursor.kind) || !Constraint || !ExprCursor) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor)); + S && Index < S->getNumInputs()) { + *Constraint = cxstring::createDup(S->getInputConstraint(Index)); + *ExprCursor = MakeCXCursor(S->getInputExpr(Index), getCursorDecl(Cursor), + cxcursor::getCursorTU(Cursor)); + return 1; + } + return 0; +} + +unsigned clang_Cursor_getGCCAssemblyOutput(CXCursor Cursor, unsigned Index, + CXString *Constraint, + CXCursor *ExprCursor) { + if (!clang_isStatement(Cursor.kind) || !Constraint || !ExprCursor) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor)); + S && Index < S->getNumOutputs()) { + *Constraint = 
cxstring::createDup(S->getOutputConstraint(Index)); + *ExprCursor = MakeCXCursor(S->getOutputExpr(Index), getCursorDecl(Cursor), + cxcursor::getCursorTU(Cursor)); + return 1; + } + return 0; +} + +unsigned clang_Cursor_getGCCAssemblyNumClobbers(CXCursor Cursor) { + if (!clang_isStatement(Cursor.kind)) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor))) + return S->getNumClobbers(); + return 0; +} + +CXString clang_Cursor_getGCCAssemblyClobber(CXCursor Cursor, unsigned Index) { + if (!clang_isStatement(Cursor.kind)) + return cxstring::createEmpty(); + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor)); + S && Index < S->getNumClobbers()) + return cxstring::createDup(S->getClobber(Index)); + return cxstring::createEmpty(); +} + +unsigned clang_Cursor_isGCCAssemblyVolatile(CXCursor Cursor) { + if (!clang_isStatement(Cursor.kind)) + return 0; + if (auto const *S = dyn_cast_or_null(getCursorStmt(Cursor))) + return S->isVolatile(); + return 0; +} + //===----------------------------------------------------------------------===// // Operations for querying linkage of a cursor. //===----------------------------------------------------------------------===// diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map index f08d13c3da9e1..d140a71e771a0 100644 --- a/clang/tools/libclang/libclang.map +++ b/clang/tools/libclang/libclang.map @@ -441,6 +441,15 @@ LLVM_20 { LLVM_21 { global: clang_getFullyQualifiedName; + clang_Cursor_getGCCAssemblyTemplate; + clang_Cursor_isGCCAssemblyHasGoto; + clang_Cursor_getGCCAssemblyNumOutputs; + clang_Cursor_getGCCAssemblyNumInputs; + clang_Cursor_getGCCAssemblyInput; + clang_Cursor_getGCCAssemblyOutput; + clang_Cursor_getGCCAssemblyNumClobbers; + clang_Cursor_getGCCAssemblyClobber; + clang_Cursor_isGCCAssemblyVolatile; }; # Example of how to add a new symbol version entry. 
If you do add a new symbol diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index c0633ba3c29b3..a05bf8305716b 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -25768,6 +25768,21 @@ TEST_F(FormatTest, OperatorPassedAsAFunctionPtr) { verifyFormat("foo(operator, , -42);", Style); } +TEST_F(FormatTest, LineSpliceWithTrailingWhitespace) { + auto Style = getLLVMStyle(); + Style.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign; + Style.UseTab = FormatStyle::UT_Never; + + verifyFormat("int i;", " \\ \n" + " int i;"); + verifyFormat("#define FOO(args) \\\n" + " struct a {};", + "#define FOO( args ) \\ \n" + "struct a{\\\t\t\t\n" + " };", + Style); +} + TEST_F(FormatTest, WhitespaceSensitiveMacros) { FormatStyle Style = getLLVMStyle(); Style.WhitespaceSensitiveMacros.push_back("FOO"); diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp index 3eae39f267c3e..f42f2e307f713 100644 --- a/clang/unittests/Format/QualifierFixerTest.cpp +++ b/clang/unittests/Format/QualifierFixerTest.cpp @@ -1122,14 +1122,17 @@ TEST_F(QualifierFixerTest, IsQualifierType) { } TEST_F(QualifierFixerTest, IsMacro) { - auto Tokens = annotate("INT INTPR Foo int"); ASSERT_EQ(Tokens.size(), 5u) << Tokens; - EXPECT_TRUE(isPossibleMacro(Tokens[0])); EXPECT_TRUE(isPossibleMacro(Tokens[1])); EXPECT_FALSE(isPossibleMacro(Tokens[2])); EXPECT_FALSE(isPossibleMacro(Tokens[3])); + + Tokens = annotate("FOO::BAR"); + ASSERT_EQ(Tokens.size(), 4u) << Tokens; + EXPECT_FALSE(isPossibleMacro(Tokens[0])); + EXPECT_FALSE(isPossibleMacro(Tokens[2])); } TEST_F(QualifierFixerTest, OverlappingQualifier) { diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp index c80a8fd0e52b1..18fedd4de3973 100644 --- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp +++ 
b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp @@ -16,7 +16,6 @@ using namespace llvm; using namespace clang; namespace { -using LocSeq = SourceLocationSequence; // Convert a single source location into encoded form and back. // If ExpectedEncoded is provided, verify the encoded value too. @@ -34,37 +33,9 @@ void roundTrip(SourceLocation::UIntTy Loc, ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded; } -// As above, but use sequence encoding for a series of locations. -void roundTrip(std::vector Locs, - std::vector ExpectedEncoded = {}) { - std::vector ActualEncoded; - { - LocSeq::State Seq; - for (auto L : Locs) - ActualEncoded.push_back(SourceLocationEncoding::encode( - SourceLocation::getFromRawEncoding(L), /*BaseOffset=*/0, - /*BaseModuleFileIndex=*/0, Seq)); - if (!ExpectedEncoded.empty()) { - ASSERT_EQ(ActualEncoded, ExpectedEncoded) - << "Encoding " << testing::PrintToString(Locs); - } - } - std::vector DecodedEncoded; - { - LocSeq::State Seq; - for (auto L : ActualEncoded) { - SourceLocation Loc = SourceLocationEncoding::decode(L, Seq).first; - DecodedEncoded.push_back(Loc.getRawEncoding()); - } - ASSERT_EQ(DecodedEncoded, Locs) - << "Decoding " << testing::PrintToString(ActualEncoded); - } -} - constexpr SourceLocation::UIntTy MacroBit = 1 << (sizeof(SourceLocation::UIntTy) * CHAR_BIT - 1); constexpr SourceLocation::UIntTy Big = MacroBit >> 1; -constexpr SourceLocation::UIntTy Biggest = -1; TEST(SourceLocationEncoding, Individual) { roundTrip(1, 2); @@ -77,33 +48,4 @@ TEST(SourceLocationEncoding, Individual) { roundTrip(MacroBit | (Big + 1)); } -TEST(SourceLocationEncoding, Sequence) { - roundTrip({1, 2, 3, 3, 2, 1}, - {2, // 1 - 5, // +2 (+1 of non-raw) - 5, // +2 - 1, // +0 - 4, // -2 - 4} // -2 - ); - roundTrip({100, 0, 100}, - {200, // 100 - 0, // 0 - 1} // +0 - ); - - roundTrip({1, Big}, {2, ((Big - 1) << 2) + 1}); - roundTrip({2, MacroBit | Big}, {4, ((Big - 1) << 2) - 1}); - - roundTrip({3, MacroBit | 5, MacroBit | 4, 
3}, - {6, // 3 - 11, // +5 (+2 of non-raw + set macro bit) - 4, // -2 - 6} // -3 (-2 of non-raw, clear macro bit) - ); - - roundTrip( - {123 | MacroBit, 1, 9, Biggest, Big, Big + 1, 0, MacroBit | Big, 0}); -} - } // namespace diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index 86e19e08270d7..45a2f5c0a61fc 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -582,6 +582,24 @@ macro(add_compiler_rt_script name) DESTINATION ${COMPILER_RT_INSTALL_BINARY_DIR}) endmacro(add_compiler_rt_script src name) + +macro(add_compiler_rt_cfg target_name file_name component arch) + set(src_file "${CMAKE_CURRENT_SOURCE_DIR}/${file_name}") + get_compiler_rt_output_dir(${arch} output_dir) + set(dst_file "${output_dir}/${file_name}") + add_custom_command(OUTPUT ${dst_file} + DEPENDS ${src_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src_file} ${dst_file} + COMMENT "Copying ${file_name}...") + add_custom_target(${target_name} DEPENDS ${dst_file}) + install(FILES ${file_name} + DESTINATION ${COMPILER_RT_INSTALL_LIBRARY_DIR} + COMPONENT ${component}) + add_dependencies(${component} ${target_name}) + + set_target_properties(${target_name} PROPERTIES FOLDER "Compiler-RT Misc") +endmacro() + # Builds custom version of libc++ and installs it in . # Can be used to build sanitized versions of libc++ for running unit tests. # add_custom_libcxx( diff --git a/compiler-rt/lib/asan/CMakeLists.txt b/compiler-rt/lib/asan/CMakeLists.txt index e2f39f224df9c..97cc5c85703e1 100644 --- a/compiler-rt/lib/asan/CMakeLists.txt +++ b/compiler-rt/lib/asan/CMakeLists.txt @@ -281,6 +281,8 @@ else() PARENT_TARGET asan) endif() + # On AIX, we only need the static libraries. 
+ if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") foreach(arch ${ASAN_SUPPORTED_ARCH}) if (COMPILER_RT_HAS_VERSION_SCRIPT) if(WIN32) @@ -382,10 +384,21 @@ else() endif() endif() endforeach() + endif() endif() add_compiler_rt_resource_file(asan_ignorelist asan_ignorelist.txt asan) +# On AIX, static sanitizer libraries are not added to the DSO, so we need to put +# asan.link_with_main_exec.txt and asan_cxx.link_with_main_exec.txt to the build +# and install dir for use in resolving undefined sanitizer symbols at runtime. +if (${CMAKE_SYSTEM_NAME} MATCHES "AIX") + foreach(arch ${ASAN_SUPPORTED_ARCH}) + add_compiler_rt_cfg(asan_symbols_${arch} asan.link_with_main_exec.txt asan ${arch}) + add_compiler_rt_cfg(asan_cxx_symbols_${arch} asan_cxx.link_with_main_exec.txt asan ${arch}) + endforeach() +endif() + add_subdirectory(scripts) if(COMPILER_RT_INCLUDE_TESTS) diff --git a/compiler-rt/lib/asan/asan.link_with_main_exec.txt b/compiler-rt/lib/asan/asan.link_with_main_exec.txt new file mode 100644 index 0000000000000..5efc48c262369 --- /dev/null +++ b/compiler-rt/lib/asan/asan.link_with_main_exec.txt @@ -0,0 +1,115 @@ +#! . 
+__asan_report_load_n +__asan_loadN +__asan_report_load1 +__asan_load1 +__asan_report_load2 +__asan_load2 +__asan_report_load4 +__asan_load4 +__asan_report_load8 +__asan_load8 +__asan_report_load16 +__asan_load16 +__asan_report_store_n +__asan_storeN +__asan_report_store1 +__asan_store1 +__asan_report_store2 +__asan_store2 +__asan_report_store4 +__asan_store4 +__asan_report_store8 +__asan_store8 +__asan_report_store16 +__asan_store16 +__asan_report_exp_load_n +__asan_exp_loadN +__asan_report_exp_load1 +__asan_exp_load1 +__asan_report_exp_load2 +__asan_exp_load2 +__asan_report_exp_load4 +__asan_exp_load4 +__asan_report_exp_load8 +__asan_exp_load8 +__asan_report_exp_load16 +__asan_exp_load16 +__asan_report_exp_store_n +__asan_exp_storeN +__asan_report_exp_store1 +__asan_exp_store1 +__asan_report_exp_store2 +__asan_exp_store2 +__asan_report_exp_store4 +__asan_exp_store4 +__asan_report_exp_store8 +__asan_exp_store8 +__asan_report_exp_store16 +__asan_exp_store16 +__asan_memmove +__asan_memcpy +__asan_memset +__asan_handle_no_return +__sanitizer_ptr_cmp +__sanitizer_ptr_sub +__asan_before_dynamic_init +__asan_after_dynamic_init +__asan_register_globals +__asan_unregister_globals +__asan_register_image_globals +__asan_unregister_image_globals +__asan_register_elf_globals +__asan_unregister_elf_globals +__asan_init +__asan_version_mismatch_check_v8 +__asan_stack_malloc_0 +__asan_stack_malloc_1 +__asan_stack_malloc_2 +__asan_stack_malloc_3 +__asan_stack_malloc_4 +__asan_stack_malloc_5 +__asan_stack_malloc_6 +__asan_stack_malloc_7 +__asan_stack_malloc_8 +__asan_stack_malloc_9 +__asan_stack_malloc_10 +__asan_stack_malloc_always_0 +__asan_stack_malloc_always_1 +__asan_stack_malloc_always_2 +__asan_stack_malloc_always_3 +__asan_stack_malloc_always_4 +__asan_stack_malloc_always_5 +__asan_stack_malloc_always_6 +__asan_stack_malloc_always_7 +__asan_stack_malloc_always_8 +__asan_stack_malloc_always_9 +__asan_stack_malloc_always_10 +__asan_stack_free_0 +__asan_stack_free_1 
+__asan_stack_free_2 +__asan_stack_free_3 +__asan_stack_free_4 +__asan_stack_free_5 +__asan_stack_free_6 +__asan_stack_free_7 +__asan_stack_free_8 +__asan_stack_free_9 +__asan_stack_free_10 +__asan_set_shadow_00 +__asan_set_shadow_01 +__asan_set_shadow_02 +__asan_set_shadow_03 +__asan_set_shadow_04 +__asan_set_shadow_05 +__asan_set_shadow_06 +__asan_set_shadow_07 +__asan_set_shadow_f1 +__asan_set_shadow_f2 +__asan_set_shadow_f3 +__asan_set_shadow_f5 +__asan_set_shadow_f8 +__asan_poison_stack_memory +__asan_unpoison_stack_memory +__asan_option_detect_stack_use_after_return +__asan_shadow_memory_dynamic_address diff --git a/compiler-rt/lib/asan/asan_cxx.link_with_main_exec.txt b/compiler-rt/lib/asan/asan_cxx.link_with_main_exec.txt new file mode 100644 index 0000000000000..7387f8173e859 --- /dev/null +++ b/compiler-rt/lib/asan/asan_cxx.link_with_main_exec.txt @@ -0,0 +1,21 @@ +#! . +_ZdaPv +_ZdaPvRKSt9nothrow_t +_ZdaPvSt11align_val_t +_ZdaPvSt11align_val_tRKSt9nothrow_t +_ZdaPvm +_ZdaPvmSt11align_val_t +_ZdlPv +_ZdlPvRKSt9nothrow_t +_ZdlPvSt11align_val_t +_ZdlPvSt11align_val_tRKSt9nothrow_t +_ZdlPvm +_ZdlPvmSt11align_val_t +_Znam +_ZnamRKSt9nothrow_t +_ZnamSt11align_val_t +_ZnamSt11align_val_tRKSt9nothrow_t +_Znwm +_ZnwmRKSt9nothrow_t +_ZnwmSt11align_val_t +_ZnwmSt11align_val_tRKSt9nothrow_t diff --git a/compiler-rt/lib/asan/asan_descriptions.cpp b/compiler-rt/lib/asan/asan_descriptions.cpp index c9f3e4d682d95..0c30959b23e28 100644 --- a/compiler-rt/lib/asan/asan_descriptions.cpp +++ b/compiler-rt/lib/asan/asan_descriptions.cpp @@ -211,10 +211,10 @@ bool GetStackAddressInformation(uptr addr, uptr access_size, descr->frame_pc = access.frame_pc; descr->frame_descr = access.frame_descr; -#if SANITIZER_PPC64V1 - // On PowerPC64 ELFv1, the address of a function actually points to a - // three-doubleword data structure with the first field containing - // the address of the function's code. 
+#if SANITIZER_PPC64V1 || SANITIZER_AIX + // On PowerPC64 ELFv1 or AIX, the address of a function actually points to a + // three-doubleword (or three-word for 32-bit AIX) data structure with + // the first field containing the address of the function's code. descr->frame_pc = *reinterpret_cast(descr->frame_pc); #endif descr->frame_pc += 16; @@ -444,6 +444,16 @@ AddressDescription::AddressDescription(uptr addr, uptr access_size, data.kind = kAddressKindShadow; return; } + + // Check global first. On AIX, some global data defined in shared libraries + // are put to the STACK region for unknown reasons. Check global first can + // workaround this issue. + // TODO: Look into whether there's a different solution to this problem. + if (GetGlobalAddressInformation(addr, access_size, &data.global)) { + data.kind = kAddressKindGlobal; + return; + } + if (GetHeapAddressInformation(addr, access_size, &data.heap)) { data.kind = kAddressKindHeap; return; @@ -461,10 +471,6 @@ AddressDescription::AddressDescription(uptr addr, uptr access_size, return; } - if (GetGlobalAddressInformation(addr, access_size, &data.global)) { - data.kind = kAddressKindGlobal; - return; - } data.kind = kAddressKindWild; data.wild.addr = addr; data.wild.access_size = access_size; diff --git a/compiler-rt/test/tsan/java_heap_init2.cpp b/compiler-rt/test/tsan/java_heap_init2.cpp new file mode 100644 index 0000000000000..2e5724d930e8f --- /dev/null +++ b/compiler-rt/test/tsan/java_heap_init2.cpp @@ -0,0 +1,34 @@ +// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s +// XFAIL: * + +#include "java.h" +#include +#include + +int main() { + // Test a non-regular kHeapSize + // Previously __tsan_java_init failed because it encountered non-zero meta + // shadow for the destination. 
+ size_t const kPageSize = sysconf(_SC_PAGESIZE); + int const kSize = kPageSize - 1; + jptr jheap2 = (jptr)mmap(0, kSize, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + if (jheap2 == (jptr)MAP_FAILED) + return printf("mmap failed with %d\n", errno); + __atomic_store_n((int *)(jheap2 + kSize - 3), 1, __ATOMIC_RELEASE); + // Due to the previous incorrect meta-end calculation, the following munmap + // did not clear the tail meta shadow. + munmap((void *)jheap2, kSize); + int const kHeapSize2 = kSize + 1; + jheap2 = (jptr)mmap((void *)jheap2, kHeapSize2, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + if (jheap2 == (jptr)MAP_FAILED) + return printf("second mmap failed with %d\n", errno); + __tsan_java_init(jheap2, kHeapSize2); + __tsan_java_move(jheap2, jheap2 + kHeapSize2 - 8, 8); + fprintf(stderr, "DONE\n"); + return __tsan_java_fini(); +} + +// CHECK-NOT: WARNING: ThreadSanitizer: data race +// CHECK: DONE diff --git a/compiler-rt/test/tsan/munmap_clear_shadow.c b/compiler-rt/test/tsan/munmap_clear_shadow.c new file mode 100644 index 0000000000000..8a435a84258f5 --- /dev/null +++ b/compiler-rt/test/tsan/munmap_clear_shadow.c @@ -0,0 +1,59 @@ +// RUN: %clang_tsan %s -o %t && %run %t | FileCheck %s +// XFAIL: * + +#include "test.h" +#include +#include +#include +#include +#include +#include + +void __tsan_read1(void *addr); + +struct thread_params { + char *buf; + unsigned int size; +}; + +static void *thread_func(void *arg) { + struct thread_params *p = (struct thread_params *)arg; + // Access 1 + p->buf[0] = 0x42; + p->buf[p->size - 1] = 0x42; + barrier_wait(&barrier); + return 0; +} + +int main() { + const unsigned int kPageSize = sysconf(_SC_PAGESIZE); + // The relevant shadow memory size should be exactly multiple of kPageSize, + // even if Size = kPageSize - 1. 
+ const unsigned int Size = kPageSize - 1; + + barrier_init(&barrier, 2); + char *buf = (char *)mmap(NULL, Size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(buf != MAP_FAILED); + assert(((uintptr_t)buf % kPageSize) == 0); + + pthread_t t; + struct thread_params p = {buf, Size}; + pthread_create(&t, 0, thread_func, &p); + + barrier_wait(&barrier); + // Should clear all the shadow memory related to the mmaped memory. + munmap(buf, Size); + + // If the shadow memory is cleared completely, the following reads should not + // cause races and behave the same. However, previously, __tsan_read1(&buf[0]) + // would not report a race, while __tsan_read1(&buf[Size - 1]) did. + // CHECK-NOT: WARNING: ThreadSanitizer: data race + __tsan_read1(&buf[0]); // Access 2 + __tsan_read1(&buf[Size - 1]); // Access 2 + pthread_join(t, 0); + + puts("DONE"); + + return 0; +} diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index 36be369595ffd..35da8323e0a10 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -54,6 +54,11 @@ page](https://llvm.org/releases/). now be emitted into Clang's per-target resource directory (next to libclang_rt.*.*) where it is also found by Flang's driver. + * Flang on AArch64 now always depends on compiler-rt to provide the + `__trampoline_setup` function. This dependency will be automatically added + in in-tree builds with the AArch64 target, but compiler-rt will need to be + manually added to LLVM builds when building flang out-of-tree. 
+ ## New Issues Found diff --git a/flang/include/flang/Optimizer/Dialect/FIRTypes.td b/flang/include/flang/Optimizer/Dialect/FIRTypes.td index 6fad77dffd9bc..0ead54df3ca97 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRTypes.td +++ b/flang/include/flang/Optimizer/Dialect/FIRTypes.td @@ -330,7 +330,8 @@ def fir_RecordType : FIR_Type<"Record", "type"> { let extraClassDeclaration = [{ using TypePair = std::pair; - using TypeList = std::vector; + using TypeList = llvm::ArrayRef; + using TypeVector = llvm::SmallVector; TypeList getTypeList() const; TypeList getLenParamList() const; diff --git a/flang/include/flang/Support/OpenMP-features.h b/flang/include/flang/Support/OpenMP-features.h index 349cd19c1224f..5e722930ae1b2 100644 --- a/flang/include/flang/Support/OpenMP-features.h +++ b/flang/include/flang/Support/OpenMP-features.h @@ -15,16 +15,8 @@ namespace Fortran::common { template void setOpenMPMacro(int version, FortranPredefinitions &predefinitions) { switch (version) { - case 20: - predefinitions.emplace_back("_OPENMP", "200011"); - break; - case 25: - predefinitions.emplace_back("_OPENMP", "200505"); - break; - case 30: - predefinitions.emplace_back("_OPENMP", "200805"); - break; case 31: + default: predefinitions.emplace_back("_OPENMP", "201107"); break; case 40: @@ -45,10 +37,6 @@ void setOpenMPMacro(int version, FortranPredefinitions &predefinitions) { case 60: predefinitions.emplace_back("_OPENMP", "202411"); break; - case 11: - default: - predefinitions.emplace_back("_OPENMP", "199911"); - break; } } } // namespace Fortran::common diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 2603a3f6dc643..07d6814da8671 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -26,6 +26,7 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/OptionUtils.h" #include "clang/Driver/Options.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" 
#include "llvm/ADT/StringSwitch.h" #include "llvm/Frontend/Debug/Options.h" @@ -44,6 +45,7 @@ #include #include #include +#include using namespace Fortran::frontend; @@ -1140,11 +1142,43 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, res.getLangOpts().OpenMPVersion = 31; res.getFrontendOpts().features.Enable( Fortran::common::LanguageFeature::OpenMP); - if (int Version = getLastArgIntValue( - args, clang::driver::options::OPT_fopenmp_version_EQ, - res.getLangOpts().OpenMPVersion, diags)) { - res.getLangOpts().OpenMPVersion = Version; + if (auto *arg = + args.getLastArg(clang::driver::options::OPT_fopenmp_version_EQ)) { + llvm::ArrayRef ompVersions = llvm::omp::getOpenMPVersions(); + unsigned oldVersions[] = {11, 20, 25, 30}; + unsigned version = 0; + + auto reportBadVersion = [&](llvm::StringRef value) { + const unsigned diagID = + diags.getCustomDiagID(clang::DiagnosticsEngine::Error, + "'%0' is not a valid OpenMP version in '%1', " + "valid versions are %2"); + std::string buffer; + llvm::raw_string_ostream versions(buffer); + llvm::interleaveComma(ompVersions, versions); + + diags.Report(diagID) << value << arg->getAsString(args) << versions.str(); + }; + + llvm::StringRef value = arg->getValue(); + if (!value.getAsInteger(/*radix=*/10, version)) { + if (llvm::is_contained(ompVersions, version)) { + res.getLangOpts().OpenMPVersion = version; + } else if (llvm::is_contained(oldVersions, version)) { + const unsigned diagID = + diags.getCustomDiagID(clang::DiagnosticsEngine::Warning, + "OpenMP version %0 is no longer supported, " + "assuming version %1"); + std::string assumed = std::to_string(res.getLangOpts().OpenMPVersion); + diags.Report(diagID) << value << assumed; + } else { + reportBadVersion(value); + } + } else { + reportBadVersion(value); + } } + if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) { res.getLangOpts().OpenMPForceUSM = 1; } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp 
index 336a6f82319e6..8506b9a984e58 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -68,8 +68,8 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" -#include "mlir/IR/StateStack.h" #include "mlir/Parser/Parser.h" +#include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 8575d8cf352fd..60b6366c184d4 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -39,7 +39,7 @@ #include "flang/Support/OpenMP-utils.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" -#include "mlir/IR/StateStack.h" +#include "mlir/Support/StateStack.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 78571f1f4bc2d..2ff1d6d945ba3 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -261,6 +261,8 @@ mlir::Type dyn_cast_ptrOrBoxEleTy(mlir::Type t) { } static bool hasDynamicSize(fir::RecordType recTy) { + if (recTy.getLenParamList().empty()) + return false; for (auto field : recTy.getTypeList()) { if (auto arr = mlir::dyn_cast(field.second)) { if (sequenceWithNonConstantShape(arr)) @@ -1006,7 +1008,7 @@ mlir::Type fir::RecordType::parse(mlir::AsmParser &parser) { return {}; RecordType result = RecordType::get(parser.getContext(), name); - RecordType::TypeList lenParamList; + RecordType::TypeVector lenParamList; if (!parser.parseOptionalLParen()) { while (true) { llvm::StringRef lenparam; @@ -1024,7 +1026,7 @@ mlir::Type fir::RecordType::parse(mlir::AsmParser &parser) { return {}; } - RecordType::TypeList typeList; + RecordType::TypeVector 
typeList; if (!parser.parseOptionalLess()) { result.pack(true); } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index f7efaa736a279..33f687db08f9a 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -326,11 +326,14 @@ class DeclareOpConversion : public mlir::OpRewritePattern { auto genHlfirBox = [&]() -> mlir::Value { if (auto baseBoxType = mlir::dyn_cast(firBase.getType())) { - // Rebox so that lower bounds are correct. + // Rebox so that lower bounds and attributes are correct. if (baseBoxType.isAssumedRank()) return builder.create( loc, hlfirBaseType, firBase, fir::LowerBoundModifierAttribute::SetToOnes); + if (!fir::extractSequenceType(baseBoxType.getEleTy()) && + baseBoxType == hlfirBaseType) + return firBase; return builder.create(loc, hlfirBaseType, firBase, declareOp.getShape(), /*slice=*/mlir::Value{}); diff --git a/flang/test/Driver/bbc-openmp-version-macro.f90 b/flang/test/Driver/bbc-openmp-version-macro.f90 index 6fa19e1672ad8..193c9d297de4f 100644 --- a/flang/test/Driver/bbc-openmp-version-macro.f90 +++ b/flang/test/Driver/bbc-openmp-version-macro.f90 @@ -1,29 +1,22 @@ ! Test predefined _OPENMP macro which denotes OpenMP version ! RUN: bbc -fopenmp -o - %s | FileCheck %s --check-prefix=DEFAULT-OPENMP-VERSION -! RUN: bbc -fopenmp -fopenmp-version=11 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-11 -! RUN: bbc -fopenmp -fopenmp-version=11 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-11 -! RUN: bbc -fopenmp -fopenmp-version=20 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-20 -! RUN: bbc -fopenmp -fopenmp-version=25 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-25 -! RUN: bbc -fopenmp -fopenmp-version=30 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-30 ! RUN: bbc -fopenmp -fopenmp-version=31 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-31 ! 
RUN: bbc -fopenmp -fopenmp-version=40 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-40 ! RUN: bbc -fopenmp -fopenmp-version=45 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-45 ! RUN: bbc -fopenmp -fopenmp-version=50 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-50 ! RUN: bbc -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-51 ! RUN: bbc -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-52 +! RUN: bbc -fopenmp -fopenmp-version=60 -o - %s | FileCheck %s --check-prefix=OPENMP-VERSION-60 ! DEFAULT-OPENMP-VERSION: {{.*}} = arith.constant 201107 : i32 -! OPENMP-VERSION-11: {{.*}} = arith.constant 199911 : i32 -! OPENMP-VERSION-20: {{.*}} = arith.constant 200011 : i32 -! OPENMP-VERSION-25: {{.*}} = arith.constant 200505 : i32 -! OPENMP-VERSION-30: {{.*}} = arith.constant 200805 : i32 ! OPENMP-VERSION-31: {{.*}} = arith.constant 201107 : i32 ! OPENMP-VERSION-40: {{.*}} = arith.constant 201307 : i32 ! OPENMP-VERSION-45: {{.*}} = arith.constant 201511 : i32 ! OPENMP-VERSION-50: {{.*}} = arith.constant 201811 : i32 ! OPENMP-VERSION-51: {{.*}} = arith.constant 202011 : i32 ! OPENMP-VERSION-52: {{.*}} = arith.constant 202111 : i32 +! OPENMP-VERSION-60: {{.*}} = arith.constant 202411 : i32 #if _OPENMP integer :: var1 = _OPENMP diff --git a/flang/test/Driver/flang-openmp-version-macro.f90 b/flang/test/Driver/flang-openmp-version-macro.f90 index f690ab3819482..fcabfefca7f18 100644 --- a/flang/test/Driver/flang-openmp-version-macro.f90 +++ b/flang/test/Driver/flang-openmp-version-macro.f90 @@ -1,10 +1,6 @@ ! Test predefined _OPENMP macro which denotes OpenMP version ! RUN: %flang_fc1 -fopenmp -cpp -E %s | FileCheck %s --check-prefix=DEFAULT-OPENMP-VERSION -! RUN: %flang_fc1 -fopenmp -fopenmp-version=11 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-11 -! RUN: %flang_fc1 -fopenmp -fopenmp-version=20 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-20 -! 
RUN: %flang_fc1 -fopenmp -fopenmp-version=25 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-25 -! RUN: %flang_fc1 -fopenmp -fopenmp-version=30 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-30 ! RUN: %flang_fc1 -fopenmp -fopenmp-version=31 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-31 ! RUN: %flang_fc1 -fopenmp -fopenmp-version=40 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-40 ! RUN: %flang_fc1 -fopenmp -fopenmp-version=45 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-45 @@ -14,10 +10,6 @@ ! RUN: %flang_fc1 -fopenmp -fopenmp-version=60 -cpp -E %s | FileCheck %s --check-prefix=OPENMP-VERSION-60 ! DEFAULT-OPENMP-VERSION: integer :: var1 = 201107 -! OPENMP-VERSION-11: integer :: var1 = 199911 -! OPENMP-VERSION-20: integer :: var1 = 200011 -! OPENMP-VERSION-25: integer :: var1 = 200505 -! OPENMP-VERSION-30: integer :: var1 = 200805 ! OPENMP-VERSION-31: integer :: var1 = 201107 ! OPENMP-VERSION-40: integer :: var1 = 201307 ! OPENMP-VERSION-45: integer :: var1 = 201511 diff --git a/flang/test/Driver/fopenmp-version.F90 b/flang/test/Driver/fopenmp-version.F90 new file mode 100644 index 0000000000000..c2866561461b7 --- /dev/null +++ b/flang/test/Driver/fopenmp-version.F90 @@ -0,0 +1,25 @@ +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=31 %s | FileCheck --check-prefix=V31 %s +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=40 %s | FileCheck --check-prefix=V40 %s +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=45 %s | FileCheck --check-prefix=V45 %s +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=50 %s | FileCheck --check-prefix=V50 %s +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix=V51 %s +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=52 %s | FileCheck --check-prefix=V52 %s +!RUN: %flang -dM -E -o - -fopenmp -fopenmp-version=60 %s | FileCheck --check-prefix=V60 %s + +!V31: #define _OPENMP 201107 +!V40: #define _OPENMP 201307 +!V45: #define _OPENMP 201511 +!V50: 
#define _OPENMP 201811 +!V51: #define _OPENMP 202011 +!V52: #define _OPENMP 202111 +!V60: #define _OPENMP 202411 + + +!RUN: %flang -c -fopenmp -fopenmp-version=25 %s 2>&1 | FileCheck --check-prefix=WARN-ASSUMED %s + +!WARN-ASSUMED: warning: OpenMP version 25 is no longer supported, assuming version 31 + + +!RUN: not %flang -c -fopenmp -fopenmp-version=29 %s 2>&1 | FileCheck --check-prefix=ERR-BAD %s + +!ERR-BAD: error: '29' is not a valid OpenMP version in '-fopenmp-version=29', valid versions are 31, 40, 45, 50, 51, 52, 60 diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 6d8a8bb606b90..0e2bfe48a807d 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -1817,8 +1817,8 @@ func.func private @custom_typeP.field_1.offset() -> i32 func.func private @custom_typeP.field_2.offset() -> i32 func.func @field_index_dynamic_size() -> () { - %1 = fir.field_index field_1, !fir.type}> - %2 = fir.field_index field_2, !fir.type}> + %1 = fir.field_index field_1, !fir.type}> + %2 = fir.field_index field_2, !fir.type}> return } diff --git a/flang/test/HLFIR/declare-codegen.fir b/flang/test/HLFIR/declare-codegen.fir index bd0d61a2559db..a4edb630c4adb 100644 --- a/flang/test/HLFIR/declare-codegen.fir +++ b/flang/test/HLFIR/declare-codegen.fir @@ -219,3 +219,21 @@ func.func @assumed_rank_declare(%arg0: !fir.box>) { // CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { // CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> !fir.box> // CHECK: %[[VAL_2:.*]] = fir.rebox_assumed_rank %[[VAL_1]] lbs ones : (!fir.box>) -> !fir.box> + +func.func @no_useless_rebox(%arg0: !fir.class>) { + %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.class>) -> (!fir.class>, !fir.class>) + fir.call @takes_class(%0#0) : (!fir.class>) -> () + return +} +// CHECK-LABEL: @no_useless_rebox +// CHECK-NOT: fir.rebox +// CHECK: return + +func.func @rebox_scalar_attrs(%arg0: !fir.class>>) { + %0:2 = hlfir.declare %arg0 
{uniq_name = "x"} : (!fir.class>>) -> (!fir.class>, !fir.class>) + fir.call @takes_class(%0#0) : (!fir.class>) -> () + return +} +// CHECK-LABEL: @rebox_scalar_attrs +// CHECK: fir.rebox %{{.*}} : (!fir.class>>) -> !fir.class> +// CHECK: return diff --git a/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 b/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 new file mode 100644 index 0000000000000..f1a150d5dfabc --- /dev/null +++ b/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 @@ -0,0 +1,30 @@ +!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s --check-prefix=NORT +!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s --check-prefix=LLVM + +!Make sure that there are no calls to the mapper. +!NORT-NOT: call{{.*}}__tgt_target_data_begin_mapper +!NORT-NOT: call{{.*}}__tgt_target_data_end_mapper + +!Make sure we generate the body +!LLVM: define internal void @_QFPf(ptr %[[A0:[0-9]+]], ptr %[[A1:[0-9]+]]) +!LLVM: %[[V0:[0-9]+]] = load i32, ptr %[[A0]], align 4 +!LLVM: %[[V1:[0-9]+]] = load i32, ptr %[[A1]], align 4 +!LLVM: %[[V2:[0-9]+]] = add i32 %[[V0]], %[[V1]] +!LLVM: store i32 %[[V2]], ptr %[[A0]], align 4 +!LLVM: ret void +!LLVM: } + + +program test + +call f(1, 2) + +contains + +subroutine f(x, y) + integer :: x, y + !$omp target data map(tofrom: x, y) + x = x + y + !$omp end target data +end subroutine +end diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index c8a6d6e648af9..6b3fc9485ec1a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1251,6 +1251,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.wchar.mbrtowc libc.src.wchar.mbtowc libc.src.wchar.wcrtomb + libc.src.wchar.wctomb ) endif() diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 98cb3bdaf0ac9..397296894829d 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -175,6 +175,13 @@ functions: - type: char *__restrict - type: wchar_t - 
type: mbstate_t *__restrict + - name: wctomb + standards: + - stdc + return_type: int + arguments: + - type: char * + - type: wchar_t - name: wcscpy standards: - stdc diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index 1f81de4248ff0..c54a1b751f402 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -25,6 +25,9 @@ constexpr size_t ENCODED_BITS_PER_UTF8 = 6; // Information not metadata (# of bits excluding the byte headers) constexpr uint32_t MASK_ENCODED_BITS = mask_trailing_ones(); +// Maximum value for utf-32 for a utf-8 sequence of a given length +constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f, 0x7ff, 0xffff, 0x10ffff}; +constexpr int MAX_UTF8_LENGTH = 4; CharacterConverter::CharacterConverter(mbstate *mbstate) { state = mbstate; } @@ -40,6 +43,17 @@ bool CharacterConverter::isFull() { bool CharacterConverter::isEmpty() { return state->bytes_stored == 0; } +bool CharacterConverter::isValidState() { + if (state->total_bytes > MAX_UTF8_LENGTH) + return false; + + const char32_t max_utf32_value = + state->total_bytes == 0 ? 
0 + : MAX_VALUE_PER_UTF8_LEN[state->total_bytes - 1]; + return state->bytes_stored <= state->total_bytes && + state->partial <= max_utf32_value; +} + int CharacterConverter::push(char8_t utf8_byte) { uint8_t num_ones = static_cast(cpp::countl_one(utf8_byte)); // Checking the first byte if first push @@ -90,9 +104,7 @@ int CharacterConverter::push(char32_t utf32) { state->partial = utf32; // determine number of utf-8 bytes needed to represent this utf32 value - constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f, 0x7ff, 0xffff, 0x10ffff}; - constexpr int NUM_RANGES = 4; - for (uint8_t i = 0; i < NUM_RANGES; i++) { + for (uint8_t i = 0; i < MAX_UTF8_LENGTH; i++) { if (state->partial <= MAX_VALUE_PER_UTF8_LEN[i]) { state->total_bytes = i + 1; state->bytes_stored = i + 1; diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h index be0e6129df236..d9a63fdc0522c 100644 --- a/libc/src/__support/wchar/character_converter.h +++ b/libc/src/__support/wchar/character_converter.h @@ -28,6 +28,7 @@ class CharacterConverter { void clear(); bool isFull(); bool isEmpty(); + bool isValidState(); int push(char8_t utf8_byte); int push(char32_t utf32); diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index f390785e5817b..16664100d42c7 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -48,6 +48,19 @@ add_entrypoint_object( libc.src.__support.wchar.mbstate ) +add_entrypoint_object( + wctomb + SRCS + wctomb.cpp + HDRS + wctomb.h + DEPENDS + libc.hdr.types.wchar_t + libc.src.__support.wchar.wcrtomb + libc.src.__support.wchar.mbstate + libc.src.__support.libc_errno +) + add_entrypoint_object( mbrtowc SRCS diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp new file mode 100644 index 0000000000000..142302e6ae09b --- /dev/null +++ b/libc/src/wchar/wctomb.cpp @@ -0,0 +1,35 @@ +//===-- Implementation of wctomb ------------------------------------------===// +// +// Part 
of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wctomb.h" + +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/wcrtomb.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) { + static internal::mbstate internal_mbstate; + if (s == nullptr) + return 0; + + auto result = internal::wcrtomb(s, wc, &internal_mbstate); + + if (!result.has_value()) { // invalid wide character + libc_errno = EILSEQ; + return -1; + } + + return static_cast(result.value()); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wctomb.h b/libc/src/wchar/wctomb.h new file mode 100644 index 0000000000000..02a34e5ad229f --- /dev/null +++ b/libc/src/wchar/wctomb.h @@ -0,0 +1,22 @@ +//===-- Implementation header for wctomb ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCTOMB_H +#define LLVM_LIBC_SRC_WCHAR_WCTOMB_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +int wctomb(char *s, wchar_t wc); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCTOMB_H diff --git a/libc/test/src/__support/wchar/utf32_to_8_test.cpp b/libc/test/src/__support/wchar/utf32_to_8_test.cpp index a6a7bc4aa6f4c..1ad523e148845 100644 --- a/libc/test/src/__support/wchar/utf32_to_8_test.cpp +++ b/libc/test/src/__support/wchar/utf32_to_8_test.cpp @@ -186,3 +186,45 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, CantPushMidConversion) { int err = cr.push(utf32); ASSERT_EQ(err, -1); } + +TEST(LlvmLibcCharacterConverterUTF32To8Test, InvalidState) { + LIBC_NAMESPACE::internal::mbstate s1; + LIBC_NAMESPACE::internal::CharacterConverter c1(&s1); + ASSERT_TRUE(c1.isValidState()); + + LIBC_NAMESPACE::internal::mbstate s2{0, 2, 0}; + LIBC_NAMESPACE::internal::CharacterConverter c2(&s2); + ASSERT_FALSE(c2.isValidState()); + + LIBC_NAMESPACE::internal::mbstate s3{0x7f, 1, 1}; + LIBC_NAMESPACE::internal::CharacterConverter c3(&s3); + ASSERT_TRUE(c3.isValidState()); + LIBC_NAMESPACE::internal::mbstate s4{0x80, 1, 1}; + LIBC_NAMESPACE::internal::CharacterConverter c4(&s4); + ASSERT_FALSE(c4.isValidState()); + + LIBC_NAMESPACE::internal::mbstate s5{0x7ff, 1, 2}; + LIBC_NAMESPACE::internal::CharacterConverter c5(&s5); + ASSERT_TRUE(c5.isValidState()); + LIBC_NAMESPACE::internal::mbstate s6{0x800, 1, 2}; + LIBC_NAMESPACE::internal::CharacterConverter c6(&s6); + ASSERT_FALSE(c6.isValidState()); + + LIBC_NAMESPACE::internal::mbstate s7{0xffff, 1, 3}; + LIBC_NAMESPACE::internal::CharacterConverter c7(&s7); + ASSERT_TRUE(c7.isValidState()); + LIBC_NAMESPACE::internal::mbstate s8{0x10000, 
1, 3}; + LIBC_NAMESPACE::internal::CharacterConverter c8(&s8); + ASSERT_FALSE(c8.isValidState()); + + LIBC_NAMESPACE::internal::mbstate s9{0x10ffff, 1, 4}; + LIBC_NAMESPACE::internal::CharacterConverter c9(&s9); + ASSERT_TRUE(c9.isValidState()); + LIBC_NAMESPACE::internal::mbstate s10{0x110000, 1, 2}; + LIBC_NAMESPACE::internal::CharacterConverter c10(&s10); + ASSERT_FALSE(c10.isValidState()); + + LIBC_NAMESPACE::internal::mbstate s11{0, 0, 5}; + LIBC_NAMESPACE::internal::CharacterConverter c11(&s11); + ASSERT_FALSE(c11.isValidState()); +} diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 48688b3bdd1f3..ddf8709a6a2a2 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -74,6 +74,17 @@ add_libc_test( libc.src.__support.libc_errno ) +add_libc_test( + wctomb_test + SUITE + libc_wchar_unittests + SRCS + wctomb_test.cpp + DEPENDS + libc.src.wchar.wctomb + libc.hdr.types.wchar_t +) + add_libc_test( wmemset_test SUITE diff --git a/libc/test/src/wchar/wctomb_test.cpp b/libc/test/src/wchar/wctomb_test.cpp new file mode 100644 index 0000000000000..09fbf52806224 --- /dev/null +++ b/libc/test/src/wchar/wctomb_test.cpp @@ -0,0 +1,73 @@ +//===-- Unittests for wctomb ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/wchar_t.h" +#include "src/__support/libc_errno.h" +#include "src/wchar/wctomb.h" +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcWCToMBTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST(LlvmLibcWCToMBTest, OneByte) { + wchar_t wc = L'U'; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 1); + ASSERT_EQ(mb[0], 'U'); +} + +TEST(LlvmLibcWCToMBTest, TwoByte) { + // testing utf32: 0xff -> utf8: 0xc3 0xbf + wchar_t wc = 0xff; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 2); + ASSERT_EQ(mb[0], static_cast(0xc3)); + ASSERT_EQ(mb[1], static_cast(0xbf)); +} + +TEST(LlvmLibcWCToMBTest, ThreeByte) { + // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 + wchar_t wc = 0xac15; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 3); + ASSERT_EQ(mb[0], static_cast(0xea)); + ASSERT_EQ(mb[1], static_cast(0xb0)); + ASSERT_EQ(mb[2], static_cast(0x95)); +} + +TEST(LlvmLibcWCToMBTest, FourByte) { + // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 + wchar_t wc = 0x1f921; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, 4); + ASSERT_EQ(mb[0], static_cast(0xf0)); + ASSERT_EQ(mb[1], static_cast(0x9f)); + ASSERT_EQ(mb[2], static_cast(0xa4)); + ASSERT_EQ(mb[3], static_cast(0xa1)); +} + +TEST(LlvmLibcWCToMBTest, NullString) { + wchar_t wc = L'A'; + + int cnt = LIBC_NAMESPACE::wctomb(nullptr, wc); + + // no state-dependent encoding + ASSERT_EQ(cnt, 0); +} + +TEST(LlvmLibcWCToMBTest, InvalidWchar) { + wchar_t wc = 0x12ffff; + char mb[4]; + int cnt = LIBC_NAMESPACE::wctomb(mb, wc); + ASSERT_EQ(cnt, -1); + ASSERT_ERRNO_EQ(EILSEQ); +} diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h index de7b977021f8b..b712fe5cf326c 100644 --- 
a/libclc/clc/include/clc/clcmacro.h +++ b/libclc/clc/include/clc/clcmacro.h @@ -179,109 +179,4 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, \ ARG2_TYPE) -// FIXME: Make _CLC_DEFINE_BINARY_BUILTIN avoid scalarization by default, and -// introduce an explicit scalarizing version. -#define _CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(RET_TYPE, FUNCTION, BUILTIN, \ - ARG1_TYPE, ARG2_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \ - return BUILTIN(x, y); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, \ - ARG2_TYPE##2 y) { \ - return BUILTIN(x, y); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, \ - ARG2_TYPE##3 y) { \ - return BUILTIN(x, y); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, \ - ARG2_TYPE##4 y) { \ - return BUILTIN(x, y); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, \ - ARG2_TYPE##8 y) { \ - return BUILTIN(x, y); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, \ - ARG2_TYPE##16 y) { \ - return BUILTIN(x, y); \ - } - -#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG( \ - RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \ - _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \ - ARG2_TYPE) \ - _CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, \ - FUNCTION, ARG1_TYPE, ARG2_TYPE) - -#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ - return BUILTIN(x); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ - return BUILTIN(x); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ - return BUILTIN(x); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ - return BUILTIN(x); \ - } \ - _CLC_DEF _CLC_OVERLOAD 
RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ - return BUILTIN(x); \ - } - -#define _CLC_DEFINE_TERNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \ - ARG2_TYPE, ARG3_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y, \ - ARG3_TYPE z) { \ - return BUILTIN(x, y, z); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \ - ARG3_TYPE##2 z) { \ - return BUILTIN(x, y, z); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \ - ARG3_TYPE##3 z) { \ - return BUILTIN(x, y, z); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, \ - ARG3_TYPE##4 z) { \ - return BUILTIN(x, y, z); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, \ - ARG3_TYPE##8 z) { \ - return BUILTIN(x, y, z); \ - } \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE##16 FUNCTION( \ - ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \ - return BUILTIN(x, y, z); \ - } - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) \ - _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) { \ - return (half)FUNCTION((float)x); \ - } \ - _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half) - -#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) \ - _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) { \ - return (half)FUNCTION((float)x, (float)y); \ - } \ - _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half) - -#pragma OPENCL EXTENSION cl_khr_fp16 : disable - -#else - -#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION) -#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION) - -#endif - #endif // __CLC_CLCMACRO_H__ diff --git a/libclc/clc/include/clc/math/clc_pown.h b/libclc/clc/include/clc/math/clc_pown.h index 67475503f92b7..30628efb19001 100644 --- a/libclc/clc/include/clc/math/clc_pown.h +++ b/libclc/clc/include/clc/math/clc_pown.h @@ -9,7 +9,7 @@ #ifndef __CLC_MATH_CLC_POWN_H__ 
#define __CLC_MATH_CLC_POWN_H__ -#define __CLC_BODY +#define __CLC_BODY #define __CLC_FUNCTION __clc_pown #include diff --git a/libclc/clc/include/clc/math/clc_rootn.h b/libclc/clc/include/clc/math/clc_rootn.h index bf9dd5413c3de..90a25ad52d867 100644 --- a/libclc/clc/include/clc/math/clc_rootn.h +++ b/libclc/clc/include/clc/math/clc_rootn.h @@ -9,7 +9,7 @@ #ifndef __CLC_MATH_CLC_ROOTN_H__ #define __CLC_MATH_CLC_ROOTN_H__ -#define __CLC_BODY +#define __CLC_BODY #define __CLC_FUNCTION __clc_rootn #include diff --git a/libclc/clc/include/clc/math/binary_decl_with_int_second_arg.inc b/libclc/clc/include/clc/shared/binary_decl_with_int_second_arg.inc similarity index 100% rename from libclc/clc/include/clc/math/binary_decl_with_int_second_arg.inc rename to libclc/clc/include/clc/shared/binary_decl_with_int_second_arg.inc diff --git a/libclc/clc/include/clc/math/binary_def_with_int_second_arg.inc b/libclc/clc/include/clc/shared/binary_def_with_int_second_arg.inc similarity index 100% rename from libclc/clc/include/clc/math/binary_def_with_int_second_arg.inc rename to libclc/clc/include/clc/shared/binary_def_with_int_second_arg.inc diff --git a/libclc/clc/lib/generic/math/clc_copysign.cl b/libclc/clc/lib/generic/math/clc_copysign.cl index d336985ebf967..b066c14bcf3f5 100644 --- a/libclc/clc/lib/generic/math/clc_copysign.cl +++ b/libclc/clc/lib/generic/math/clc_copysign.cl @@ -6,30 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __clc_copysign, - __builtin_elementwise_copysign, float, - float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __clc_copysign, - __builtin_elementwise_copysign, double, - double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __clc_copysign, - __builtin_elementwise_copysign, half, 
- half) - -#endif +#define FUNCTION __clc_copysign +#define __CLC_FUNCTION(x) __builtin_elementwise_copysign +#define __CLC_BODY +#include diff --git a/libclc/clc/lib/generic/math/clc_pow.inc b/libclc/clc/lib/generic/math/clc_pow.inc index 98e154984aaa3..8b1f820268ba0 100644 --- a/libclc/clc/lib/generic/math/clc_pow.inc +++ b/libclc/clc/lib/generic/math/clc_pow.inc @@ -330,6 +330,15 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_pow(__CLC_GENTYPE x, const __CLC_GENTYPE lnof2_by_64_head = 0.010830424260348081; const __CLC_GENTYPE lnof2_by_64_tail = -4.359010638708991e-10; + // If v is so large that we need to return INFINITY, or so small that we + // need to return 0, set v to known values that will produce that result. Do + // not try to continue the computation with the original v and patch it up + // afterwards because v may be so large that temp is out of range of int, in + // which case that conversion, and a value based on that conversion being + // passed to __clc_ldexp, results in undefined behavior. + v = v > max_exp_arg ? 1000.0 : v; + v = v < min_exp_arg ? -1000.0 : v; + __CLC_GENTYPE temp = v * sixtyfour_by_lnof2; __CLC_INTN n = __CLC_CONVERT_INTN(temp); __CLC_GENTYPE dn = __CLC_CONVERT_GENTYPE(n); @@ -357,10 +366,6 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_pow(__CLC_GENTYPE x, expv = __clc_fma(f, q, f2) + f1; expv = __clc_ldexp(expv, m); - - expv = v > max_exp_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)0x7FF0000000000000L) - : expv; - expv = v < min_exp_arg ? 0.0 : expv; } // See whether y is an integer. 
diff --git a/libclc/clc/lib/generic/math/clc_pown.inc b/libclc/clc/lib/generic/math/clc_pown.inc index 8bdc407e9ac82..483fd2faf2717 100644 --- a/libclc/clc/lib/generic/math/clc_pown.inc +++ b/libclc/clc/lib/generic/math/clc_pown.inc @@ -317,6 +317,15 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_pown(__CLC_GENTYPE x, const __CLC_GENTYPE lnof2_by_64_head = 0.010830424260348081; const __CLC_GENTYPE lnof2_by_64_tail = -4.359010638708991e-10; + // If v is so large that we need to return INFINITY, or so small that we + // need to return 0, set v to known values that will produce that result. Do + // not try to continue the computation with the original v and patch it up + // afterwards because v may be so large that temp is out of range of int, in + // which case that conversion, and a value based on that conversion being + // passed to __clc_ldexp, results in undefined behavior. + v = v > max_exp_arg ? 1000.0 : v; + v = v < min_exp_arg ? -1000.0 : v; + __CLC_GENTYPE temp = v * sixtyfour_by_lnof2; __CLC_INTN n = __CLC_CONVERT_INTN(temp); __CLC_GENTYPE dn = __CLC_CONVERT_GENTYPE(n); @@ -344,10 +353,6 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_pown(__CLC_GENTYPE x, expv = __clc_fma(f, q, f2) + f1; expv = __clc_ldexp(expv, m); - - expv = v > max_exp_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)0x7FF0000000000000L) - : expv; - expv = v < min_exp_arg ? 0.0 : expv; } // See whether y is an integer. diff --git a/libclc/clc/lib/generic/math/clc_powr.inc b/libclc/clc/lib/generic/math/clc_powr.inc index fbdf3d85de2b7..1244f7f6ac5d6 100644 --- a/libclc/clc/lib/generic/math/clc_powr.inc +++ b/libclc/clc/lib/generic/math/clc_powr.inc @@ -316,6 +316,15 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_powr(__CLC_GENTYPE x, const __CLC_GENTYPE lnof2_by_64_head = 0.010830424260348081; const __CLC_GENTYPE lnof2_by_64_tail = -4.359010638708991e-10; + // If v is so large that we need to return INFINITY, or so small that we + // need to return 0, set v to known values that will produce that result. 
Do + // not try to continue the computation with the original v and patch it up + // afterwards because v may be so large that temp is out of range of int, in + // which case that conversion, and a value based on that conversion being + // passed to __clc_ldexp, results in undefined behavior. + v = v > max_exp_arg ? 1000.0 : v; + v = v < min_exp_arg ? -1000.0 : v; + __CLC_GENTYPE temp = v * sixtyfour_by_lnof2; __CLC_INTN n = __CLC_CONVERT_INTN(temp); __CLC_GENTYPE dn = __CLC_CONVERT_GENTYPE(n); @@ -343,10 +352,6 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_powr(__CLC_GENTYPE x, expv = __clc_fma(f, q, f2) + f1; expv = __clc_ldexp(expv, m); - - expv = v > max_exp_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)0x7FF0000000000000L) - : expv; - expv = v < min_exp_arg ? 0.0 : expv; } // See whether y is an integer. diff --git a/libclc/clc/lib/generic/math/clc_rootn.inc b/libclc/clc/lib/generic/math/clc_rootn.inc index 0c459ae5c3cbb..996f88f145357 100644 --- a/libclc/clc/lib/generic/math/clc_rootn.inc +++ b/libclc/clc/lib/generic/math/clc_rootn.inc @@ -323,6 +323,15 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_rootn(__CLC_GENTYPE x, const __CLC_GENTYPE lnof2_by_64_head = 0.010830424260348081; const __CLC_GENTYPE lnof2_by_64_tail = -4.359010638708991e-10; + // If v is so large that we need to return INFINITY, or so small that we + // need to return 0, set v to known values that will produce that result. Do + // not try to continue the computation with the original v and patch it up + // afterwards because v may be so large that temp is out of range of int, in + // which case that conversion, and a value based on that conversion being + // passed to __clc_ldexp, results in undefined behavior. + v = v > max_exp_arg ? 1000.0 : v; + v = v < min_exp_arg ? 
-1000.0 : v; + __CLC_GENTYPE temp = v * sixtyfour_by_lnof2; __CLC_INTN n = __CLC_CONVERT_INTN(temp); __CLC_GENTYPE dn = __CLC_CONVERT_GENTYPE(n); @@ -350,10 +359,6 @@ _CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_rootn(__CLC_GENTYPE x, expv = __clc_fma(f, q, f2) + f1; expv = __clc_ldexp(expv, m); - - expv = v > max_exp_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)0x7FF0000000000000L) - : expv; - expv = v < min_exp_arg ? 0.0 : expv; } // See whether y is an integer. diff --git a/libclc/opencl/include/clc/opencl/math/ldexp.h b/libclc/opencl/include/clc/opencl/math/ldexp.h index 6dcd2a9548d09..ca50ae6a98312 100644 --- a/libclc/opencl/include/clc/opencl/math/ldexp.h +++ b/libclc/opencl/include/clc/opencl/math/ldexp.h @@ -6,5 +6,10 @@ // //===----------------------------------------------------------------------===// +#define __CLC_FUNCTION ldexp +#define __CLC_BODY +#include +#undef __CLC_FUNCTION + #define __CLC_BODY #include diff --git a/libclc/opencl/include/clc/opencl/math/ldexp.inc b/libclc/opencl/include/clc/opencl/math/ldexp.inc index 116acdff41d37..b5a5cfcafdd53 100644 --- a/libclc/opencl/include/clc/opencl/math/ldexp.inc +++ b/libclc/opencl/include/clc/opencl/math/ldexp.inc @@ -6,10 +6,8 @@ // //===----------------------------------------------------------------------===// -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n); - #ifndef __CLC_SCALAR -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE ldexp(__CLC_GENTYPE x, __CLC_INTN n); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n); #endif diff --git a/libclc/opencl/include/clc/opencl/math/pown.h b/libclc/opencl/include/clc/opencl/math/pown.h index 1d38c68947ba1..bbdf8f8b6e91e 100644 --- a/libclc/opencl/include/clc/opencl/math/pown.h +++ b/libclc/opencl/include/clc/opencl/math/pown.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #define __CLC_FUNCTION pown -#define __CLC_BODY +#define __CLC_BODY #include diff --git 
a/libclc/opencl/include/clc/opencl/math/rootn.h b/libclc/opencl/include/clc/opencl/math/rootn.h index 789f31596d1cd..669aeefb273a9 100644 --- a/libclc/opencl/include/clc/opencl/math/rootn.h +++ b/libclc/opencl/include/clc/opencl/math/rootn.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#define __CLC_BODY +#define __CLC_BODY #define __CLC_FUNCTION rootn #include diff --git a/libclc/opencl/lib/clspv/math/fma.cl b/libclc/opencl/lib/clspv/math/fma.cl index 2722018121224..0f3141a0e09ee 100644 --- a/libclc/opencl/lib/clspv/math/fma.cl +++ b/libclc/opencl/lib/clspv/math/fma.cl @@ -6,8 +6,12 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_TERNARY_BUILTIN(float, fma, __clc_sw_fma, float, float, float) +#define __FLOAT_ONLY +#define FUNCTION fma +#define __CLC_FUNCTION(x) __clc_sw_fma +#define __CLC_BODY + +#include diff --git a/libclc/opencl/lib/generic/common/degrees.cl b/libclc/opencl/lib/generic/common/degrees.cl index 8b17fe4321297..a86003c170bff 100644 --- a/libclc/opencl/lib/generic/common/degrees.cl +++ b/libclc/opencl/lib/generic/common/degrees.cl @@ -6,22 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_UNARY_BUILTIN(float, degrees, __clc_degrees, float) +#define FUNCTION degrees +#define __CLC_BODY -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_UNARY_BUILTIN(double, degrees, __clc_degrees, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN(half, degrees, __clc_degrees, half) - -#endif +#include diff --git a/libclc/opencl/lib/generic/common/radians.cl b/libclc/opencl/lib/generic/common/radians.cl index 1c58c6c4da6f3..b45653be2e782 100644 --- a/libclc/opencl/lib/generic/common/radians.cl +++ b/libclc/opencl/lib/generic/common/radians.cl @@ -6,22 +6,10 
@@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_UNARY_BUILTIN(float, radians, __clc_radians, float) +#define FUNCTION radians +#define __CLC_BODY -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_UNARY_BUILTIN(double, radians, __clc_radians, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_UNARY_BUILTIN(half, radians, __clc_radians, half) - -#endif +#include diff --git a/libclc/opencl/lib/generic/math/fma.cl b/libclc/opencl/lib/generic/math/fma.cl index ee3395bb2c648..c077357a44f0d 100644 --- a/libclc/opencl/lib/generic/math/fma.cl +++ b/libclc/opencl/lib/generic/math/fma.cl @@ -6,23 +6,11 @@ // //===----------------------------------------------------------------------===// -#include #include #include #include -_CLC_DEFINE_TERNARY_BUILTIN(float, fma, __clc_fma, float, float, float) +#define FUNCTION fma +#define __CLC_BODY -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(double, fma, __clc_fma, double, double, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(half, fma, __clc_fma, half, half, half) - -#endif +#include diff --git a/libclc/opencl/lib/generic/math/ldexp.cl b/libclc/opencl/lib/generic/math/ldexp.cl index e3b9b2b3f1363..069ba8251feba 100644 --- a/libclc/opencl/lib/generic/math/ldexp.cl +++ b/libclc/opencl/lib/generic/math/ldexp.cl @@ -6,27 +6,14 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, ldexp, __clc_ldexp, float, int) +#define FUNCTION ldexp +#define __CLC_FUNCTION(x) __clc_ldexp +#define __CLC_BODY -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, ldexp, __clc_ldexp, double, int) - 
-#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, ldexp, __clc_ldexp, half, int) - -#endif +#include // This defines all the ldexp(GENTYPE, int) variants #define __CLC_BODY diff --git a/libclc/opencl/lib/generic/math/mad.cl b/libclc/opencl/lib/generic/math/mad.cl index 20e6903094454..39aa8e884cc03 100644 --- a/libclc/opencl/lib/generic/math/mad.cl +++ b/libclc/opencl/lib/generic/math/mad.cl @@ -6,22 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_TERNARY_BUILTIN(float, mad, __clc_mad, float, float, float) +#define FUNCTION mad +#define __CLC_BODY -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(double, mad, __clc_mad, double, double, double) - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_TERNARY_BUILTIN(half, mad, __clc_mad, half, half, half) - -#endif +#include diff --git a/libclc/opencl/lib/generic/math/nextafter.cl b/libclc/opencl/lib/generic/math/nextafter.cl index ecb187c53069e..6a5a745f82526 100644 --- a/libclc/opencl/lib/generic/math/nextafter.cl +++ b/libclc/opencl/lib/generic/math/nextafter.cl @@ -6,27 +6,11 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, nextafter, __clc_nextafter, - float, float) +#define FUNCTION nextafter +#define __CLC_FUNCTION(x) __clc_nextafter +#define __CLC_BODY -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, nextafter, __clc_nextafter, - double, double) - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, nextafter, __clc_nextafter, half, - half) - -#endif +#include diff --git 
a/libclc/opencl/lib/generic/math/pown.cl b/libclc/opencl/lib/generic/math/pown.cl index a2ed523a41f74..115bae3406f0e 100644 --- a/libclc/opencl/lib/generic/math/pown.cl +++ b/libclc/opencl/lib/generic/math/pown.cl @@ -10,5 +10,5 @@ #include #define FUNCTION pown -#define __CLC_BODY +#define __CLC_BODY #include diff --git a/libclc/opencl/lib/generic/math/rootn.cl b/libclc/opencl/lib/generic/math/rootn.cl index 9f737151b3903..0e1acc95470df 100644 --- a/libclc/opencl/lib/generic/math/rootn.cl +++ b/libclc/opencl/lib/generic/math/rootn.cl @@ -10,5 +10,5 @@ #include #define FUNCTION rootn -#define __CLC_BODY +#define __CLC_BODY #include diff --git a/libclc/opencl/lib/spirv/math/fma.cl b/libclc/opencl/lib/spirv/math/fma.cl index 2722018121224..0f3141a0e09ee 100644 --- a/libclc/opencl/lib/spirv/math/fma.cl +++ b/libclc/opencl/lib/spirv/math/fma.cl @@ -6,8 +6,12 @@ // //===----------------------------------------------------------------------===// -#include #include #include -_CLC_DEFINE_TERNARY_BUILTIN(float, fma, __clc_sw_fma, float, float, float) +#define __FLOAT_ONLY +#define FUNCTION fma +#define __CLC_FUNCTION(x) __clc_sw_fma +#define __CLC_BODY + +#include diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 2eb1921069776..00fad3ff802a8 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -59,7 +59,7 @@ "`P2248R8 `__","Enabling list-initialization for algorithms","2024-03 (Tokyo)","","","" "`P2810R4 `__","``is_debugger_present`` ``is_replaceable``","2024-03 (Tokyo)","","","" "`P1068R11 `__","Vector API for random number generation","2024-03 (Tokyo)","","","" -"`P2944R3 `__","Comparisons for ``reference_wrapper``","2024-03 (Tokyo)","|Partial|","","The changes to ``optional``, ``tuple`` and ``variant`` are not yet implemented" +"`P2944R3 `__","Comparisons for ``reference_wrapper``","2024-03 (Tokyo)","|Partial|","","The changes to ``optional`` and ``tuple`` are not yet implemented" "`P2642R6 
`__","Padded ``mdspan`` layouts","2024-03 (Tokyo)","","","" "`P3029R1 `__","Better ``mdspan``'s CTAD","2024-03 (Tokyo)","|Complete|","19","" "","","","","","" diff --git a/libcxx/include/variant b/libcxx/include/variant index dac6f786cc198..ede9f486ecc2e 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -242,6 +242,7 @@ namespace std { # include <__type_traits/is_assignable.h> # include <__type_traits/is_constructible.h> # include <__type_traits/is_convertible.h> +# include <__type_traits/is_core_convertible.h> # include <__type_traits/is_destructible.h> # include <__type_traits/is_nothrow_assignable.h> # include <__type_traits/is_nothrow_constructible.h> @@ -1442,6 +1443,11 @@ struct __convert_to_bool { }; template +# if _LIBCPP_STD_VER >= 26 + requires(requires(const _Types& __t) { + { __t == __t } -> __core_convertible_to; + } && ...) +# endif _LIBCPP_HIDE_FROM_ABI constexpr bool operator==(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { using __variant_detail::__visitation::__variant; if (__lhs.index() != __rhs.index()) @@ -1474,6 +1480,11 @@ operator<=>(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { # endif // _LIBCPP_STD_VER >= 20 template +# if _LIBCPP_STD_VER >= 26 + requires(requires(const _Types& __t) { + { __t != __t } -> __core_convertible_to; + } && ...) +# endif _LIBCPP_HIDE_FROM_ABI constexpr bool operator!=(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { using __variant_detail::__visitation::__variant; if (__lhs.index() != __rhs.index()) @@ -1484,6 +1495,11 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool operator!=(const variant<_Types...>& __lhs, } template +# if _LIBCPP_STD_VER >= 26 + requires(requires(const _Types& __t) { + { __t < __t } -> __core_convertible_to; + } && ...) 
+# endif _LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { using __variant_detail::__visitation::__variant; if (__rhs.valueless_by_exception()) @@ -1498,6 +1514,11 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const variant<_Types...>& __lhs, } template +# if _LIBCPP_STD_VER >= 26 + requires(requires(const _Types& __t) { + { __t > __t } -> __core_convertible_to; + } && ...) +# endif _LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { using __variant_detail::__visitation::__variant; if (__lhs.valueless_by_exception()) @@ -1512,6 +1533,11 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const variant<_Types...>& __lhs, } template +# if _LIBCPP_STD_VER >= 26 + requires(requires(const _Types& __t) { + { __t <= __t } -> __core_convertible_to; + } && ...) +# endif _LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { using __variant_detail::__visitation::__variant; if (__lhs.valueless_by_exception()) @@ -1526,6 +1552,11 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const variant<_Types...>& __lhs, } template +# if _LIBCPP_STD_VER >= 26 + requires(requires(const _Types& __t) { + { __t >= __t } -> __core_convertible_to; + } && ...) 
+# endif _LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs) { using __variant_detail::__visitation::__variant; if (__rhs.valueless_by_exception()) diff --git a/libcxx/test/std/utilities/variant/variant.relops/relops.pass.cpp b/libcxx/test/std/utilities/variant/variant.relops/relops.pass.cpp index c1a5b8e474a74..2c00703662687 100644 --- a/libcxx/test/std/utilities/variant/variant.relops/relops.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.relops/relops.pass.cpp @@ -39,8 +39,57 @@ #include #include +#include "test_comparisons.h" #include "test_macros.h" +#if TEST_STD_VER >= 26 + +// Test SFINAE. + +// == +static_assert(HasOperatorEqual>); +static_assert(HasOperatorEqual>); + +static_assert(!HasOperatorEqual>); +static_assert(!HasOperatorEqual>); + +// > +static_assert(HasOperatorGreaterThan>); +static_assert(HasOperatorGreaterThan>); + +static_assert(!HasOperatorGreaterThan>); +static_assert(!HasOperatorGreaterThan>); + +// >= +static_assert(HasOperatorGreaterThanEqual>); +static_assert(HasOperatorGreaterThanEqual>); + +static_assert(!HasOperatorGreaterThanEqual>); +static_assert(!HasOperatorGreaterThanEqual>); + +// < +static_assert(HasOperatorLessThan>); +static_assert(HasOperatorLessThan>); + +static_assert(!HasOperatorLessThan>); +static_assert(!HasOperatorLessThan>); + +// <= +static_assert(HasOperatorLessThanEqual>); +static_assert(HasOperatorLessThanEqual>); + +static_assert(!HasOperatorLessThanEqual>); +static_assert(!HasOperatorLessThanEqual>); + +// != +static_assert(HasOperatorNotEqual>); +static_assert(HasOperatorNotEqual>); + +static_assert(!HasOperatorNotEqual>); +static_assert(!HasOperatorNotEqual>); + +#endif + #ifndef TEST_HAS_NO_EXCEPTIONS struct MakeEmptyT { MakeEmptyT() = default; diff --git a/libcxx/test/std/utilities/variant/variant.relops/relops_bool_conv.verify.cpp b/libcxx/test/std/utilities/variant/variant.relops/relops_bool_conv.verify.cpp index 
64248171d1146..392a234e6a9b2 100644 --- a/libcxx/test/std/utilities/variant/variant.relops/relops_bool_conv.verify.cpp +++ b/libcxx/test/std/utilities/variant/variant.relops/relops_bool_conv.verify.cpp @@ -41,7 +41,9 @@ #include "test_macros.h" - +#if TEST_STD_VER >= 26 +// expected-no-diagnostics +#else struct MyBoolExplicit { bool value; constexpr explicit MyBoolExplicit(bool v) : value(v) {} @@ -70,8 +72,7 @@ inline constexpr MyBoolExplicit operator>=(const ComparesToMyBoolExplicit& LHS, return MyBoolExplicit(LHS.value >= RHS.value); } - -int main(int, char**) { +void test() { using V = std::variant; V v1(42); V v2(101); @@ -83,6 +84,6 @@ int main(int, char**) { (void)(v1 <= v2); // expected-note {{here}} (void)(v1 > v2); // expected-note {{here}} (void)(v1 >= v2); // expected-note {{here}} - - return 0; } + +#endif diff --git a/libcxx/test/support/test_comparisons.h b/libcxx/test/support/test_comparisons.h index d9729e0451b49..e37ab44828c70 100644 --- a/libcxx/test/support/test_comparisons.h +++ b/libcxx/test/support/test_comparisons.h @@ -271,12 +271,31 @@ struct PartialOrder { template concept HasOperatorEqual = requires(T1 t1, T2 t2) { t1 == t2; }; +template +concept HasOperatorGreaterThan = requires(T1 t1, T2 t2) { t1 > t2; }; + +template +concept HasOperatorGreaterThanEqual = requires(T1 t1, T2 t2) { t1 >= t2; }; +template +concept HasOperatorLessThan = requires(T1 t1, T2 t2) { t1 < t2; }; + +template +concept HasOperatorLessThanEqual = requires(T1 t1, T2 t2) { t1 <= t2; }; + +template +concept HasOperatorNotEqual = requires(T1 t1, T2 t2) { t1 != t2; }; + template concept HasOperatorSpaceship = requires(T1 t1, T2 t2) { t1 <=> t2; }; struct NonComparable {}; static_assert(!std::equality_comparable); static_assert(!HasOperatorEqual); +static_assert(!HasOperatorGreaterThan); +static_assert(!HasOperatorGreaterThanEqual); +static_assert(!HasOperatorLessThan); +static_assert(!HasOperatorLessThanEqual); +static_assert(!HasOperatorNotEqual); 
static_assert(!HasOperatorSpaceship); class EqualityComparable { @@ -290,6 +309,28 @@ class EqualityComparable { }; static_assert(std::equality_comparable); static_assert(HasOperatorEqual); +static_assert(HasOperatorNotEqual); + +class ThreeWayComparable { +public: + constexpr ThreeWayComparable(int value) : value_{value} {}; + + friend constexpr bool operator==(const ThreeWayComparable&, const ThreeWayComparable&) noexcept = default; + friend constexpr std::strong_ordering + operator<=>(const ThreeWayComparable&, const ThreeWayComparable&) noexcept = default; + +private: + int value_; +}; +static_assert(std::equality_comparable); +static_assert(std::three_way_comparable); +static_assert(HasOperatorEqual); +static_assert(HasOperatorGreaterThan); +static_assert(HasOperatorGreaterThanEqual); +static_assert(HasOperatorLessThan); +static_assert(HasOperatorLessThanEqual); +static_assert(HasOperatorNotEqual); +static_assert(HasOperatorSpaceship); #endif // TEST_STD_VER >= 20 diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 4e7f92dd1991a..b306b2013445c 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -4468,7 +4468,9 @@ Node *AbstractManglingParser::parseType() { return nullptr; if (!consumeIf('_')) return nullptr; - return make(Size, Signed); + // The front end expects this to be available for Substitution + Result = make(Size, Signed); + break; } // ::= Di # char32_t case 'i': diff --git a/libcxxabi/test/DemangleTestCases.inc b/libcxxabi/test/DemangleTestCases.inc index 1e3f7459deaa2..2721d2aa5504e 100644 --- a/libcxxabi/test/DemangleTestCases.inc +++ b/libcxxabi/test/DemangleTestCases.inc @@ -6,6 +6,7 @@ {"_Z1fDU10_", "f(unsigned _BitInt(10))"}, {"_Z1fIfEvDUstPT__", "void f(unsigned _BitInt(sizeof (float*)))"}, {"_Z1fIiEvDBstPT__", "void f(_BitInt(sizeof (int*)))"}, +{"_Z6myfuncRDB8_S0_", "myfunc(_BitInt(8)&, _BitInt(8)&)"}, {"_Z4testI1A1BE1Cv", "C test()"}, 
{"_Z4testI1A1BET0_T_S3_", "B test(A, A)"}, {"_ZN1SgtEi", "S::operator>(int)"}, diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h index 9f82466a83417..2087ef2a11562 100644 --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -602,10 +602,6 @@ class Debugger : public std::enable_shared_from_this, void FlushProcessOutput(Process &process, bool flush_stdout, bool flush_stderr); - void AddProtocolServer(lldb::ProtocolServerSP protocol_server_sp); - void RemoveProtocolServer(lldb::ProtocolServerSP protocol_server_sp); - lldb::ProtocolServerSP GetProtocolServer(llvm::StringRef protocol) const; - SourceManager::SourceFileCache &GetSourceFileCache() { return m_source_file_cache; } @@ -776,8 +772,6 @@ class Debugger : public std::enable_shared_from_this, mutable std::mutex m_progress_reports_mutex; /// @} - llvm::SmallVector m_protocol_servers; - std::mutex m_destroy_callback_mutex; lldb::callback_token_t m_destroy_callback_next_token = 0; struct DestroyCallbackInfo { diff --git a/lldb/include/lldb/Core/ProtocolServer.h b/lldb/include/lldb/Core/ProtocolServer.h index fafe460904323..937256c10aec1 100644 --- a/lldb/include/lldb/Core/ProtocolServer.h +++ b/lldb/include/lldb/Core/ProtocolServer.h @@ -20,8 +20,9 @@ class ProtocolServer : public PluginInterface { ProtocolServer() = default; virtual ~ProtocolServer() = default; - static lldb::ProtocolServerSP Create(llvm::StringRef name, - Debugger &debugger); + static ProtocolServer *GetOrCreate(llvm::StringRef name); + + static std::vector GetSupportedProtocols(); struct Connection { Socket::SocketProtocol protocol; diff --git a/lldb/include/lldb/Target/MemoryTagManager.h b/lldb/include/lldb/Target/MemoryTagManager.h index 6bd4180fff703..5b7219692d77f 100644 --- a/lldb/include/lldb/Target/MemoryTagManager.h +++ b/lldb/include/lldb/Target/MemoryTagManager.h @@ -122,11 +122,15 @@ class MemoryTagManager { // // 'reader' will always be a wrapper around a CoreFile in real 
use // but allows testing without having to mock a CoreFile. + // + // This call will fail in the case that the core file segment does not contain + // enough data to read all the tags. typedef std::function CoreReaderFn; - std::vector virtual UnpackTagsFromCoreFileSegment( - CoreReaderFn reader, lldb::addr_t tag_segment_virtual_address, - lldb::addr_t tag_segment_data_address, lldb::addr_t addr, - size_t len) const = 0; + llvm:: + Expected> virtual UnpackTagsFromCoreFileSegment( + CoreReaderFn reader, lldb::addr_t tag_segment_virtual_address, + lldb::addr_t tag_segment_data_address, lldb::addr_t addr, + size_t len) const = 0; // Pack uncompressed tags into their storage format (e.g. for gdb QMemTags). // Checks that each tag is within the expected value range. diff --git a/lldb/include/lldb/Utility/XcodeSDK.h b/lldb/include/lldb/Utility/XcodeSDK.h index ceb8abb8c502d..a1a0ec415b90e 100644 --- a/lldb/include/lldb/Utility/XcodeSDK.h +++ b/lldb/include/lldb/Utility/XcodeSDK.h @@ -93,19 +93,6 @@ class XcodeSDK { static bool SDKSupportsModules(Type type, llvm::VersionTuple version); static bool SDKSupportsModules(Type desired_type, const FileSpec &sdk_path); - /// Returns true if the SDK for the specified triple supports - /// builtin modules in system headers. - /// - /// NOTE: should be kept in sync with sdkSupportsBuiltinModules in - /// Toolchains/Darwin.cpp - /// - /// FIXME: this function will be removed once LLDB's ClangExpressionParser - /// constructs the compiler instance through the driver/toolchain. See \ref - /// SetupImportStdModuleLangOpts - /// - static bool SDKSupportsBuiltinModules(const llvm::Triple &target_triple, - llvm::VersionTuple sdk_version); - /// Return the canonical SDK name, such as "macosx" for the macOS SDK. static std::string GetCanonicalName(Info info); /// Return the best-matching SDK type for a specific triple. 
diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 558818e8e2309..2bc85a2d2afa6 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -391,7 +391,7 @@ typedef std::shared_ptr PlatformSP; typedef std::shared_ptr ProcessSP; typedef std::shared_ptr ProcessAttachInfoSP; typedef std::shared_ptr ProcessLaunchInfoSP; -typedef std::shared_ptr ProtocolServerSP; +typedef std::unique_ptr ProtocolServerUP; typedef std::weak_ptr ProcessWP; typedef std::shared_ptr RegisterCheckpointSP; typedef std::shared_ptr RegisterContextSP; diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h index 34eaaa8e581e9..249b25c251ac2 100644 --- a/lldb/include/lldb/lldb-private-interfaces.h +++ b/lldb/include/lldb/lldb-private-interfaces.h @@ -81,8 +81,7 @@ typedef lldb::PlatformSP (*PlatformCreateInstance)(bool force, typedef lldb::ProcessSP (*ProcessCreateInstance)( lldb::TargetSP target_sp, lldb::ListenerSP listener_sp, const FileSpec *crash_file_path, bool can_connect); -typedef lldb::ProtocolServerSP (*ProtocolServerCreateInstance)( - Debugger &debugger); +typedef lldb::ProtocolServerUP (*ProtocolServerCreateInstance)(); typedef lldb::RegisterTypeBuilderSP (*RegisterTypeBuilderCreateInstance)( Target &target); typedef lldb::ScriptInterpreterSP (*ScriptInterpreterCreateInstance)( diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index 753de22b9cfee..b603c35c8df09 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -92,6 +92,9 @@ class MockGDBServerResponder: class RESPONSE_DISCONNECT: pass + class RESPONSE_NONE: + pass + def __init__(self): self.packetLog = [] @@ -181,6 +184,8 @@ def respond(self, packet): return self.qQueryGDBServer() if packet == "qHostInfo": return self.qHostInfo() + if packet.startswith("qEcho"): + return 
self.qEcho(int(packet.split(":")[1])) if packet == "qGetWorkingDir": return self.qGetWorkingDir() if packet == "qOffsets": @@ -237,6 +242,9 @@ def qProcessInfo(self): def qHostInfo(self): return "ptrsize:8;endian:little;" + def qEcho(self): + return "E04" + def qQueryGDBServer(self): return "E04" @@ -655,6 +663,8 @@ def _handlePacket(self, packet): if not isinstance(response, list): response = [response] for part in response: + if part is MockGDBServerResponder.RESPONSE_NONE: + continue if part is MockGDBServerResponder.RESPONSE_DISCONNECT: raise self.TerminateConnectionException() self._sendPacket(part) diff --git a/lldb/source/Commands/CommandObjectProtocolServer.cpp b/lldb/source/Commands/CommandObjectProtocolServer.cpp index 115754769f3e3..55bd42ed1a533 100644 --- a/lldb/source/Commands/CommandObjectProtocolServer.cpp +++ b/lldb/source/Commands/CommandObjectProtocolServer.cpp @@ -23,20 +23,6 @@ using namespace lldb_private; #define LLDB_OPTIONS_mcp #include "CommandOptions.inc" -static std::vector GetSupportedProtocols() { - std::vector supported_protocols; - size_t i = 0; - - for (llvm::StringRef protocol_name = - PluginManager::GetProtocolServerPluginNameAtIndex(i++); - !protocol_name.empty(); - protocol_name = PluginManager::GetProtocolServerPluginNameAtIndex(i++)) { - supported_protocols.push_back(protocol_name); - } - - return supported_protocols; -} - class CommandObjectProtocolServerStart : public CommandObjectParsed { public: CommandObjectProtocolServerStart(CommandInterpreter &interpreter) @@ -57,12 +43,11 @@ class CommandObjectProtocolServerStart : public CommandObjectParsed { } llvm::StringRef protocol = args.GetArgumentAtIndex(0); - std::vector supported_protocols = GetSupportedProtocols(); - if (llvm::find(supported_protocols, protocol) == - supported_protocols.end()) { + ProtocolServer *server = ProtocolServer::GetOrCreate(protocol); + if (!server) { result.AppendErrorWithFormatv( "unsupported protocol: {0}. 
Supported protocols are: {1}", protocol, - llvm::join(GetSupportedProtocols(), ", ")); + llvm::join(ProtocolServer::GetSupportedProtocols(), ", ")); return; } @@ -72,10 +57,6 @@ class CommandObjectProtocolServerStart : public CommandObjectParsed { } llvm::StringRef connection_uri = args.GetArgumentAtIndex(1); - ProtocolServerSP server_sp = GetDebugger().GetProtocolServer(protocol); - if (!server_sp) - server_sp = ProtocolServer::Create(protocol, GetDebugger()); - const char *connection_error = "unsupported connection specifier, expected 'accept:///path' or " "'listen://[host]:port', got '{0}'."; @@ -98,14 +79,12 @@ class CommandObjectProtocolServerStart : public CommandObjectParsed { formatv("[{0}]:{1}", uri->hostname.empty() ? "0.0.0.0" : uri->hostname, uri->port.value_or(0)); - if (llvm::Error error = server_sp->Start(connection)) { + if (llvm::Error error = server->Start(connection)) { result.AppendErrorWithFormatv("{0}", llvm::fmt_consume(std::move(error))); return; } - GetDebugger().AddProtocolServer(server_sp); - - if (Socket *socket = server_sp->GetSocket()) { + if (Socket *socket = server->GetSocket()) { std::string address = llvm::join(socket->GetListeningConnectionURI(), ", "); result.AppendMessageWithFormatv( @@ -134,30 +113,18 @@ class CommandObjectProtocolServerStop : public CommandObjectParsed { } llvm::StringRef protocol = args.GetArgumentAtIndex(0); - std::vector supported_protocols = GetSupportedProtocols(); - if (llvm::find(supported_protocols, protocol) == - supported_protocols.end()) { + ProtocolServer *server = ProtocolServer::GetOrCreate(protocol); + if (!server) { result.AppendErrorWithFormatv( "unsupported protocol: {0}. 
Supported protocols are: {1}", protocol, - llvm::join(GetSupportedProtocols(), ", ")); + llvm::join(ProtocolServer::GetSupportedProtocols(), ", ")); return; } - Debugger &debugger = GetDebugger(); - - ProtocolServerSP server_sp = debugger.GetProtocolServer(protocol); - if (!server_sp) { - result.AppendError( - llvm::formatv("no {0} protocol server running", protocol).str()); - return; - } - - if (llvm::Error error = server_sp->Stop()) { + if (llvm::Error error = server->Stop()) { result.AppendErrorWithFormatv("{0}", llvm::fmt_consume(std::move(error))); return; } - - debugger.RemoveProtocolServer(server_sp); } }; diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 33d1053fd8a65..445baf1f63785 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -2376,26 +2376,3 @@ llvm::ThreadPoolInterface &Debugger::GetThreadPool() { "Debugger::GetThreadPool called before Debugger::Initialize"); return *g_thread_pool; } - -void Debugger::AddProtocolServer(lldb::ProtocolServerSP protocol_server_sp) { - assert(protocol_server_sp && - GetProtocolServer(protocol_server_sp->GetPluginName()) == nullptr); - m_protocol_servers.push_back(protocol_server_sp); -} - -void Debugger::RemoveProtocolServer(lldb::ProtocolServerSP protocol_server_sp) { - auto it = llvm::find(m_protocol_servers, protocol_server_sp); - if (it != m_protocol_servers.end()) - m_protocol_servers.erase(it); -} - -lldb::ProtocolServerSP -Debugger::GetProtocolServer(llvm::StringRef protocol) const { - for (ProtocolServerSP protocol_server_sp : m_protocol_servers) { - if (!protocol_server_sp) - continue; - if (protocol_server_sp->GetPluginName() == protocol) - return protocol_server_sp; - } - return nullptr; -} diff --git a/lldb/source/Core/ProtocolServer.cpp b/lldb/source/Core/ProtocolServer.cpp index d57a047afa7b2..41636cdacdecc 100644 --- a/lldb/source/Core/ProtocolServer.cpp +++ b/lldb/source/Core/ProtocolServer.cpp @@ -12,10 +12,36 @@ using namespace 
lldb_private; using namespace lldb; -ProtocolServerSP ProtocolServer::Create(llvm::StringRef name, - Debugger &debugger) { +ProtocolServer *ProtocolServer::GetOrCreate(llvm::StringRef name) { + static std::mutex g_mutex; + static llvm::StringMap g_protocol_server_instances; + + std::lock_guard guard(g_mutex); + + auto it = g_protocol_server_instances.find(name); + if (it != g_protocol_server_instances.end()) + return it->second.get(); + if (ProtocolServerCreateInstance create_callback = - PluginManager::GetProtocolCreateCallbackForPluginName(name)) - return create_callback(debugger); + PluginManager::GetProtocolCreateCallbackForPluginName(name)) { + auto pair = + g_protocol_server_instances.try_emplace(name, create_callback()); + return pair.first->second.get(); + } + return nullptr; } + +std::vector ProtocolServer::GetSupportedProtocols() { + std::vector supported_protocols; + size_t i = 0; + + for (llvm::StringRef protocol_name = + PluginManager::GetProtocolServerPluginNameAtIndex(i++); + !protocol_name.empty(); + protocol_name = PluginManager::GetProtocolServerPluginNameAtIndex(i++)) { + supported_protocols.push_back(protocol_name); + } + + return supported_protocols; +} diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 7aa9cae5a5614..ffc76e6e93498 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -319,49 +319,6 @@ class ClangDiagnosticManagerAdapter : public clang::DiagnosticConsumer { StringRef m_filename; }; -/// Returns true if the SDK for the specified triple supports -/// builtin modules in system headers. This is used to decide -/// whether to pass -fbuiltin-headers-in-system-modules to -/// the compiler instance when compiling the `std` module. 
-static llvm::Expected -sdkSupportsBuiltinModules(lldb_private::Target &target) { - auto arch_spec = target.GetArchitecture(); - auto const &triple = arch_spec.GetTriple(); - auto module_sp = target.GetExecutableModule(); - if (!module_sp) - return llvm::createStringError("Executable module not found."); - - // Get SDK path that the target was compiled against. - auto platform_sp = target.GetPlatform(); - if (!platform_sp) - return llvm::createStringError("No Platform plugin found on target."); - - auto sdk_or_err = platform_sp->GetSDKPathFromDebugInfo(*module_sp); - if (!sdk_or_err) - return sdk_or_err.takeError(); - - // Use the SDK path from debug-info to find a local matching SDK directory. - auto sdk_path_or_err = - HostInfo::GetSDKRoot(HostInfo::SDKOptions{std::move(sdk_or_err->first)}); - if (!sdk_path_or_err) - return sdk_path_or_err.takeError(); - - auto VFS = FileSystem::Instance().GetVirtualFileSystem(); - if (!VFS) - return llvm::createStringError("No virtual filesystem available."); - - // Extract SDK version from the /path/to/some.sdk/SDKSettings.json - auto parsed_or_err = clang::parseDarwinSDKInfo(*VFS, *sdk_path_or_err); - if (!parsed_or_err) - return parsed_or_err.takeError(); - - auto maybe_sdk = *parsed_or_err; - if (!maybe_sdk) - return llvm::createStringError("Couldn't find Darwin SDK info."); - - return XcodeSDK::SDKSupportsBuiltinModules(triple, maybe_sdk->getVersion()); -} - static void SetupModuleHeaderPaths(CompilerInstance *compiler, std::vector include_directories, lldb::TargetSP target_sp) { @@ -705,7 +662,6 @@ static void SetupLangOpts(CompilerInstance &compiler, static void SetupImportStdModuleLangOpts(CompilerInstance &compiler, lldb_private::Target &target) { - Log *log = GetLog(LLDBLog::Expressions); LangOptions &lang_opts = compiler.getLangOpts(); lang_opts.Modules = true; // We want to implicitly build modules. 
@@ -723,12 +679,7 @@ static void SetupImportStdModuleLangOpts(CompilerInstance &compiler, lang_opts.GNUKeywords = true; lang_opts.CPlusPlus11 = true; - if (auto supported_or_err = sdkSupportsBuiltinModules(target)) - lang_opts.BuiltinHeadersInSystemModules = !*supported_or_err; - else - LLDB_LOG_ERROR(log, supported_or_err.takeError(), - "Failed to determine BuiltinHeadersInSystemModules when " - "setting up import-std-module: {0}"); + lang_opts.BuiltinHeadersInSystemModules = false; // The Darwin libc expects this macro to be set. lang_opts.GNUCVersion = 40201; diff --git a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp index 7e25bc4ea2a28..9f60675e51904 100644 --- a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp +++ b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp @@ -247,7 +247,7 @@ MemoryTagManagerAArch64MTE::UnpackTagsData(const std::vector &tags, return unpacked; } -std::vector +llvm::Expected> MemoryTagManagerAArch64MTE::UnpackTagsFromCoreFileSegment( CoreReaderFn reader, lldb::addr_t tag_segment_virtual_address, lldb::addr_t tag_segment_data_address, lldb::addr_t addr, @@ -290,8 +290,12 @@ MemoryTagManagerAArch64MTE::UnpackTagsFromCoreFileSegment( const size_t bytes_copied = reader(tag_segment_data_address + file_offset_in_bytes, tag_bytes_to_read, tag_data.data()); - UNUSED_IF_ASSERT_DISABLED(bytes_copied); - assert(bytes_copied == tag_bytes_to_read); + if (bytes_copied != tag_bytes_to_read) { + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + "Could not read tags from core file segment. 
Segment " + "is missing some or all tag data."); + } std::vector tags; tags.reserve(2 * tag_data.size()); diff --git a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h index 365e176e5b1da..79d24ce78ecee 100644 --- a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h +++ b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h @@ -44,7 +44,7 @@ class MemoryTagManagerAArch64MTE : public MemoryTagManager { UnpackTagsData(const std::vector &tags, size_t granules = 0) const override; - std::vector + llvm::Expected> UnpackTagsFromCoreFileSegment(CoreReaderFn reader, lldb::addr_t tag_segment_virtual_address, lldb::addr_t tag_segment_data_address, diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp index 394b62559da76..406fa06ea011a 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp @@ -180,7 +180,7 @@ bool GDBRemoteClientBase::Interrupt(std::chrono::seconds interrupt_timeout) { GDBRemoteCommunication::PacketResult GDBRemoteClientBase::SendPacketAndWaitForResponse( llvm::StringRef payload, StringExtractorGDBRemote &response, - std::chrono::seconds interrupt_timeout) { + std::chrono::seconds interrupt_timeout, bool sync_on_timeout) { Lock lock(*this, interrupt_timeout); if (!lock) { if (Log *log = GetLog(GDBRLog::Process)) @@ -191,7 +191,7 @@ GDBRemoteClientBase::SendPacketAndWaitForResponse( return PacketResult::ErrorSendFailed; } - return SendPacketAndWaitForResponseNoLock(payload, response); + return SendPacketAndWaitForResponseNoLock(payload, response, sync_on_timeout); } GDBRemoteCommunication::PacketResult @@ -236,14 +236,15 @@ GDBRemoteClientBase::SendPacketAndReceiveResponseWithOutputSupport( GDBRemoteCommunication::PacketResult 
GDBRemoteClientBase::SendPacketAndWaitForResponseNoLock( - llvm::StringRef payload, StringExtractorGDBRemote &response) { + llvm::StringRef payload, StringExtractorGDBRemote &response, + bool sync_on_timeout) { PacketResult packet_result = SendPacketNoLock(payload); if (packet_result != PacketResult::Success) return packet_result; const size_t max_response_retries = 3; for (size_t i = 0; i < max_response_retries; ++i) { - packet_result = ReadPacket(response, GetPacketTimeout(), true); + packet_result = ReadPacket(response, GetPacketTimeout(), sync_on_timeout); // Make sure we received a response if (packet_result != PacketResult::Success) return packet_result; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h index af2abdf4da5cf..9c17a8c1de057 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h @@ -61,7 +61,8 @@ class GDBRemoteClientBase : public GDBRemoteCommunication, public Broadcaster { // ErrorReplyTimeout. 
PacketResult SendPacketAndWaitForResponse( llvm::StringRef payload, StringExtractorGDBRemote &response, - std::chrono::seconds interrupt_timeout = std::chrono::seconds(0)); + std::chrono::seconds interrupt_timeout = std::chrono::seconds(0), + bool sync_on_timeout = true); PacketResult ReadPacketWithOutputSupport( StringExtractorGDBRemote &response, Timeout timeout, @@ -104,7 +105,8 @@ class GDBRemoteClientBase : public GDBRemoteCommunication, public Broadcaster { protected: PacketResult SendPacketAndWaitForResponseNoLock(llvm::StringRef payload, - StringExtractorGDBRemote &response); + StringExtractorGDBRemote &response, + bool sync_on_timeout = true); virtual void OnRunPacketSent(bool first); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp index 0d3ead840b080..2ca7099544bcc 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #if defined(__APPLE__) #define DEBUGSERVER_BASENAME "debugserver" @@ -354,8 +355,9 @@ GDBRemoteCommunication::WaitForPacketNoLock(StringExtractorGDBRemote &packet, disconnected = true; Disconnect(); } + } else { + timed_out = true; } - timed_out = true; break; case eConnectionStatusSuccess: // printf ("status = success but error = %s\n", @@ -894,11 +896,9 @@ FileSpec GDBRemoteCommunication::GetDebugserverPath(Platform *platform) { } Status GDBRemoteCommunication::StartDebugserverProcess( - const char *url, Platform *platform, ProcessLaunchInfo &launch_info, - uint16_t *port, const Args *inferior_args, shared_fd_t pass_comm_fd) { + std::variant comm, Platform *platform, + ProcessLaunchInfo &launch_info, const Args *inferior_args) { Log *log = GetLog(GDBRLog::Process); - LLDB_LOG(log, "Starting debug server: url={0}, port={1}", - url ? url : "", port ? 
*port : uint16_t(0)); FileSpec debugserver_file_spec = GetDebugserverPath(platform); if (!debugserver_file_spec) @@ -911,89 +911,58 @@ Status GDBRemoteCommunication::StartDebugserverProcess( #if !defined(__APPLE__) // First argument to lldb-server must be mode in which to run. - debugserver_args.AppendArgument(llvm::StringRef("gdbserver")); + debugserver_args.AppendArgument("gdbserver"); #endif - // If a url is supplied then use it - if (url && url[0]) - debugserver_args.AppendArgument(llvm::StringRef(url)); - - if (pass_comm_fd != SharedSocket::kInvalidFD) { - StreamString fd_arg; - fd_arg.Printf("--fd=%" PRIi64, (int64_t)pass_comm_fd); - debugserver_args.AppendArgument(fd_arg.GetString()); - // Send "pass_comm_fd" down to the inferior so it can use it to - // communicate back with this process. Ignored on Windows. - launch_info.AppendDuplicateFileAction((int64_t)pass_comm_fd, - (int64_t)pass_comm_fd); - } - // use native registers, not the GDB registers - debugserver_args.AppendArgument(llvm::StringRef("--native-regs")); + debugserver_args.AppendArgument("--native-regs"); if (launch_info.GetLaunchInSeparateProcessGroup()) - debugserver_args.AppendArgument(llvm::StringRef("--setsid")); + debugserver_args.AppendArgument("--setsid"); llvm::SmallString<128> named_pipe_path; // socket_pipe is used by debug server to communicate back either - // TCP port or domain socket name which it listens on. - // The second purpose of the pipe to serve as a synchronization point - + // TCP port or domain socket name which it listens on. However, we're not + // interested in the actual value here. + // The only reason for using the pipe is to serve as a synchronization point - // once data is written to the pipe, debug server is up and running. 
Pipe socket_pipe; - std::unique_ptr sock_up; + // If a url is supplied then use it + if (shared_fd_t *comm_fd = std::get_if(&comm)) { + LLDB_LOG(log, "debugserver communicates over fd {0}", comm_fd); + assert(*comm_fd != SharedSocket::kInvalidFD); + debugserver_args.AppendArgument(llvm::formatv("--fd={0}", *comm_fd).str()); + // Send "comm_fd" down to the inferior so it can use it to communicate back + // with this process. + launch_info.AppendDuplicateFileAction((int64_t)*comm_fd, (int64_t)*comm_fd); + } else { + llvm::StringRef url = std::get(comm); + LLDB_LOG(log, "debugserver listens on: {0}", url); + debugserver_args.AppendArgument(url); - // port is null when debug server should listen on domain socket - we're - // not interested in port value but rather waiting for debug server to - // become available. - if (pass_comm_fd == SharedSocket::kInvalidFD) { - if (url) { -// Create a temporary file to get the stdout/stderr and redirect the output of -// the command into this file. We will later read this file if all goes well -// and fill the data into "command_output_ptr" #if defined(__APPLE__) - // Binding to port zero, we need to figure out what port it ends up - // using using a named pipe... - Status error = socket_pipe.CreateWithUniqueName("debugserver-named-pipe", - false, named_pipe_path); - if (error.Fail()) { - LLDB_LOG(log, "named pipe creation failed: {0}", error); - return error; - } - debugserver_args.AppendArgument(llvm::StringRef("--named-pipe")); - debugserver_args.AppendArgument(named_pipe_path); + // Using a named pipe as debugserver does not support --pipe. 
+ Status error = socket_pipe.CreateWithUniqueName("debugserver-named-pipe", + false, named_pipe_path); + if (error.Fail()) { + LLDB_LOG(log, "named pipe creation failed: {0}", error); + return error; + } + debugserver_args.AppendArgument(llvm::StringRef("--named-pipe")); + debugserver_args.AppendArgument(named_pipe_path); #else - // Binding to port zero, we need to figure out what port it ends up - // using using an unnamed pipe... - Status error = socket_pipe.CreateNew(true); - if (error.Fail()) { - LLDB_LOG(log, "unnamed pipe creation failed: {0}", error); - return error; - } - pipe_t write = socket_pipe.GetWritePipe(); - debugserver_args.AppendArgument(llvm::StringRef("--pipe")); - debugserver_args.AppendArgument(llvm::to_string(write)); - launch_info.AppendCloseFileAction(socket_pipe.GetReadFileDescriptor()); -#endif - } else { - // No host and port given, so lets listen on our end and make the - // debugserver connect to us.. - if (llvm::Expected> expected_sock = - Socket::TcpListen("127.0.0.1:0")) - sock_up = std::move(*expected_sock); - else - return Status::FromError(expected_sock.takeError()); - - uint16_t port_ = sock_up->GetLocalPortNumber(); - // Send the host and port down that debugserver and specify an option - // so that it connects back to the port we are listening to in this - // process - debugserver_args.AppendArgument(llvm::StringRef("--reverse-connect")); - debugserver_args.AppendArgument( - llvm::formatv("127.0.0.1:{0}", port_).str()); - if (port) - *port = port_; + // Using an unnamed pipe as it's simpler. 
+ Status error = socket_pipe.CreateNew(true); + if (error.Fail()) { + LLDB_LOG(log, "unnamed pipe creation failed: {0}", error); + return error; } + pipe_t write = socket_pipe.GetWritePipe(); + debugserver_args.AppendArgument(llvm::StringRef("--pipe")); + debugserver_args.AppendArgument(llvm::to_string(write)); + launch_info.AppendCloseFileAction(socket_pipe.GetReadFileDescriptor()); +#endif } Environment host_env = Host::GetEnvironment(); @@ -1070,7 +1039,7 @@ Status GDBRemoteCommunication::StartDebugserverProcess( return error; } - if (pass_comm_fd != SharedSocket::kInvalidFD) + if (std::holds_alternative(comm)) return Status(); Status error; @@ -1084,55 +1053,30 @@ Status GDBRemoteCommunication::StartDebugserverProcess( if (socket_pipe.CanWrite()) socket_pipe.CloseWriteFileDescriptor(); - if (socket_pipe.CanRead()) { - // Read port from pipe with 10 second timeout. - std::string port_str; - while (error.Success()) { - char buf[10]; - if (llvm::Expected num_bytes = - socket_pipe.Read(buf, std::size(buf), std::chrono::seconds(10))) { - if (*num_bytes == 0) - break; - port_str.append(buf, *num_bytes); - } else { - error = Status::FromError(num_bytes.takeError()); - } - } - if (error.Success() && (port != nullptr)) { - // NB: Deliberately using .c_str() to stop at embedded '\0's - llvm::StringRef port_ref = port_str.c_str(); - uint16_t child_port = 0; - // FIXME: improve error handling - llvm::to_integer(port_ref, child_port); - if (*port == 0 || *port == child_port) { - *port = child_port; - LLDB_LOG(log, "debugserver listens on port {0}", *port); - } else { - LLDB_LOG(log, - "debugserver listening on port {0} but requested port was {1}", - child_port, (*port)); - } + assert(socket_pipe.CanRead()); + + // Read data from the pipe -- and ignore it (see comment above). 
+ while (error.Success()) { + char buf[10]; + if (llvm::Expected num_bytes = + socket_pipe.Read(buf, std::size(buf), std::chrono::seconds(10))) { + if (*num_bytes == 0) + break; } else { - LLDB_LOG(log, "failed to read a port value from pipe {0}: {1}", - named_pipe_path, error); + error = Status::FromError(num_bytes.takeError()); } - socket_pipe.Close(); } + if (error.Fail()) { + LLDB_LOG(log, "failed to synchronize on pipe {0}: {1}", named_pipe_path, + error); + } + socket_pipe.Close(); if (named_pipe_path.size() > 0) { if (Status err = socket_pipe.Delete(named_pipe_path); err.Fail()) LLDB_LOG(log, "failed to delete pipe {0}: {1}", named_pipe_path, err); } - if (error.Success() && sock_up) { - Socket *accepted_socket = nullptr; - error = sock_up->Accept(/*timeout=*/std::nullopt, accepted_socket); - if (accepted_socket) { - SetConnection(std::make_unique( - std::unique_ptr(accepted_socket))); - } - } - return error; } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h index fc86f801f0d8a..31f8edf715a3a 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h @@ -135,17 +135,15 @@ class GDBRemoteCommunication : public Communication { std::chrono::seconds GetPacketTimeout() const { return m_packet_timeout; } // Get the debugserver path and check that it exist. - FileSpec GetDebugserverPath(Platform *platform); + static FileSpec GetDebugserverPath(Platform *platform); // Start a debugserver instance on the current host using the // supplied connection URL. 
- Status StartDebugserverProcess( - const char *url, + static Status StartDebugserverProcess( + std::variant comm, Platform *platform, // If non nullptr, then check with the platform for // the GDB server binary if it can't be located - ProcessLaunchInfo &launch_info, uint16_t *port, const Args *inferior_args, - shared_fd_t pass_comm_fd); // Communication file descriptor to pass during - // fork/exec to avoid having to connect/accept + ProcessLaunchInfo &launch_info, const Args *inferior_args); void DumpHistory(Stream &strm); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index adbf06b9a19a0..7d2bd452acca9 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -406,7 +406,7 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() { m_supports_qXfer_memory_map_read = eLazyBoolYes; else if (x == "qXfer:siginfo:read+") m_supports_qXfer_siginfo_read = eLazyBoolYes; - else if (x == "qEcho") + else if (x == "qEcho+") m_supports_qEcho = eLazyBoolYes; else if (x == "QPassSignals+") m_supports_QPassSignals = eLazyBoolYes; @@ -4358,7 +4358,9 @@ llvm::Expected GDBRemoteCommunicationClient::KillProcess(lldb::pid_t pid) { StringExtractorGDBRemote response; GDBRemoteCommunication::ScopedTimeout(*this, seconds(3)); - if (SendPacketAndWaitForResponse("k", response, GetPacketTimeout()) != + // LLDB server typically sends no response for "k", so we shouldn't try + // to sync on timeout. 
+ if (SendPacketAndWaitForResponse("k", response, GetPacketTimeout(), false) != PacketResult::Success) return llvm::createStringError(llvm::inconvertibleErrorCode(), "failed to send k packet"); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp index 89fdfa74bc025..7506cf64def38 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp @@ -94,7 +94,16 @@ GDBRemoteCommunicationServerPlatform::~GDBRemoteCommunicationServerPlatform() = Status GDBRemoteCommunicationServerPlatform::LaunchGDBServer( const lldb_private::Args &args, lldb::pid_t &pid, std::string &socket_name, shared_fd_t fd) { - std::ostringstream url; + Log *log = GetLog(LLDBLog::Platform); + + ProcessLaunchInfo debugserver_launch_info; + // Do not run in a new session so that it can not linger after the platform + // closes. + debugserver_launch_info.SetLaunchInSeparateProcessGroup(false); + debugserver_launch_info.SetMonitorProcessCallback( + [](lldb::pid_t, int, int) {}); + + Status error; if (fd == SharedSocket::kInvalidFD) { if (m_socket_protocol == Socket::ProtocolTcp) { // Just check that GDBServer exists. GDBServer must be launched after @@ -104,31 +113,22 @@ Status GDBRemoteCommunicationServerPlatform::LaunchGDBServer( return Status(); } + std::ostringstream url; // debugserver does not accept the URL scheme prefix. 
#if !defined(__APPLE__) url << Socket::FindSchemeByProtocol(m_socket_protocol) << "://"; #endif socket_name = GetDomainSocketPath("gdbserver").GetPath(); url << socket_name; + error = StartDebugserverProcess(url.str(), nullptr, debugserver_launch_info, + &args); } else { if (m_socket_protocol != Socket::ProtocolTcp) return Status::FromErrorString("protocol must be tcp"); + error = + StartDebugserverProcess(fd, nullptr, debugserver_launch_info, &args); } - // Spawn a debugserver and try to get the port it listens to. - ProcessLaunchInfo debugserver_launch_info; - Log *log = GetLog(LLDBLog::Platform); - LLDB_LOG(log, "Launching debugserver url='{0}', fd={1}...", url.str(), fd); - - // Do not run in a new session so that it can not linger after the platform - // closes. - debugserver_launch_info.SetLaunchInSeparateProcessGroup(false); - debugserver_launch_info.SetMonitorProcessCallback( - [](lldb::pid_t, int, int) {}); - - Status error = StartDebugserverProcess( - url.str().c_str(), nullptr, debugserver_launch_info, nullptr, &args, fd); - if (error.Success()) { pid = debugserver_launch_info.GetProcessID(); AddSpawnedProcess(pid); diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 4e70fe8ac1595..3f9c4ddc60a25 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -3494,9 +3494,9 @@ Status ProcessGDBRemote::LaunchAndConnectToDebugserver( if (error.Fail()) return error; - error = m_gdb_comm.StartDebugserverProcess( - nullptr, GetTarget().GetPlatform().get(), debugserver_launch_info, - nullptr, nullptr, shared_socket.GetSendableFD()); + error = m_gdb_comm.StartDebugserverProcess(shared_socket.GetSendableFD(), + GetTarget().GetPlatform().get(), + debugserver_launch_info, nullptr); if (error.Fail()) { Log *log = GetLog(GDBRLog::Process); diff --git a/lldb/source/Plugins/Protocol/MCP/Protocol.h 
b/lldb/source/Plugins/Protocol/MCP/Protocol.h index e315899406573..cb790dc4e5596 100644 --- a/lldb/source/Plugins/Protocol/MCP/Protocol.h +++ b/lldb/source/Plugins/Protocol/MCP/Protocol.h @@ -123,6 +123,8 @@ using Message = std::variant; bool fromJSON(const llvm::json::Value &, Message &, llvm::json::Path); llvm::json::Value toJSON(const Message &); +using ToolArguments = std::variant; + } // namespace lldb_private::mcp::protocol #endif diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp index c3cd9a88c20bf..3180341b50b91 100644 --- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp @@ -24,8 +24,7 @@ LLDB_PLUGIN_DEFINE(ProtocolServerMCP) static constexpr size_t kChunkSize = 1024; -ProtocolServerMCP::ProtocolServerMCP(Debugger &debugger) - : ProtocolServer(), m_debugger(debugger) { +ProtocolServerMCP::ProtocolServerMCP() : ProtocolServer() { AddRequestHandler("initialize", std::bind(&ProtocolServerMCP::InitializeHandler, this, std::placeholders::_1)); @@ -39,8 +38,10 @@ ProtocolServerMCP::ProtocolServerMCP(Debugger &debugger) "notifications/initialized", [](const protocol::Notification &) { LLDB_LOG(GetLog(LLDBLog::Host), "MCP initialization complete"); }); - AddTool(std::make_unique( - "lldb_command", "Run an lldb command.", m_debugger)); + AddTool( + std::make_unique("lldb_command", "Run an lldb command.")); + AddTool(std::make_unique( + "lldb_debugger_list", "List debugger instances with their debugger_id.")); } ProtocolServerMCP::~ProtocolServerMCP() { llvm::consumeError(Stop()); } @@ -54,8 +55,8 @@ void ProtocolServerMCP::Terminate() { PluginManager::UnregisterPlugin(CreateInstance); } -lldb::ProtocolServerSP ProtocolServerMCP::CreateInstance(Debugger &debugger) { - return std::make_shared(debugger); +lldb::ProtocolServerUP ProtocolServerMCP::CreateInstance() { + return std::make_unique(); } llvm::StringRef 
ProtocolServerMCP::GetPluginDescriptionStatic() { @@ -145,7 +146,7 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) { std::lock_guard guard(m_server_mutex); if (m_running) - return llvm::createStringError("server already running"); + return llvm::createStringError("the MCP server is already running"); Status status; m_listener = Socket::Create(connection.protocol, status); @@ -162,10 +163,10 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) { if (llvm::Error error = handles.takeError()) return error; + m_running = true; m_listen_handlers = std::move(*handles); m_loop_thread = std::thread([=] { - llvm::set_thread_name( - llvm::formatv("debugger-{0}.mcp.runloop", m_debugger.GetID())); + llvm::set_thread_name("protocol-server.mcp"); m_loop.Run(); }); @@ -175,6 +176,8 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) { llvm::Error ProtocolServerMCP::Stop() { { std::lock_guard guard(m_server_mutex); + if (!m_running) + return createStringError("the MCP sever is not running"); m_running = false; } @@ -311,11 +314,12 @@ ProtocolServerMCP::ToolsCallHandler(const protocol::Request &request) { if (it == m_tools.end()) return llvm::createStringError(llvm::formatv("no tool \"{0}\"", tool_name)); - const json::Value *args = param_obj->get("arguments"); - if (!args) - return llvm::createStringError("no tool arguments"); + protocol::ToolArguments tool_args; + if (const json::Value *args = param_obj->get("arguments")) + tool_args = *args; - llvm::Expected text_result = it->second->Call(*args); + llvm::Expected text_result = + it->second->Call(tool_args); if (!text_result) return text_result.takeError(); diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h index 52bb92a04a802..d55882cc8ab09 100644 --- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h @@ -21,7 +21,7 
@@ namespace lldb_private::mcp { class ProtocolServerMCP : public ProtocolServer { public: - ProtocolServerMCP(Debugger &debugger); + ProtocolServerMCP(); virtual ~ProtocolServerMCP() override; virtual llvm::Error Start(ProtocolServer::Connection connection) override; @@ -33,7 +33,7 @@ class ProtocolServerMCP : public ProtocolServer { static llvm::StringRef GetPluginNameStatic() { return "MCP"; } static llvm::StringRef GetPluginDescriptionStatic(); - static lldb::ProtocolServerSP CreateInstance(Debugger &debugger); + static lldb::ProtocolServerUP CreateInstance(); llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } @@ -71,8 +71,6 @@ class ProtocolServerMCP : public ProtocolServer { llvm::StringLiteral kName = "lldb-mcp"; llvm::StringLiteral kVersion = "0.1.0"; - Debugger &m_debugger; - bool m_running = false; MainLoop m_loop; diff --git a/lldb/source/Plugins/Protocol/MCP/Tool.cpp b/lldb/source/Plugins/Protocol/MCP/Tool.cpp index de8fcc8f3cb4c..5c4626cf66b32 100644 --- a/lldb/source/Plugins/Protocol/MCP/Tool.cpp +++ b/lldb/source/Plugins/Protocol/MCP/Tool.cpp @@ -7,22 +7,38 @@ //===----------------------------------------------------------------------===// #include "Tool.h" +#include "lldb/Core/Module.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" using namespace lldb_private::mcp; using namespace llvm; -struct LLDBCommandToolArguments { +namespace { +struct CommandToolArguments { + uint64_t debugger_id; std::string arguments; }; -bool fromJSON(const llvm::json::Value &V, LLDBCommandToolArguments &A, +bool fromJSON(const llvm::json::Value &V, CommandToolArguments &A, llvm::json::Path P) { llvm::json::ObjectMapper O(V, P); - return O && O.map("arguments", A.arguments); + return O && O.map("debugger_id", A.debugger_id) && + O.mapOptional("arguments", A.arguments); } +/// Helper function to create a TextResult from a string output. 
+static lldb_private::mcp::protocol::TextResult +createTextResult(std::string output, bool is_error = false) { + lldb_private::mcp::protocol::TextResult text_result; + text_result.content.emplace_back( + lldb_private::mcp::protocol::TextContent{{std::move(output)}}); + text_result.isError = is_error; + return text_result; +} + +} // namespace + Tool::Tool(std::string name, std::string description) : m_name(std::move(name)), m_description(std::move(description)) {} @@ -37,22 +53,27 @@ protocol::ToolDefinition Tool::GetDefinition() const { return definition; } -LLDBCommandTool::LLDBCommandTool(std::string name, std::string description, - Debugger &debugger) - : Tool(std::move(name), std::move(description)), m_debugger(debugger) {} - llvm::Expected -LLDBCommandTool::Call(const llvm::json::Value &args) { - llvm::json::Path::Root root; +CommandTool::Call(const protocol::ToolArguments &args) { + if (!std::holds_alternative(args)) + return createStringError("CommandTool requires arguments"); + + json::Path::Root root; - LLDBCommandToolArguments arguments; - if (!fromJSON(args, arguments, root)) + CommandToolArguments arguments; + if (!fromJSON(std::get(args), arguments, root)) return root.getError(); + lldb::DebuggerSP debugger_sp = + Debugger::GetDebuggerAtIndex(arguments.debugger_id); + if (!debugger_sp) + return createStringError( + llvm::formatv("no debugger with id {0}", arguments.debugger_id)); + // FIXME: Disallow certain commands and their aliases. 
CommandReturnObject result(/*colors=*/false); - m_debugger.GetCommandInterpreter().HandleCommand(arguments.arguments.c_str(), - eLazyBoolYes, result); + debugger_sp->GetCommandInterpreter().HandleCommand( + arguments.arguments.c_str(), eLazyBoolYes, result); std::string output; llvm::StringRef output_str = result.GetOutputString(); @@ -66,16 +87,64 @@ LLDBCommandTool::Call(const llvm::json::Value &args) { output += err_str; } - mcp::protocol::TextResult text_result; - text_result.content.emplace_back(mcp::protocol::TextContent{{output}}); - text_result.isError = !result.Succeeded(); - return text_result; + return createTextResult(output, !result.Succeeded()); } -std::optional LLDBCommandTool::GetSchema() const { +std::optional CommandTool::GetSchema() const { + llvm::json::Object id_type{{"type", "number"}}; llvm::json::Object str_type{{"type", "string"}}; - llvm::json::Object properties{{"arguments", std::move(str_type)}}; + llvm::json::Object properties{{"debugger_id", std::move(id_type)}, + {"arguments", std::move(str_type)}}; + llvm::json::Array required{"debugger_id"}; llvm::json::Object schema{{"type", "object"}, - {"properties", std::move(properties)}}; + {"properties", std::move(properties)}, + {"required", std::move(required)}}; return schema; } + +llvm::Expected +DebuggerListTool::Call(const protocol::ToolArguments &args) { + if (!std::holds_alternative(args)) + return createStringError("DebuggerListTool takes no arguments"); + + llvm::json::Path::Root root; + + // Return a nested Markdown list with debuggers and target. + // Example output: + // + // - debugger 0 + // - target 0 /path/to/foo + // - target 1 + // - debugger 1 + // - target 0 /path/to/bar + // + // FIXME: Use Structured Content when we adopt protocol version 2025-06-18. 
+ std::string output; + llvm::raw_string_ostream os(output); + + const size_t num_debuggers = Debugger::GetNumDebuggers(); + for (size_t i = 0; i < num_debuggers; ++i) { + lldb::DebuggerSP debugger_sp = Debugger::GetDebuggerAtIndex(i); + if (!debugger_sp) + continue; + + os << "- debugger " << i << '\n'; + + TargetList &target_list = debugger_sp->GetTargetList(); + const size_t num_targets = target_list.GetNumTargets(); + for (size_t j = 0; j < num_targets; ++j) { + lldb::TargetSP target_sp = target_list.GetTargetAtIndex(j); + if (!target_sp) + continue; + os << " - target " << j; + if (target_sp == target_list.GetSelectedTarget()) + os << " (selected)"; + // Append the module path if we have one. + if (Module *exe_module = target_sp->GetExecutableModulePointer()) + os << " " << exe_module->GetFileSpec().GetPath(); + os << '\n'; + } + } + + return createTextResult(output); +} diff --git a/lldb/source/Plugins/Protocol/MCP/Tool.h b/lldb/source/Plugins/Protocol/MCP/Tool.h index 57a5125813b76..74ab04b472522 100644 --- a/lldb/source/Plugins/Protocol/MCP/Tool.h +++ b/lldb/source/Plugins/Protocol/MCP/Tool.h @@ -22,10 +22,10 @@ class Tool { virtual ~Tool() = default; virtual llvm::Expected - Call(const llvm::json::Value &args) = 0; + Call(const protocol::ToolArguments &args) = 0; virtual std::optional GetSchema() const { - return std::nullopt; + return llvm::json::Object{{"type", "object"}}; } protocol::ToolDefinition GetDefinition() const; @@ -37,20 +37,26 @@ class Tool { std::string m_description; }; -class LLDBCommandTool : public mcp::Tool { +class CommandTool : public mcp::Tool { public: - LLDBCommandTool(std::string name, std::string description, - Debugger &debugger); - ~LLDBCommandTool() = default; + using mcp::Tool::Tool; + ~CommandTool() = default; virtual llvm::Expected - Call(const llvm::json::Value &args) override; + Call(const protocol::ToolArguments &args) override; virtual std::optional GetSchema() const override; +}; -private: - Debugger &m_debugger; 
+class DebuggerListTool : public mcp::Tool { +public: + using mcp::Tool::Tool; + ~DebuggerListTool() = default; + + virtual llvm::Expected + Call(const protocol::ToolArguments &args) override; }; + } // namespace lldb_private::mcp #endif diff --git a/lldb/source/Utility/XcodeSDK.cpp b/lldb/source/Utility/XcodeSDK.cpp index 004b4717e315b..eb2047e67c326 100644 --- a/lldb/source/Utility/XcodeSDK.cpp +++ b/lldb/source/Utility/XcodeSDK.cpp @@ -266,27 +266,6 @@ bool XcodeSDK::SupportsSwift() const { } } -bool XcodeSDK::SDKSupportsBuiltinModules(const llvm::Triple &target_triple, - llvm::VersionTuple sdk_version) { - using namespace llvm; - - switch (target_triple.getOS()) { - case Triple::OSType::MacOSX: - return sdk_version >= VersionTuple(15U); - case Triple::OSType::IOS: - return sdk_version >= VersionTuple(18U); - case Triple::OSType::TvOS: - return sdk_version >= VersionTuple(18U); - case Triple::OSType::WatchOS: - return sdk_version >= VersionTuple(11U); - case Triple::OSType::XROS: - return sdk_version >= VersionTuple(2U); - default: - // New SDKs support builtin modules from the start. 
- return true; - } -} - bool XcodeSDK::SDKSupportsModules(XcodeSDK::Type desired_type, const FileSpec &sdk_path) { ConstString last_path_component = sdk_path.GetFilename(); diff --git a/lldb/test/API/commands/command/script_alias/TestCommandScriptAlias.py b/lldb/test/API/commands/command/script_alias/TestCommandScriptAlias.py index 2696f703f0e1c..09886baf5406c 100644 --- a/lldb/test/API/commands/command/script_alias/TestCommandScriptAlias.py +++ b/lldb/test/API/commands/command/script_alias/TestCommandScriptAlias.py @@ -11,6 +11,7 @@ class CommandScriptAliasTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True def test_pycmd(self): + self.runCmd("log enable -f /tmp/gdb.log gdb-remote all") self.runCmd("command script import tcsacmd.py") self.runCmd("command script add -f tcsacmd.some_command_here attach") diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py index 08ac9290ee85a..12b464d3397eb 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py @@ -356,6 +356,78 @@ def A(self, packet): ["vRun;%s;61726731;61726732;61726733" % (exe_hex,)] ) + def test_launch_lengthy_vRun(self): + class MyResponder(MockGDBServerResponder): + def __init__(self, *args, **kwargs): + self.started = False + return super().__init__(*args, **kwargs) + + def qC(self): + if self.started: + return "QCp10.10" + else: + return "E42" + + def qfThreadInfo(self): + if self.started: + return "mp10.10" + else: + return "E42" + + def qsThreadInfo(self): + return "l" + + def qEcho(self, num): + resp = "qEcho:" + str(num) + if num >= 2: + # We have launched our program + self.started = True + return [resp, "T13"] + + return resp + + def qSupported(self, client_supported): + return "PacketSize=3fff;QStartNoAckMode+;qEcho+;" + + def qHostInfo(self): + return "default_packet_timeout:1;" + + def 
vRun(self, packet): + return [self.RESPONSE_NONE] + + def A(self, packet): + return "E28" + + self.server.responder = MyResponder() + + target = self.createTarget("a.yaml") + # NB: apparently GDB packets are using "/" on Windows too + exe_path = self.getBuildArtifact("a").replace(os.path.sep, "/") + exe_hex = binascii.b2a_hex(exe_path.encode()).decode() + process = self.connect(target) + lldbutil.expect_state_changes( + self, self.dbg.GetListener(), process, [lldb.eStateConnected] + ) + + process = target.Launch( + lldb.SBListener(), + ["arg1", "arg2", "arg3"], # argv + [], # envp + None, # stdin_path + None, # stdout_path + None, # stderr_path + None, # working_directory + 0, # launch_flags + True, # stop_at_entry + lldb.SBError(), + ) # error + self.assertTrue(process, PROCESS_IS_VALID) + self.assertEqual(process.GetProcessID(), 16) + + self.assertPacketLogContains( + ["vRun;%s;61726731;61726732;61726733" % (exe_hex,)] + ) + def test_launch_QEnvironment(self): class MyResponder(MockGDBServerResponder): def qC(self): diff --git a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py index a9879f67d8b8f..bfdc8229094f0 100644 --- a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py +++ b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py @@ -248,3 +248,26 @@ def test_mte_ctrl_register(self): "TCF: 0 = TCF_NONE, 1 = TCF_SYNC, 2 = TCF_ASYNC, 3 = TCF_ASYMM" ], ) + + @skipIfLLVMTargetMissing("AArch64") + def test_mte_no_tags(self): + """Test that we handle there being a tag segment but that segment does + not contain any tag data. This can happen when the core is dumped + with a restrictive limit or filter.""" + self.runCmd("target create --core core.mte.notags") + + mte_buf_addr = 0xFFFFA4AF3000 + + # We can see which memory was tagged. 
+ self.expect( + f"memory region {mte_buf_addr}", substrs=["memory tagging: enabled"] + ) + + # We cannot read those tags. + self.expect( + f"memory tag read {mte_buf_addr}", + substrs=[ + "Could not read tags from core file segment. Segment is missing some or all tag data." + ], + error=True, + ) diff --git a/lldb/test/API/linux/aarch64/mte_core_file/core.mte.notags b/lldb/test/API/linux/aarch64/mte_core_file/core.mte.notags new file mode 100644 index 0000000000000000000000000000000000000000..8f9d60668a84d99b425ab7bfe20c02b67e153542 GIT binary patch literal 32768 zcmeI5dr(x@8Nk2Yy9-Oix;|2gA>r~cqGMK2V|gaT2FvTCwdw%%dizQ-#`T)6N9VLo=ogU>>02?f2b#4(u+F zv?iTN+jH>V^Z3qp&Ub$2JLlf(A7?k_7cMp$48Vg4eg(rh0EG)8Vg(k$6qYx!xQQns z>QOQyF}i?Pqdv?elIcd_FCVtH>Zz!^mE~AWdRiMYul4r7tQSWrT+e!gdL_?Z8Qo5! zZcv1s(c^2_uc~^LsQb>O5qesI%xdinkFM9lL!IFaexNY255?IUDv3!upzwSu0 z3E|PY<2I%8PMUNdn8)&~C+P{c%wenA@G)`M)+~vbV4_|Pyey`o{CK@sn{!HXALsI^OwOZQSYlzC?t9Z% ze$zQu(3Awh+gP5DD~07hnxog}OL{5Ozm>_8V`7=*VxOu*gJ zwW4VwF)cq3{Z2x4EVv&SBkEZBrj4@y=fK%Wg3SmwAU5=_V2WU)V&Fa(7g4S&Z)9z3 z)ZR(xz2T;aI{dk;F-KWKgzT!Szjbigga{dbA0Zii8CxS{m=!?~5CjAPK|l}?1Ox#= zKoAfF1OY)n5D)|e0YN|z5CjAPK|l}?1Ox#=KoAfF1OY)n5D)|e0YN|z5CjAPK|l}? 
z1Ox#=KoAfF1OY)n5D)|e0YN|z5CjAPK|l}?1Ox#=KoAfF1OY)n5D)|e0YN|z5CjAP zK|l}?1Ox#=KoAfF1OY)n5csbourcQRe`$>K|NqPn^2Jfo`Tr8$$K%1Fbo4Bpp*vPc zVtPE5Sw%YM6p!|Ue(mhhk@3(W)9h?pel9WT(6y5q7$tV<#lT~#OM%WurLzUeR%(*Y z7mN(l96v*q>qN?V`Uy--gx(P+`P;C3682EY4lt}_3X3qdV)@@#IUfg|ZAkT-7`(%5 zoDZ>bU=_D9qex8kxsg9-;|G_kqBP(ut#o-R{hkWfs?wU8@~YC|VpiZ^>2+P!#M@9g$-W?@+zV?in!%X{!D5efQ?Xs{nC87O zXmx<}s1gUxCqZd@1<~JOhP&Hyq_dk%O4|w(%ukV}vnRU0b2cB0Z@%xD9%=_=pnka= zdg`zeZppNVw`MqIQvGeH{|;CxULI7P3M8JoAjRYH%j0&O&)wB&uRhoZneop-#v8j+ zz<8;^0M7SvcQx*R>R2DOPZy3fKw6{ZY{VDTX!A>0RA1S>B zZ8O4=NgKgf(Jg^97xho8>QtBM)<83WGsOyPHeFYpG`=^n95yJ=#9@821sdwD=Fn!e zb+bVq=Rq9L41j@yCX7umKx5g8au>u`yo_y7|J~RI@o+(01+k+3KE^WY$KkzhO?%|J z)dQP%m!h6IL2q{j@w=i9@mq}iKJeTTdI$TnY}nAX5-g!gWcWHVT*Vkxp?*7z*BI8J zJd)$P$Z@mP9wv?s;)yJY+3R2o8PUcRkS=|QW9M8+X4iO18qX6-S}2Y2JWNcGMf^t_ zx`I{+)x+|(dP%oC4weoWO-nTn+ps*vV6HIWd;s!!A8n{BTh*@sg!0jb6WB)zm`C)h z&!buE*Makx&iZ{C`z3b2Wbt`ohhy5sW9frDd8=g#B-nKP)dE2AY z^TMUg_AvQx2=zA_B+ZZPaMP&|q_|mjNZ3I>M80E}Sl@pT#${nF!)11*>Vg#r6wL#pCj!dFGMasW!-HHroUn!lmsaVCe;z1TJ zFh81#tZ8j37=yg2vnjr*FAv~oZp8X7IhognYkAfcY=3FQS|{J2{pX9~vWYssvyz-L z)|HzsogG|%EIcSd!{z%7;ZGz(>!%MJ!=Fmf`k7>$MeSd$Hiz-t{oZSm)T-vfK+uwR zT%HKdlG#clD8_~qlhV5l^LrFyuidib@k5yY+#`h-{`R^R(v_T)WC(Oy!I^q994*PP z=iu79`||f|evu24FUdQf+5MxSp<$rH6wX6l-Sc2T#`v5c+DbBvIUp$mn;gnO3K%ZA zCxf#--Ja7uPZ=;I20PQheCn!nbE^yB(iODVzB9P{d>p()HuLLkBOJZ_uAyPq7DIze zgEjZ-L^hFaI9HeQaBO>K7&W^-_c+1>6^p`GtMkK`@5>8+`mjAbxc=m!cH~TZ zG(P=Wi6t>?!acJa?LQ|?X+1A}d)CP`eZEH1J1(xIUR3?Z~3lxI;I_kwl7 zKHOJQt#C$K1jpL($~8eRK3tqwzAn{*N>Qpur7>25C~T(`d%$J9PAIkzio+IA}}3v64@{ zjC`mc+>1K*GFz@;9CYZ2CDtLQL<$wj?+2YHfBaUT#df&wP~uPe61;!!-2TE_ zeFd{$>?=4>-)FkbQj~F9QqlB7O5u{VN@A7?CRnHKED9#?FKSM1DLRpSpy=J?gGF`5 zU@_D+7kQFppT|<-v)An_%1%!6r9LowS?c21ev++0S^5K+%hDHT`iJE+P(H(-DjR%u z#pv^tNWQPsP6=yAmV9*nQk{s$RoQaWYseM?Y89ewOa(SJh8g`n>^F?P2MK zS5@`->#90UJK=ozcS-m=DgT`n{;J5|FE6qJ_L>eIxBw{d?}sFo=Re4>Ll+PIMiIT| zQ$U~cQ~aQj?g02hKU$t;p@-$a#|!jo|FJv&?i#CuB@V|ua%NtjO2)Tv+3ELJddl48 
zJ~?~7ZLY1jv@%=HwAYqbdS!Q@>P{@YYwpaMx-eUwn>BZSc6K&?64%Y?ix4PJpQVxQeVwj6_5GnJeSAD z^gT;H%FDUm@^~TZiQCBS=LUQi6ZFZ;^<%%+@dF|%aBjTNfe9KDUuPN&5>kg4jwR?Z zzso$q;sT6GpZ9-^{XYYb?~SP+%a6Xpsc+WZi2tY18jq0k@$-GegEDH9efC2?`(Y#= z%P5NsT%X4kC?tLEXS^=|f0TS)z+!HHWXx@ktojI>_}GQQ^&;zvv>+e|2m*qDARq_` S0)l`bAP5Kog24X{fqw$@NM;oP literal 0 HcmV?d00001 diff --git a/lldb/test/API/linux/aarch64/mte_core_file/main.c b/lldb/test/API/linux/aarch64/mte_core_file/main.c index 89027e0ea75d2..6537edd7bdb95 100644 --- a/lldb/test/API/linux/aarch64/mte_core_file/main.c +++ b/lldb/test/API/linux/aarch64/mte_core_file/main.c @@ -5,9 +5,14 @@ // // Compile with: // -march=armv8.5-a+memtag -g main.c -o a.out.mte +// (use a.out.mte to generate core.mte.notags and core.mte) // -march=armv8.5-a+memtag -g main.c -DNO_MTE -o a.out.nomte // -// /proc/self/coredump_filter was set to 2 when the core files were made. +// Set /proc/self/coredump_filter to the following values when generating the +// core files: +// * core.mte - 3 +// * core.mte.notags - 2 +// * core.nomte - 3 #include #include diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py index 3ba7deb285de9..35810feb48366 100644 --- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py +++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py @@ -5,7 +5,7 @@ from typing import Dict, Any, List import lldbdap_testcase -from lldbsuite.test.decorators import skipIfWindows, skipIf, skipIfBuildType +from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import line_number @@ -32,6 +32,7 @@ def verify_stopped_on_entry(self, stopped_events: List[Dict[str, Any]]): self.assertEqual(seen_stopped_event, 1, "expect only one stopped entry event.") + @skipIfAsan @skipIfWindows @skipIf(oslist=["linux"], archs=["arm$"]) # Always times out on buildbot def test_basic_functionality(self): @@ -80,6 +81,7 @@ def 
test_basic_functionality(self): ) self.continue_to_exit() + @skipIfAsan @skipIfWindows @skipIf(oslist=["linux"], archs=["arm$"]) # Always times out on buildbot def test_stopOnEntry(self): diff --git a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py index 3d07cd8b20e28..af8b6b140da47 100644 --- a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py +++ b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py @@ -2,7 +2,7 @@ Test lldb-dap runInTerminal reverse request """ -from lldbsuite.test.decorators import skipIfBuildType, skipIfWindows, skipIf, no_match +from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import line_number import lldbdap_testcase import os @@ -26,6 +26,7 @@ def read_error_message(fifo_file): with open(fifo_file, "r") as file: return file.readline() + @skipIfAsan @skipIfWindows @skipIf(oslist=["linux"], archs=no_match(["x86_64"])) def test_runInTerminal(self): @@ -73,6 +74,8 @@ def test_runInTerminal(self): self.continue_to_exit() + @skipIfAsan + @skipIfWindows @skipIf(oslist=["linux"], archs=no_match(["x86_64"])) def test_runInTerminalWithObjectEnv(self): """ diff --git a/lldb/unittests/Host/CMakeLists.txt b/lldb/unittests/Host/CMakeLists.txt index 3b20f1d723d18..5591edda38aca 100644 --- a/lldb/unittests/Host/CMakeLists.txt +++ b/lldb/unittests/Host/CMakeLists.txt @@ -37,7 +37,9 @@ add_lldb_unittest(HostTests lldbUtilityHelpers lldbHostHelpers LLVMTestingSupport - LLVMTargetParser + + LINK_COMPONENTS + TargetParser ) add_subdirectory(common) diff --git a/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp b/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp index 40d7c3601ccfd..30199bfe5c254 100644 --- a/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp +++ b/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp @@ -87,31 +87,38 @@ 
TEST(MemoryTagManagerAArch64MTETest, UnpackTagsFromCoreFileSegment) { std::vector tags_data; MemoryTagManager::CoreReaderFn reader = [&tags_data](lldb::offset_t offset, size_t length, void *dst) { + if ((offset + length) >= tags_data.size()) + length = tags_data.size() - offset; + std::memcpy(dst, tags_data.data() + offset, length); return length; }; // Zero length is ok. - std::vector tags = + llvm::Expected> tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 0, 0); - ASSERT_EQ(tags.size(), (size_t)0); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_EQ(tags->size(), (size_t)0); // In the simplest case we read 2 tags which are in the same byte. tags_data.push_back(0x21); // The least significant bits are the first tag in memory. std::vector expected{1, 2}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 0, 32); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // If we read just one then it will have to trim off the second one. expected = std::vector{1}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 0, 16); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // If we read the second tag only then the first one must be trimmed. expected = std::vector{2}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 16, 16); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // This trimming logic applies if you read a larger set of tags. tags_data = std::vector{0x21, 0x43, 0x65, 0x87}; @@ -119,31 +126,55 @@ TEST(MemoryTagManagerAArch64MTETest, UnpackTagsFromCoreFileSegment) { // Trailing tag should be trimmed. 
expected = std::vector{1, 2, 3}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 0, 48); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // Leading tag should be trimmed. expected = std::vector{2, 3, 4}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 16, 48); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // Leading and trailing trimmmed. expected = std::vector{2, 3, 4, 5}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 16, 64); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // The address given is an offset into the whole file so the address requested // from the reader should be beyond that. tags_data = std::vector{0xFF, 0xFF, 0x21, 0x43, 0x65, 0x87}; expected = std::vector{1, 2}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 2, 0, 32); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); // addr is a virtual address that we expect to be >= the tag segment's // starting virtual address. So again an offset must be made from the // difference. expected = std::vector{3, 4}; tags = manager.UnpackTagsFromCoreFileSegment(reader, 32, 2, 64, 32); - ASSERT_THAT(expected, testing::ContainerEq(tags)); + ASSERT_THAT_EXPECTED(tags, llvm::Succeeded()); + ASSERT_THAT(expected, testing::ContainerEq(*tags)); + + // Error when there is not enough data to decode tags. + + // Read 1 tag from an offset just outside the segment's data. + tags_data = {0xAB}; + tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 32, 16); + const char *expected_err = "Could not read tags from core file segment. 
" + "Segment is missing some or all tag data."; + EXPECT_THAT_EXPECTED(tags, llvm::FailedWithMessage(expected_err)); + + // First 2 tags come from the segment, second 2 cannot be read. + tags_data.push_back(0xCD); + tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 32, 64); + EXPECT_THAT_EXPECTED(tags, llvm::FailedWithMessage(expected_err)); + + // Segment is completely empty. + tags_data.clear(); + tags = manager.UnpackTagsFromCoreFileSegment(reader, 0, 0, 0, 16); + EXPECT_THAT_EXPECTED(tags, llvm::FailedWithMessage(expected_err)); } TEST(MemoryTagManagerAArch64MTETest, GetLogicalTag) { diff --git a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp index 72b8c7b1fd825..8e61379b5c731 100644 --- a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp +++ b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp @@ -46,9 +46,10 @@ class TestTool : public mcp::Tool { using mcp::Tool::Tool; virtual llvm::Expected - Call(const llvm::json::Value &args) override { + Call(const ToolArguments &args) override { std::string argument; - if (const json::Object *args_obj = args.getAsObject()) { + if (const json::Object *args_obj = + std::get(args).getAsObject()) { if (const json::Value *s = args_obj->get("arguments")) { argument = s->getAsString().value_or(""); } @@ -66,7 +67,7 @@ class ErrorTool : public mcp::Tool { using mcp::Tool::Tool; virtual llvm::Expected - Call(const llvm::json::Value &args) override { + Call(const ToolArguments &args) override { return llvm::createStringError("error"); } }; @@ -77,7 +78,7 @@ class FailTool : public mcp::Tool { using mcp::Tool::Tool; virtual llvm::Expected - Call(const llvm::json::Value &args) override { + Call(const ToolArguments &args) override { mcp::protocol::TextResult text_result; text_result.content.emplace_back(mcp::protocol::TextContent{{"failed"}}); text_result.isError = true; @@ -115,7 +116,7 @@ class ProtocolServerMCPTest : public ::testing::Test { ProtocolServer::Connection 
connection; connection.protocol = Socket::SocketProtocol::ProtocolTcp; connection.name = llvm::formatv("{0}:0", k_localhost).str(); - m_server_up = std::make_unique(*m_debugger_sp); + m_server_up = std::make_unique(); m_server_up->AddTool(std::make_unique("test", "test tool")); ASSERT_THAT_ERROR(m_server_up->Start(connection), llvm::Succeeded()); @@ -145,7 +146,7 @@ class ProtocolServerMCPTest : public ::testing::Test { TEST_F(ProtocolServerMCPTest, Intialization) { llvm::StringLiteral request = - R"json({"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"claude-ai","version":"0.1.0"}},"jsonrpc":"2.0","id":0})json"; + R"json({"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"lldb-unit","version":"0.1.0"}},"jsonrpc":"2.0","id":0})json"; llvm::StringLiteral response = R"json({"jsonrpc":"2.0","id":0,"result":{"capabilities":{"tools":{"listChanged":true}},"protocolVersion":"2024-11-05","serverInfo":{"name":"lldb-mcp","version":"0.1.0"}}})json"; @@ -167,7 +168,7 @@ TEST_F(ProtocolServerMCPTest, ToolsList) { llvm::StringLiteral request = R"json({"method":"tools/list","params":{},"jsonrpc":"2.0","id":1})json"; llvm::StringLiteral response = - R"json({"id":1,"jsonrpc":"2.0","result":{"tools":[{"description":"test tool","name":"test"},{"description":"Run an lldb command.","inputSchema":{"properties":{"arguments":{"type":"string"}},"type":"object"},"name":"lldb_command"}]}})json"; + R"json( {"id":1,"jsonrpc":"2.0","result":{"tools":[{"description":"test tool","inputSchema":{"type":"object"},"name":"test"},{"description":"List debugger instances with their debugger_id.","inputSchema":{"type":"object"},"name":"lldb_debugger_list"},{"description":"Run an lldb command.","inputSchema":{"properties":{"arguments":{"type":"string"},"debugger_id":{"type":"number"}},"required":["debugger_id"],"type":"object"},"name":"lldb_command"}]}})json"; ASSERT_THAT_ERROR(Write(request), 
llvm::Succeeded()); @@ -205,7 +206,7 @@ TEST_F(ProtocolServerMCPTest, ResourcesList) { TEST_F(ProtocolServerMCPTest, ToolsCall) { llvm::StringLiteral request = - R"json({"method":"tools/call","params":{"name":"test","arguments":{"arguments":"foo"}},"jsonrpc":"2.0","id":11})json"; + R"json({"method":"tools/call","params":{"name":"test","arguments":{"arguments":"foo","debugger_id":0}},"jsonrpc":"2.0","id":11})json"; llvm::StringLiteral response = R"json({"id":11,"jsonrpc":"2.0","result":{"content":[{"text":"foo","type":"text"}],"isError":false}})json"; @@ -227,7 +228,7 @@ TEST_F(ProtocolServerMCPTest, ToolsCallError) { m_server_up->AddTool(std::make_unique("error", "error tool")); llvm::StringLiteral request = - R"json({"method":"tools/call","params":{"name":"error","arguments":{"arguments":"foo"}},"jsonrpc":"2.0","id":11})json"; + R"json({"method":"tools/call","params":{"name":"error","arguments":{"arguments":"foo","debugger_id":0}},"jsonrpc":"2.0","id":11})json"; llvm::StringLiteral response = R"json({"error":{"code":-1,"message":"error"},"id":11,"jsonrpc":"2.0"})json"; @@ -249,7 +250,7 @@ TEST_F(ProtocolServerMCPTest, ToolsCallFail) { m_server_up->AddTool(std::make_unique("fail", "fail tool")); llvm::StringLiteral request = - R"json({"method":"tools/call","params":{"name":"fail","arguments":{"arguments":"foo"}},"jsonrpc":"2.0","id":11})json"; + R"json({"method":"tools/call","params":{"name":"fail","arguments":{"arguments":"foo","debugger_id":0}},"jsonrpc":"2.0","id":11})json"; llvm::StringLiteral response = R"json({"id":11,"jsonrpc":"2.0","result":{"content":[{"text":"failed","type":"text"}],"isError":true}})json"; diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index 5e5eaccecd2b7..c9f0379694287 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -278,7 +278,7 @@ OPTIONS any analysis with a special representation (i.e. 
BlockFrequency, BranchProbability, etc) are printed as raw hex values. - Only works with PowerPC objects or X86 linked images. + Only supported for AArch64, BPF, PowerPC, and X86. Example: A non-symbolized branch instruction with a local target and pc-relative memory access like diff --git a/llvm/docs/HowToReleaseLLVM.rst b/llvm/docs/HowToReleaseLLVM.rst index ca55ee573c002..dd4bb08b81d1c 100644 --- a/llvm/docs/HowToReleaseLLVM.rst +++ b/llvm/docs/HowToReleaseLLVM.rst @@ -38,8 +38,8 @@ Releases should be tagged on Tuesdays. =============================== ========================= Release Approx. Date =============================== ========================= -*release branch: even releases* *4th Tue in January* -*release branch: odd releases* *4th Tue in July* +*release branch: even releases* *2nd Tue in January* +*release branch: odd releases* *2nd Tue in July* X.1.0-rc1 3 days after branch. X.1.0-rc2 2 weeks after branch. X.1.0-rc3 4 weeks after branch @@ -49,7 +49,11 @@ X.1.0-rc3 4 weeks after branch **X.1.3** **12 weeks after branch** **X.1.4** **14 weeks after branch** **X.1.5** **16 weeks after branch** -**X.1.6 (if necessary)** **18 weeks after branch** +**X.1.6** **18 weeks after branch** +**X.1.7** **20 weeks after branch** +**X.1.8** **22 weeks after branch** +**X.1.9** (If necessary) **24 weeks after branch** +**Next release branches** **~25 weeks after branch** =============================== ========================= Release Process Summary @@ -341,10 +345,10 @@ Below are the rules regarding patching the release branch: was created. As with all phases, release managers and maintainers can reject patches that are deemed too invasive. -#. *Before RC2* Patches should be limited to bug fixes or backend specific +#. *Before RC2/RC3* Patches should be limited to bug fixes or backend specific improvements that are determined to be very safe. -#. *Before RC3/Final Major Release* Patches should be limited to critical +#. 
*Before Final Major Release* Patches should be limited to critical bugs or regressions. #. *Bug fix releases* Patches should be limited to bug fixes or very safe diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 4a1005667692d..73ae2ee599640 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -212,6 +212,7 @@ Changes to the RISC-V Backend * `-mtune=andes-45-series` was added. * Adds assembler support for the Andes `XAndesvbfhcvt` (Andes Vector BFLOAT16 Conversion extension). * `-mcpu=andes-ax45mpv` was added. +* Removed -mattr=+no-rvc-hints that could be used to disable parsing and generation of RVC hints. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index 8a11dcf5254a9..dfc8c53edbb8e 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -143,6 +143,42 @@ debugging information influences optimization passes then it will be reported as a failure. See :doc:`TestingGuide` for more information on LLVM test infrastructure and how to run various tests. +.. _variables_and_variable_fragments: + +Variables and Variable Fragments +================================ + +In this document "variable" refers generally to any source language object +which can have a value, including at least: + +- Variables +- Constants +- Formal parameters + +.. note:: + + There is no special provision for "true" constants in LLVM today, and + they are instead treated as local or global variables. + +A variable is represented by a `local variable `_ +or `global variable `_ metadata node. + +A "variable fragment" (or just "fragment") is a contiguous span of bits of a +variable. + +A :ref:`debug record ` which refers to a ``DIExpression`` ending +with a ``DW_OP_LLVM_fragment`` operation describes a fragment of the variable +it refers to. 
+ +The operands of the ``DW_OP_LLVM_fragment`` operation encode the bit offset of +the fragment relative to the start of the variable, and the size of the +fragment in bits, respectively. + +.. note:: + + The ``DW_OP_LLVM_fragment`` operation acts only to encode the fragment + information, and does not have an effect on the semantics of the expression. + .. _format: Debugging information format @@ -510,10 +546,23 @@ values through compilation, when objects are promoted to SSA values a ``#dbg_value`` record is created for each assignment, recording the variable's new location. Compared with the ``#dbg_declare`` record: -* A #dbg_value terminates the effect of any preceding #dbg_values for (any - overlapping fragments of) the specified variable. -* The #dbg_value's position in the IR defines where in the instruction stream - the variable's value changes. +* A ``#dbg_value`` terminates the effects that any preceding records have on + any common bits of a common variable. + + .. note:: + + The current implementation generally terminates the effect of every + record in its entirety if any of its effects would be terminated, rather + than carrying forward the effect of previous records for non-overlapping + bits as it would be permitted to do by this definition. This is allowed + just as dropping any debug information at any point in the compilation is + allowed. + + One exception to this is :doc:`AssignmentTracking` where certain + memory-based locations are carried forward partially in some situations. + +* The ``#dbg_value``'s position in the IR defines where in the instruction + stream the variable's value changes. * Operands can be constants, indicating the variable is assigned a constant value. 
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h index 4819c88471345..ddd2c7ce68c83 100644 --- a/llvm/include/llvm/ADT/ArrayRef.h +++ b/llvm/include/llvm/ADT/ArrayRef.h @@ -84,18 +84,19 @@ namespace llvm { assert(begin <= end); } - /// Construct an ArrayRef from a SmallVector. This is templated in order to - /// avoid instantiating SmallVectorTemplateCommon whenever we - /// copy-construct an ArrayRef. - template - /*implicit*/ ArrayRef(const SmallVectorTemplateCommon &Vec) - : Data(Vec.data()), Length(Vec.size()) { - } - - /// Construct an ArrayRef from a std::vector. - template - /*implicit*/ ArrayRef(const std::vector &Vec) - : Data(Vec.data()), Length(Vec.size()) {} + /// Construct an ArrayRef from a type that has a data() method that returns + /// a pointer convertible to const T *. + template < + typename C, + typename = std::enable_if_t< + std::conjunction_v< + std::is_convertible< + decltype(std::declval().data()) *, + const T *const *>, + std::is_integral().size())>>, + void>> + /*implicit*/ constexpr ArrayRef(const C &V) + : Data(V.data()), Length(V.size()) {} /// Construct an ArrayRef from a std::array template @@ -123,32 +124,6 @@ namespace llvm { #pragma GCC diagnostic pop #endif - /// Construct an ArrayRef from ArrayRef. This uses SFINAE to - /// ensure that only ArrayRefs of pointers can be converted. - template - ArrayRef(const ArrayRef &A, - std::enable_if_t::value> - * = nullptr) - : Data(A.data()), Length(A.size()) {} - - /// Construct an ArrayRef from a SmallVector. This is - /// templated in order to avoid instantiating SmallVectorTemplateCommon - /// whenever we copy-construct an ArrayRef. - template - /*implicit*/ ArrayRef( - const SmallVectorTemplateCommon &Vec, - std::enable_if_t::value> * = - nullptr) - : Data(Vec.data()), Length(Vec.size()) {} - - /// Construct an ArrayRef from std::vector. This uses SFINAE - /// to ensure that only vectors of pointers can be converted. 
- template - ArrayRef(const std::vector &Vec, - std::enable_if_t::value> - * = nullptr) - : Data(Vec.data()), Length(Vec.size()) {} - /// Construct an ArrayRef from iterator_range. This uses SFINAE /// to ensure that this is only used for iterator ranges over plain pointer /// iterators. diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index cfc21b3ec202b..9e2dc1ad771cf 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -34,7 +34,7 @@ namespace dxil { // Returns the resource name from dx_resource_handlefrombinding or // dx_resource_handlefromimplicitbinding call -StringRef getResourceNameFromBindingCall(CallInst *CI); +LLVM_ABI StringRef getResourceNameFromBindingCall(CallInst *CI); /// The dx.RawBuffer target extension type /// @@ -387,7 +387,7 @@ class ResourceInfo { const ResourceBinding &getBinding() const { return Binding; } TargetExtType *getHandleTy() const { return HandleTy; } - const StringRef getName() const { return Name; } + StringRef getName() const { return Name; } bool hasSymbol() const { return Symbol; } LLVM_ABI GlobalVariable *createSymbol(Module &M, StructType *Ty); diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index de67955d85d7c..1eb4a9b8aaf9e 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -32,6 +32,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/JSON.h" #include @@ -57,9 +58,9 @@ enum class IR2VecKind { Symbolic }; namespace ir2vec { -extern cl::opt OpcWeight; -extern cl::opt TypeWeight; -extern cl::opt ArgWeight; +LLVM_ABI extern cl::opt OpcWeight; +LLVM_ABI extern cl::opt TypeWeight; +LLVM_ABI extern cl::opt ArgWeight; /// Embedding is a datatype that wraps std::vector. 
It provides /// additional functionality for arithmetic and comparison operations. @@ -106,16 +107,17 @@ struct Embedding { const std::vector &getData() const { return Data; } /// Arithmetic operators - Embedding &operator+=(const Embedding &RHS); - Embedding &operator-=(const Embedding &RHS); + LLVM_ABI Embedding &operator+=(const Embedding &RHS); + LLVM_ABI Embedding &operator-=(const Embedding &RHS); /// Adds Src Embedding scaled by Factor with the called Embedding. /// Called_Embedding += Src * Factor - Embedding &scaleAndAdd(const Embedding &Src, float Factor); + LLVM_ABI Embedding &scaleAndAdd(const Embedding &Src, float Factor); /// Returns true if the embedding is approximately equal to the RHS embedding /// within the specified tolerance. - bool approximatelyEquals(const Embedding &RHS, double Tolerance = 1e-6) const; + LLVM_ABI bool approximatelyEquals(const Embedding &RHS, + double Tolerance = 1e-6) const; }; using InstEmbeddingsMap = DenseMap; @@ -148,7 +150,7 @@ class Embedder { mutable BBEmbeddingsMap BBVecMap; mutable InstEmbeddingsMap InstVecMap; - Embedder(const Function &F, const Vocab &Vocabulary); + LLVM_ABI Embedder(const Function &F, const Vocab &Vocabulary); /// Helper function to compute embeddings. It generates embeddings for all /// the instructions and basic blocks in the function F. Logic of computing @@ -161,38 +163,38 @@ class Embedder { /// Lookup vocabulary for a given Key. If the key is not found, it returns a /// zero vector. - Embedding lookupVocab(const std::string &Key) const; + LLVM_ABI Embedding lookupVocab(const std::string &Key) const; public: virtual ~Embedder() = default; /// Factory method to create an Embedder object. - static Expected> + LLVM_ABI static Expected> create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary); /// Returns a map containing instructions and the corresponding embeddings for /// the function F if it has been computed. 
If not, it computes the embeddings /// for the function and returns the map. - const InstEmbeddingsMap &getInstVecMap() const; + LLVM_ABI const InstEmbeddingsMap &getInstVecMap() const; /// Returns a map containing basic block and the corresponding embeddings for /// the function F if it has been computed. If not, it computes the embeddings /// for the function and returns the map. - const BBEmbeddingsMap &getBBVecMap() const; + LLVM_ABI const BBEmbeddingsMap &getBBVecMap() const; /// Returns the embedding for a given basic block in the function F if it has /// been computed. If not, it computes the embedding for the basic block and /// returns it. - const Embedding &getBBVector(const BasicBlock &BB) const; + LLVM_ABI const Embedding &getBBVector(const BasicBlock &BB) const; /// Computes and returns the embedding for the current function. - const Embedding &getFunctionVector() const; + LLVM_ABI const Embedding &getFunctionVector() const; }; /// Class for computing the Symbolic embeddings of IR2Vec. /// Symbolic embeddings are constructed based on the entity-level /// representations obtained from the Vocabulary. -class SymbolicEmbedder : public Embedder { +class LLVM_ABI SymbolicEmbedder : public Embedder { private: /// Utility function to compute the embedding for a given type. 
Embedding getTypeEmbedding(const Type *Ty) const; @@ -219,13 +221,13 @@ class IR2VecVocabResult { public: IR2VecVocabResult() = default; - IR2VecVocabResult(ir2vec::Vocab &&Vocabulary); + LLVM_ABI IR2VecVocabResult(ir2vec::Vocab &&Vocabulary); bool isValid() const { return Valid; } - const ir2vec::Vocab &getVocabulary() const; - unsigned getDimension() const; - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv) const; + LLVM_ABI const ir2vec::Vocab &getVocabulary() const; + LLVM_ABI unsigned getDimension() const; + LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &Inv) const; }; /// This analysis provides the vocabulary for IR2Vec. The vocabulary provides a @@ -237,12 +239,12 @@ class IR2VecVocabAnalysis : public AnalysisInfoMixin { void emitError(Error Err, LLVMContext &Ctx); public: - static AnalysisKey Key; + LLVM_ABI static AnalysisKey Key; IR2VecVocabAnalysis() = default; - explicit IR2VecVocabAnalysis(const ir2vec::Vocab &Vocab); - explicit IR2VecVocabAnalysis(ir2vec::Vocab &&Vocab); + LLVM_ABI explicit IR2VecVocabAnalysis(const ir2vec::Vocab &Vocab); + LLVM_ABI explicit IR2VecVocabAnalysis(ir2vec::Vocab &&Vocab); using Result = IR2VecVocabResult; - Result run(Module &M, ModuleAnalysisManager &MAM); + LLVM_ABI Result run(Module &M, ModuleAnalysisManager &MAM); }; /// This pass prints the IR2Vec embeddings for instructions, basic blocks, and @@ -253,7 +255,7 @@ class IR2VecPrinterPass : public PassInfoMixin { public: explicit IR2VecPrinterPass(raw_ostream &OS) : OS(OS) {} - PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); static bool isRequired() { return true; } }; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 4596b2563c1d8..c804f551f5a75 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ 
b/llvm/include/llvm/Analysis/ValueTracking.h @@ -311,11 +311,11 @@ LLVM_ABI std::optional computeKnownFPSignBit(const Value *V, /// Return true if the sign bit of the FP value can be ignored by the user when /// the value is zero. -bool canIgnoreSignBitOfZero(const Use &U); +LLVM_ABI bool canIgnoreSignBitOfZero(const Use &U); /// Return true if the sign bit of the FP value can be ignored by the user when /// the value is NaN. -bool canIgnoreSignBitOfNaN(const Use &U); +LLVM_ABI bool canIgnoreSignBitOfNaN(const Use &U); /// If the specified value can be set by repeating the same byte in memory, /// return the i8 value that it is represented with. This is true for all i8 diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index cabecbec175b3..bf958e100f2ac 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1105,6 +1105,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { VectorType *&SubTy) const { if (Mask.empty()) return Kind; + int NumDstElts = Mask.size(); int NumSrcElts = SrcTy->getElementCount().getKnownMinValue(); switch (Kind) { case TTI::SK_PermuteSingleSrc: { @@ -1115,8 +1116,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (isSplatMask(Mask, NumSrcElts, Index)) return TTI::SK_Broadcast; if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) && - (Index + Mask.size()) <= (size_t)NumSrcElts) { - SubTy = FixedVectorType::get(SrcTy->getElementType(), Mask.size()); + (Index + NumDstElts) <= NumSrcElts) { + SubTy = FixedVectorType::get(SrcTy->getElementType(), NumDstElts); return TTI::SK_ExtractSubvector; } break; @@ -1126,8 +1127,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return improveShuffleKindFromMask(TTI::SK_PermuteSingleSrc, Mask, SrcTy, Index, SubTy); int NumSubElts; - if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask( - Mask, NumSrcElts, NumSubElts, Index)) 
{ + if (NumDstElts > 2 && ShuffleVectorInst::isInsertSubvectorMask( + Mask, NumSrcElts, NumSubElts, Index)) { if (Index + NumSubElts > NumSrcElts) return Kind; SubTy = FixedVectorType::get(SrcTy->getElementType(), NumSubElts); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 109dc8812c24d..75c051712ae43 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -609,6 +609,15 @@ class LLVM_ABI CallLowering { virtual bool isTypeIsValidForThisReturn(EVT Ty) const { return false; } }; +extern template LLVM_ABI void +CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const Function &FuncInfo) const; + +extern template LLVM_ABI void +CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const CallBase &FuncInfo) const; } // end namespace llvm #endif // LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 4106be4c81cea..ea0873f41ebba 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -295,6 +295,11 @@ class LegalizerHelper { getNeutralElementForVecReduce(unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty); + LegalizeResult emitSincosLibcall(MachineInstr &MI, + MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType, + LostDebugLocObserver &LocObserver); + public: /// Return the alignment to use for a stack temporary object with the given /// type. 
diff --git a/llvm/include/llvm/CodeGenTypes/LowLevelType.h b/llvm/include/llvm/CodeGenTypes/LowLevelType.h index 06879e1f8d15b..d8e0848aff84d 100644 --- a/llvm/include/llvm/CodeGenTypes/LowLevelType.h +++ b/llvm/include/llvm/CodeGenTypes/LowLevelType.h @@ -28,6 +28,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/CodeGenTypes/MachineValueType.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include @@ -140,7 +141,7 @@ class LLT { explicit constexpr LLT() : IsScalar(false), IsPointer(false), IsVector(false), RawData(0) {} - explicit LLT(MVT VT); + LLVM_ABI explicit LLT(MVT VT); constexpr bool isValid() const { return IsScalar || RawData != 0; } constexpr bool isScalar() const { return IsScalar; } @@ -282,7 +283,7 @@ class LLT { return scalar(getScalarSizeInBits()); } - void print(raw_ostream &OS) const; + LLVM_ABI void print(raw_ostream &OS) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; diff --git a/llvm/include/llvm/CodeGenTypes/MachineValueType.h b/llvm/include/llvm/CodeGenTypes/MachineValueType.h index c14abca027350..b8e91a022ec5e 100644 --- a/llvm/include/llvm/CodeGenTypes/MachineValueType.h +++ b/llvm/include/llvm/CodeGenTypes/MachineValueType.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_MACHINEVALUETYPE_H #include "llvm/ADT/Sequence.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TypeSize.h" @@ -65,10 +66,10 @@ namespace llvm { bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; } /// Support for debugging, callable in GDB: VT.dump() - void dump() const; + LLVM_ABI void dump() const; /// Implement operator<<. - void print(raw_ostream &OS) const; + LLVM_ABI void print(raw_ostream &OS) const; /// Return true if this is a valid simple valuetype. bool isValid() const { @@ -509,11 +510,11 @@ namespace llvm { /// otherwise they are invalid. 
/// NB: This includes pointer types, which require a DataLayout to convert /// to a concrete value type. - static MVT getVT(Type *Ty, bool HandleUnknown = false); + LLVM_ABI static MVT getVT(Type *Ty, bool HandleUnknown = false); /// Returns an APFloat semantics tag appropriate for the value type. If this /// is a vector type, the element semantics are returned. - const fltSemantics &getFltSemantics() const; + LLVM_ABI const fltSemantics &getFltSemantics() const; public: /// SimpleValueType Iteration diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index f4569850b093c..5533652736dc8 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -4468,7 +4468,9 @@ Node *AbstractManglingParser::parseType() { return nullptr; if (!consumeIf('_')) return nullptr; - return make(Size, Signed); + // The front end expects this to be available for Substitution + Result = make(Size, Signed); + break; } // ::= Di # char32_t case 'i': diff --git a/llvm/include/llvm/ExecutionEngine/Orc/COFF.h b/llvm/include/llvm/ExecutionEngine/Orc/COFF.h index adc9e9e171165..42a6c85a577fa 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/COFF.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/COFF.h @@ -13,6 +13,7 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_COFF_H #define LLVM_EXECUTIONENGINE_ORC_COFF_H +#include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" @@ -31,8 +32,8 @@ class COFFImportFileScanner { public: COFFImportFileScanner(std::set &ImportedDynamicLibraries) : ImportedDynamicLibraries(ImportedDynamicLibraries) {} - Expected operator()(object::Archive &A, MemoryBufferRef MemberBuf, - size_t Index) const; + LLVM_ABI Expected + operator()(object::Archive &A, MemoryBufferRef MemberBuf, size_t Index) const; private: std::set &ImportedDynamicLibraries; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h 
b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h index e63f5f7842520..6fce74ddf72fe 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h @@ -23,7 +23,6 @@ #include "llvm/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.h" #include "llvm/ExecutionEngine/Orc/TaskDispatch.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/TargetParser/Triple.h" @@ -425,47 +424,6 @@ class LLVM_ABI InProcessMemoryAccess bool IsArch64Bit; }; -/// A ExecutorProcessControl instance that asserts if any of its methods are -/// used. Suitable for use is unit tests, and by ORC clients who haven't moved -/// to ExecutorProcessControl-based APIs yet. -class UnsupportedExecutorProcessControl : public ExecutorProcessControl, - private InProcessMemoryAccess { -public: - UnsupportedExecutorProcessControl( - std::shared_ptr SSP = nullptr, - std::unique_ptr D = nullptr, const std::string &TT = "", - unsigned PageSize = 0) - : ExecutorProcessControl( - SSP ? std::move(SSP) : std::make_shared(), - D ? std::move(D) : std::make_unique()), - InProcessMemoryAccess(Triple(TT).isArch64Bit()) { - this->TargetTriple = Triple(TT); - this->PageSize = PageSize; - this->MemAccess = this; - } - - Expected runAsMain(ExecutorAddr MainFnAddr, - ArrayRef Args) override { - llvm_unreachable("Unsupported"); - } - - Expected runAsVoidFunction(ExecutorAddr VoidFnAddr) override { - llvm_unreachable("Unsupported"); - } - - Expected runAsIntFunction(ExecutorAddr IntFnAddr, int Arg) override { - llvm_unreachable("Unsupported"); - } - - void callWrapperAsync(ExecutorAddr WrapperFnAddr, - IncomingWFRHandler OnComplete, - ArrayRef ArgBuffer) override { - llvm_unreachable("Unsupported"); - } - - Error disconnect() override { return Error::success(); } -}; - /// A ExecutorProcessControl implementation targeting the current process. 
class LLVM_ABI SelfExecutorProcessControl : public ExecutorProcessControl, private InProcessMemoryAccess, diff --git a/llvm/include/llvm/Frontend/Directive/Spelling.h b/llvm/include/llvm/Frontend/Directive/Spelling.h index a101489603254..a13e26e88823d 100644 --- a/llvm/include/llvm/Frontend/Directive/Spelling.h +++ b/llvm/include/llvm/Frontend/Directive/Spelling.h @@ -10,6 +10,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Compiler.h" #include #include @@ -33,7 +34,8 @@ struct Spelling { VersionRange Versions; }; -StringRef FindName(llvm::iterator_range, unsigned Version); +LLVM_ABI StringRef FindName(llvm::iterator_range, + unsigned Version); } // namespace llvm::directive diff --git a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h index f0168c0407884..17ba28b6de443 100644 --- a/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h +++ b/llvm/include/llvm/Frontend/Driver/CodeGenOptions.h @@ -63,7 +63,7 @@ enum ProfileInstrKind { }; // Default filename used for profile generation. 
-std::string getDefaultProfileGenName(); +LLVM_ABI std::string getDefaultProfileGenName(); } // end namespace llvm::driver #endif diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignatureUtils.h b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignatureUtils.h index b871d3839fe03..43c8805ce06b6 100644 --- a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignatureUtils.h +++ b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignatureUtils.h @@ -99,13 +99,14 @@ class ResourceRange { // Returns a reference to the first RangeInfo that overlaps with // [Info.LowerBound;Info.UpperBound], or, std::nullopt if there is no overlap - std::optional getOverlapping(const RangeInfo &Info) const; + LLVM_ABI std::optional + getOverlapping(const RangeInfo &Info) const; // Return the mapped RangeInfo at X or nullptr if no mapping exists - const RangeInfo *lookup(uint32_t X) const; + LLVM_ABI const RangeInfo *lookup(uint32_t X) const; // Removes all entries of the ResourceRange - void clear(); + LLVM_ABI void clear(); // Insert the required (sub-)intervals such that the interval of [a;b] = // [Info.LowerBound, Info.UpperBound] is covered and points to a valid @@ -133,7 +134,7 @@ class ResourceRange { // Returns a reference to the first RangeInfo that overlaps with // [Info.LowerBound;Info.UpperBound], or, std::nullopt if there is no overlap // (equivalent to getOverlapping) - std::optional insert(const RangeInfo &Info); + LLVM_ABI std::optional insert(const RangeInfo &Info); }; } // namespace rootsig diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index b37c28477fb34..f2610011a7e04 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -651,7 +651,8 @@ def OMP_EndAssumes : Directive<[Spelling<"end assumes">]> { let category = OMP_BeginAssumes.category; let languages = OMP_BeginAssumes.languages; } -def OMP_BeginDeclareTarget : Directive<[Spelling<"begin declare target">]> { +def OMP_BeginDeclareTarget : 
Directive<[Spelling<"begin declare target", 1, 52>, + Spelling<"begin declare_target", 60>]> { let allowedClauses = [ VersionedClause, VersionedClause, @@ -662,17 +663,21 @@ def OMP_BeginDeclareTarget : Directive<[Spelling<"begin declare target">]> { let category = CA_Declarative; let languages = [L_C]; } -def OMP_EndDeclareTarget : Directive<[Spelling<"end declare target">]> { +def OMP_EndDeclareTarget : Directive<[Spelling<"end declare target", 1, 52>, + Spelling<"end declare_target", 60>]> { let association = AS_Delimited; let category = OMP_BeginDeclareTarget.category; let languages = OMP_BeginDeclareTarget.languages; } -def OMP_BeginDeclareVariant : Directive<[Spelling<"begin declare variant">]> { +def OMP_BeginDeclareVariant + : Directive<[Spelling<"begin declare variant", 1, 52>, + Spelling<"begin declare_variant", 60>]> { let association = AS_Delimited; let category = CA_Declarative; let languages = [L_C]; } -def OMP_EndDeclareVariant : Directive<[Spelling<"end declare variant">]> { +def OMP_EndDeclareVariant : Directive<[Spelling<"end declare variant", 1, 52>, + Spelling<"end declare_variant", 60>]> { let association = AS_Delimited; let category = OMP_BeginDeclareVariant.category; let languages = OMP_BeginDeclareVariant.languages; @@ -685,7 +690,8 @@ def OMP_Cancel : Directive<[Spelling<"cancel">]> { let association = AS_None; let category = CA_Executable; } -def OMP_CancellationPoint : Directive<[Spelling<"cancellation point">]> { +def OMP_CancellationPoint : Directive<[Spelling<"cancellation point", 1, 52>, + Spelling<"cancellation_point", 60>]> { let allowedOnceClauses = [ VersionedClause, ]; @@ -699,21 +705,24 @@ def OMP_Critical : Directive<[Spelling<"critical">]> { let association = AS_Block; let category = CA_Executable; } -def OMP_DeclareMapper : Directive<[Spelling<"declare mapper">]> { +def OMP_DeclareMapper : Directive<[Spelling<"declare mapper", 1, 52>, + Spelling<"declare_mapper", 60>]> { let requiredClauses = [ VersionedClause, ]; let 
association = AS_None; let category = CA_Declarative; } -def OMP_DeclareReduction : Directive<[Spelling<"declare reduction">]> { +def OMP_DeclareReduction : Directive<[Spelling<"declare reduction", 1, 52>, + Spelling<"declare_reduction", 60>]> { let allowedOnceClauses = [ VersionedClause, ]; let association = AS_None; let category = CA_Declarative; } -def OMP_DeclareSimd : Directive<[Spelling<"declare simd">]> { +def OMP_DeclareSimd : Directive<[Spelling<"declare simd", 1, 52>, + Spelling<"declare_simd", 60>]> { let allowedClauses = [ VersionedClause, VersionedClause, @@ -729,7 +738,8 @@ def OMP_DeclareSimd : Directive<[Spelling<"declare simd">]> { let association = AS_Declaration; let category = CA_Declarative; } -def OMP_DeclareTarget : Directive<[Spelling<"declare target">]> { +def OMP_DeclareTarget : Directive<[Spelling<"declare target", 1, 52>, + Spelling<"declare_target", 60>]> { let allowedClauses = [ VersionedClause, VersionedClause, @@ -742,7 +752,8 @@ def OMP_DeclareTarget : Directive<[Spelling<"declare target">]> { let association = AS_None; let category = CA_Declarative; } -def OMP_DeclareVariant : Directive<[Spelling<"declare variant">]> { +def OMP_DeclareVariant : Directive<[Spelling<"declare variant", 1, 52>, + Spelling<"declare_variant", 60>]> { let allowedClauses = [ VersionedClause, ]; @@ -1101,7 +1112,8 @@ def OMP_Target : Directive<[Spelling<"target">]> { let association = AS_Block; let category = CA_Executable; } -def OMP_TargetData : Directive<[Spelling<"target data">]> { +def OMP_TargetData : Directive<[Spelling<"target data", 1, 52>, + Spelling<"target_data", 60>]> { let allowedOnceClauses = [ VersionedClause, VersionedClause, @@ -1114,7 +1126,8 @@ def OMP_TargetData : Directive<[Spelling<"target data">]> { let association = AS_Block; let category = CA_Executable; } -def OMP_TargetEnterData : Directive<[Spelling<"target enter data">]> { +def OMP_TargetEnterData : Directive<[Spelling<"target enter data", 1, 52>, + 
Spelling<"target_enter_data", 60>]> { let allowedClauses = [ VersionedClause, ]; @@ -1129,7 +1142,8 @@ def OMP_TargetEnterData : Directive<[Spelling<"target enter data">]> { let association = AS_None; let category = CA_Executable; } -def OMP_TargetExitData : Directive<[Spelling<"target exit data">]> { +def OMP_TargetExitData : Directive<[Spelling<"target exit data", 1, 52>, + Spelling<"target_exit_data", 60>]> { let allowedClauses = [ VersionedClause, ]; @@ -1144,7 +1158,8 @@ def OMP_TargetExitData : Directive<[Spelling<"target exit data">]> { let association = AS_None; let category = CA_Executable; } -def OMP_TargetUpdate : Directive<[Spelling<"target update">]> { +def OMP_TargetUpdate : Directive<[Spelling<"target update", 1, 52>, + Spelling<"target_update", 60>]> { let allowedClauses = [ VersionedClause, VersionedClause, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 93fb0d8e8d078..19a4058b64382 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -484,7 +484,7 @@ class OpenMPIRBuilder { /// not have an effect on \p M (see initialize) OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()), OffloadInfoManager(this), - T(M.getTargetTriple()) {} + T(M.getTargetTriple()), IsFinalized(false) {} LLVM_ABI ~OpenMPIRBuilder(); class AtomicInfo : public llvm::AtomicInfo { @@ -521,6 +521,10 @@ class OpenMPIRBuilder { /// all functions are finalized. LLVM_ABI void finalize(Function *Fn = nullptr); + /// Check whether the finalize function has already run + /// \return true if the finalize function has already run + LLVM_ABI bool isFinalized(); + /// Add attributes known for \p FnID to \p Fn. 
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn); @@ -3286,6 +3290,8 @@ class OpenMPIRBuilder { Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2, AtomicRMWInst::BinOp RMWOp); + bool IsFinalized; + public: /// a struct to pack relevant information while generating atomic Ops struct AtomicOpValue { diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index 43fca571ee6d5..99f7491b1b9b5 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -367,6 +367,22 @@ namespace llvm { uint32_t VBPtrOffset, DINode::DIFlags Flags); + /// Create debugging information entry for a member. + /// \param Scope Member scope. + /// \param Name Member name. + /// \param File File where this member is defined. + /// \param LineNo Line number. + /// \param SizeInBits Member size. + /// \param AlignInBits Member alignment. + /// \param OffsetInBits Member offset. + /// \param Flags Flags to encode member attribute, e.g. private + /// \param Ty Parent type. + /// \param Annotations Member annotations. + LLVM_ABI DIDerivedType *createMemberType( + DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo, + Metadata *SizeInBits, uint32_t AlignInBits, Metadata *OffsetInBits, + DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations = nullptr); + /// Create debugging information entry for a member. /// \param Scope Member scope. /// \param Name Member name. @@ -419,6 +435,23 @@ namespace llvm { Constant *Discriminant, DIType *Ty); + /// Create debugging information entry for a bit field member. + /// \param Scope Member scope. + /// \param Name Member name. + /// \param File File where this member is defined. + /// \param LineNo Line number. + /// \param SizeInBits Member size. + /// \param OffsetInBits Member offset. + /// \param StorageOffsetInBits Member storage offset. + /// \param Flags Flags to encode member attribute. + /// \param Ty Parent type. + /// \param Annotations Member annotations. 
+ LLVM_ABI DIDerivedType *createBitFieldMemberType( + DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo, + Metadata *SizeInBits, Metadata *OffsetInBits, + uint64_t StorageOffsetInBits, DINode::DIFlags Flags, DIType *Ty, + DINodeArray Annotations = nullptr); + /// Create debugging information entry for a bit field member. /// \param Scope Member scope. /// \param Name Member name. @@ -510,6 +543,29 @@ namespace llvm { unsigned RunTimeLang = 0, DIType *VTableHolder = nullptr, MDNode *TemplateParms = nullptr, StringRef UniqueIdentifier = ""); + /// Create debugging information entry for a struct. + /// \param Scope Scope in which this struct is defined. + /// \param Name Struct name. + /// \param File File where this member is defined. + /// \param LineNumber Line number. + /// \param SizeInBits Member size. + /// \param AlignInBits Member alignment. + /// \param Flags Flags to encode member attribute, e.g. private + /// \param Elements Struct elements. + /// \param RunTimeLang Optional parameter, Objective-C runtime version. + /// \param UniqueIdentifier A unique identifier for the struct. + /// \param Specification The type that this type completes. This is used by + /// Swift to represent generic types. + /// \param NumExtraInhabitants The number of extra inhabitants of the type. + /// An extra inhabitant is a bit pattern that does not represent a valid + /// value for instances of a given type. This is used by the Swift language. + LLVM_ABI DICompositeType *createStructType( + DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber, + Metadata *SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags, + DIType *DerivedFrom, DINodeArray Elements, unsigned RunTimeLang = 0, + DIType *VTableHolder = nullptr, StringRef UniqueIdentifier = "", + DIType *Specification = nullptr, uint32_t NumExtraInhabitants = 0); + /// Create debugging information entry for a struct. /// \param Scope Scope in which this struct is defined. /// \param Name Struct name. 
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 18228b7757897..f80e44ce3abbc 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -717,40 +717,33 @@ std::optional DIScope::getSource() const { class DIType : public DIScope { unsigned Line; DIFlags Flags; - uint64_t SizeInBits; - uint64_t OffsetInBits; uint32_t NumExtraInhabitants; protected: + static constexpr unsigned N_OPERANDS = 5; + DIType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, - unsigned Line, uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, uint32_t NumExtraInhabitants, DIFlags Flags, - ArrayRef Ops) + unsigned Line, uint32_t AlignInBits, uint32_t NumExtraInhabitants, + DIFlags Flags, ArrayRef Ops) : DIScope(C, ID, Storage, Tag, Ops) { - init(Line, SizeInBits, AlignInBits, OffsetInBits, NumExtraInhabitants, - Flags); + init(Line, AlignInBits, NumExtraInhabitants, Flags); } ~DIType() = default; - void init(unsigned Line, uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, uint32_t NumExtraInhabitants, + void init(unsigned Line, uint32_t AlignInBits, uint32_t NumExtraInhabitants, DIFlags Flags) { this->Line = Line; this->Flags = Flags; - this->SizeInBits = SizeInBits; this->SubclassData32 = AlignInBits; - this->OffsetInBits = OffsetInBits; this->NumExtraInhabitants = NumExtraInhabitants; } /// Change fields in place. 
- void mutate(unsigned Tag, unsigned Line, uint64_t SizeInBits, - uint32_t AlignInBits, uint64_t OffsetInBits, + void mutate(unsigned Tag, unsigned Line, uint32_t AlignInBits, uint32_t NumExtraInhabitants, DIFlags Flags) { assert(isDistinct() && "Only distinct nodes can mutate"); setTag(Tag); - init(Line, SizeInBits, AlignInBits, OffsetInBits, NumExtraInhabitants, - Flags); + init(Line, AlignInBits, NumExtraInhabitants, Flags); } public: @@ -759,10 +752,8 @@ class DIType : public DIScope { } unsigned getLine() const { return Line; } - uint64_t getSizeInBits() const { return SizeInBits; } LLVM_ABI uint32_t getAlignInBits() const; uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; } - uint64_t getOffsetInBits() const { return OffsetInBits; } uint32_t getNumExtraInhabitants() const { return NumExtraInhabitants; } DIFlags getFlags() const { return Flags; } @@ -772,6 +763,24 @@ class DIType : public DIScope { Metadata *getRawScope() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs(2); } + Metadata *getRawSizeInBits() const { return getOperand(3); } + uint64_t getSizeInBits() const { + if (auto *MD = dyn_cast_or_null(getRawSizeInBits())) { + if (ConstantInt *CI = dyn_cast_or_null(MD->getValue())) + return CI->getZExtValue(); + } + return 0; + } + + Metadata *getRawOffsetInBits() const { return getOperand(4); } + uint64_t getOffsetInBits() const { + if (auto *MD = dyn_cast_or_null(getRawOffsetInBits())) { + if (ConstantInt *CI = dyn_cast_or_null(MD->getValue())) + return CI->getZExtValue(); + } + return 0; + } + /// Returns a new temporary DIType with updated Flags TempDIType cloneWithFlags(DIFlags NewFlags) const { auto NewTy = clone(); @@ -837,18 +846,18 @@ class DIBasicType : public DIType { protected: DIBasicType(LLVMContext &C, StorageType Storage, unsigned Tag, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + uint32_t AlignInBits, unsigned Encoding, uint32_t NumExtraInhabitants, DIFlags Flags, 
ArrayRef Ops) - : DIType(C, DIBasicTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0, + : DIType(C, DIBasicTypeKind, Storage, Tag, 0, AlignInBits, NumExtraInhabitants, Flags, Ops), Encoding(Encoding) {} DIBasicType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + uint32_t AlignInBits, unsigned Encoding, uint32_t NumExtraInhabitants, DIFlags Flags, ArrayRef Ops) - : DIType(C, ID, Storage, Tag, 0, SizeInBits, AlignInBits, 0, - NumExtraInhabitants, Flags, Ops), + : DIType(C, ID, Storage, Tag, 0, AlignInBits, NumExtraInhabitants, Flags, + Ops), Encoding(Encoding) {} ~DIBasicType() = default; @@ -866,11 +875,21 @@ class DIBasicType : public DIType { uint32_t AlignInBits, unsigned Encoding, uint32_t NumExtraInhabitants, DIFlags Flags, StorageType Storage, - bool ShouldCreate = true); + bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + return getImpl(Context, Tag, Name, SizeInBitsNode, AlignInBits, Encoding, + NumExtraInhabitants, Flags, Storage, ShouldCreate); + } + static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag, + MDString *Name, Metadata *SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + uint32_t NumExtraInhabitants, DIFlags Flags, + StorageType Storage, bool ShouldCreate = true); TempDIBasicType cloneImpl() const { - return getTemporary(getContext(), getTag(), getName(), getSizeInBits(), - getAlignInBits(), getEncoding(), + return getTemporary(getContext(), getTag(), getRawName(), + getRawSizeInBits(), getAlignInBits(), getEncoding(), getNumExtraInhabitants(), getFlags()); } @@ -903,6 +922,12 @@ class DIBasicType : public DIType { uint32_t NumExtraInhabitants, DIFlags Flags), (Tag, Name, SizeInBits, AlignInBits, Encoding, NumExtraInhabitants, Flags)) + DEFINE_MDNODE_GET(DIBasicType, + (unsigned Tag, MDString *Name, Metadata *SizeInBits, + uint32_t AlignInBits, unsigned 
Encoding, + uint32_t NumExtraInhabitants, DIFlags Flags), + (Tag, Name, SizeInBits, AlignInBits, Encoding, + NumExtraInhabitants, Flags)) TempDIBasicType clone() const { return cloneImpl(); } @@ -934,29 +959,28 @@ class DIFixedPointType : public DIBasicType { APInt Denominator; DIFixedPointType(LLVMContext &C, StorageType Storage, unsigned Tag, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, - DIFlags Flags, unsigned Kind, int Factor, - ArrayRef Ops) - : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, SizeInBits, - AlignInBits, Encoding, 0, Flags, Ops), + uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, + unsigned Kind, int Factor, ArrayRef Ops) + : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, AlignInBits, + Encoding, 0, Flags, Ops), Kind(Kind), Factor(Factor) { assert(Kind == FixedPointBinary || Kind == FixedPointDecimal); } DIFixedPointType(LLVMContext &C, StorageType Storage, unsigned Tag, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, - DIFlags Flags, unsigned Kind, APInt Numerator, - APInt Denominator, ArrayRef Ops) - : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, SizeInBits, - AlignInBits, Encoding, 0, Flags, Ops), + uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, + unsigned Kind, APInt Numerator, APInt Denominator, + ArrayRef Ops) + : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, AlignInBits, + Encoding, 0, Flags, Ops), Kind(Kind), Factor(0), Numerator(Numerator), Denominator(Denominator) { assert(Kind == FixedPointRational); } DIFixedPointType(LLVMContext &C, StorageType Storage, unsigned Tag, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, - DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, + uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, + unsigned Kind, int Factor, APInt Numerator, APInt Denominator, ArrayRef Ops) - : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, SizeInBits, - AlignInBits, Encoding, 0, Flags, Ops), + : DIBasicType(C, 
DIFixedPointTypeKind, Storage, Tag, AlignInBits, + Encoding, 0, Flags, Ops), Kind(Kind), Factor(Factor), Numerator(Numerator), Denominator(Denominator) {} ~DIFixedPointType() = default; @@ -966,6 +990,17 @@ class DIFixedPointType : public DIBasicType { uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, APInt Denominator, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + return getImpl(Context, Tag, getCanonicalMDString(Context, Name), + SizeInBitsNode, AlignInBits, Encoding, Flags, Kind, Factor, + Numerator, Denominator, Storage, ShouldCreate); + } + static DIFixedPointType * + getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, + Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, + APInt Denominator, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Tag, getCanonicalMDString(Context, Name), SizeInBits, AlignInBits, Encoding, Flags, Kind, Factor, Numerator, Denominator, Storage, ShouldCreate); @@ -974,12 +1009,23 @@ class DIFixedPointType : public DIBasicType { getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, + APInt Denominator, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + return getImpl(Context, Tag, Name, SizeInBitsNode, AlignInBits, Encoding, + Flags, Kind, Factor, Numerator, Denominator, Storage, + ShouldCreate); + } + static DIFixedPointType * + getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, + Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, APInt 
Denominator, StorageType Storage, bool ShouldCreate = true); TempDIFixedPointType cloneImpl() const { - return getTemporary(getContext(), getTag(), getName(), getSizeInBits(), - getAlignInBits(), getEncoding(), getFlags(), Kind, - Factor, Numerator, Denominator); + return getTemporary(getContext(), getTag(), getRawName(), + getRawSizeInBits(), getAlignInBits(), getEncoding(), + getFlags(), Kind, Factor, Numerator, Denominator); } public: @@ -1011,6 +1057,13 @@ class DIFixedPointType : public DIBasicType { APInt Denominator), (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags, Kind, Factor, Numerator, Denominator)) + DEFINE_MDNODE_GET(DIFixedPointType, + (unsigned Tag, MDString *Name, Metadata *SizeInBits, + uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, + unsigned Kind, int Factor, APInt Numerator, + APInt Denominator), + (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags, Kind, + Factor, Numerator, Denominator)) TempDIFixedPointType clone() const { return cloneImpl(); } @@ -1050,13 +1103,15 @@ class DIStringType : public DIType { friend class LLVMContextImpl; friend class MDNode; + static constexpr unsigned MY_FIRST_OPERAND = DIType::N_OPERANDS; + unsigned Encoding; DIStringType(LLVMContext &C, StorageType Storage, unsigned Tag, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + uint32_t AlignInBits, unsigned Encoding, ArrayRef Ops) - : DIType(C, DIStringTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0, - 0, FlagZero, Ops), + : DIType(C, DIStringTypeKind, Storage, Tag, 0, AlignInBits, 0, FlagZero, + Ops), Encoding(Encoding) {} ~DIStringType() = default; @@ -1066,20 +1121,34 @@ class DIStringType : public DIType { uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); return getImpl(Context, Tag, getCanonicalMDString(Context, Name), - StringLength, StrLenExp, 
StrLocationExp, SizeInBits, + StringLength, StrLenExp, StrLocationExp, SizeInBitsNode, AlignInBits, Encoding, Storage, ShouldCreate); } LLVM_ABI static DIStringType * getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *StringLength, Metadata *StrLenExp, Metadata *StrLocationExp, uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, - StorageType Storage, bool ShouldCreate = true); + StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + return getImpl(Context, Tag, Name, StringLength, StrLenExp, StrLocationExp, + SizeInBitsNode, AlignInBits, Encoding, Storage, + ShouldCreate); + } + static DIStringType *getImpl(LLVMContext &Context, unsigned Tag, + MDString *Name, Metadata *StringLength, + Metadata *StrLenExp, Metadata *StrLocationExp, + Metadata *SizeInBits, uint32_t AlignInBits, + unsigned Encoding, StorageType Storage, + bool ShouldCreate = true); TempDIStringType cloneImpl() const { return getTemporary(getContext(), getTag(), getRawName(), getRawStringLength(), getRawStringLengthExp(), - getRawStringLocationExp(), getSizeInBits(), + getRawStringLocationExp(), getRawSizeInBits(), getAlignInBits(), getEncoding()); } @@ -1103,6 +1172,13 @@ class DIStringType : public DIType { unsigned Encoding), (Tag, Name, StringLength, StringLengthExp, StringLocationExp, SizeInBits, AlignInBits, Encoding)) + DEFINE_MDNODE_GET(DIStringType, + (unsigned Tag, MDString *Name, Metadata *StringLength, + Metadata *StringLengthExp, Metadata *StringLocationExp, + Metadata *SizeInBits, uint32_t AlignInBits, + unsigned Encoding), + (Tag, Name, StringLength, StringLengthExp, + StringLocationExp, SizeInBits, AlignInBits, Encoding)) TempDIStringType clone() const { return cloneImpl(); } @@ -1124,11 +1200,15 @@ class DIStringType : public DIType { unsigned getEncoding() const { return Encoding; } - Metadata *getRawStringLength() const { return getOperand(3); } + 
Metadata *getRawStringLength() const { return getOperand(MY_FIRST_OPERAND); } - Metadata *getRawStringLengthExp() const { return getOperand(4); } + Metadata *getRawStringLengthExp() const { + return getOperand(MY_FIRST_OPERAND + 1); + } - Metadata *getRawStringLocationExp() const { return getOperand(5); } + Metadata *getRawStringLocationExp() const { + return getOperand(MY_FIRST_OPERAND + 2); + } }; /// Derived types. @@ -1170,18 +1250,19 @@ class DIDerivedType : public DIType { friend class LLVMContextImpl; friend class MDNode; + static constexpr unsigned MY_FIRST_OPERAND = DIType::N_OPERANDS; + /// The DWARF address space of the memory pointed to or referenced by a /// pointer or reference type respectively. std::optional DWARFAddressSpace; DIDerivedType(LLVMContext &C, StorageType Storage, unsigned Tag, - unsigned Line, uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, + unsigned Line, uint32_t AlignInBits, std::optional DWARFAddressSpace, std::optional PtrAuthData, DIFlags Flags, ArrayRef Ops) - : DIType(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits, - AlignInBits, OffsetInBits, 0, Flags, Ops), + : DIType(C, DIDerivedTypeKind, Storage, Tag, Line, AlignInBits, 0, Flags, + Ops), DWARFAddressSpace(DWARFAddressSpace) { if (PtrAuthData) SubclassData32 = PtrAuthData->RawData; @@ -1195,6 +1276,40 @@ class DIDerivedType : public DIType { std::optional PtrAuthData, DIFlags Flags, Metadata *ExtraData, DINodeArray Annotations, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + auto *OffsetInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), OffsetInBits)); + return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File, + Line, Scope, BaseType, SizeInBitsNode, AlignInBits, + OffsetInBitsNode, DWARFAddressSpace, PtrAuthData, Flags, + ExtraData, Annotations.get(), Storage, ShouldCreate); + } + static 
DIDerivedType * + getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, DIFile *File, + unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits, + uint32_t AlignInBits, uint64_t OffsetInBits, + std::optional DWARFAddressSpace, + std::optional PtrAuthData, DIFlags Flags, + Metadata *ExtraData, DINodeArray Annotations, StorageType Storage, + bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + auto *OffsetInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), OffsetInBits)); + return getImpl(Context, Tag, Name, File, Line, Scope, BaseType, + SizeInBitsNode, AlignInBits, OffsetInBitsNode, + DWARFAddressSpace, PtrAuthData, Flags, ExtraData, + Annotations.get(), Storage, ShouldCreate); + } + static DIDerivedType * + getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, DIFile *File, + unsigned Line, DIScope *Scope, DIType *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, + std::optional DWARFAddressSpace, + std::optional PtrAuthData, DIFlags Flags, + Metadata *ExtraData, DINodeArray Annotations, StorageType Storage, + bool ShouldCreate = true) { return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, DWARFAddressSpace, PtrAuthData, Flags, ExtraData, @@ -1203,26 +1318,26 @@ class DIDerivedType : public DIType { LLVM_ABI static DIDerivedType * getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + Metadata *SizeInBits, uint32_t AlignInBits, Metadata *OffsetInBits, std::optional DWARFAddressSpace, std::optional PtrAuthData, DIFlags Flags, Metadata *ExtraData, Metadata *Annotations, StorageType Storage, bool ShouldCreate = true); TempDIDerivedType cloneImpl() const { - return 
getTemporary(getContext(), getTag(), getName(), getFile(), getLine(), - getScope(), getBaseType(), getSizeInBits(), - getAlignInBits(), getOffsetInBits(), - getDWARFAddressSpace(), getPtrAuthData(), getFlags(), - getExtraData(), getAnnotations()); + return getTemporary( + getContext(), getTag(), getRawName(), getFile(), getLine(), getScope(), + getBaseType(), getRawSizeInBits(), getAlignInBits(), + getRawOffsetInBits(), getDWARFAddressSpace(), getPtrAuthData(), + getFlags(), getExtraData(), getRawAnnotations()); } public: DEFINE_MDNODE_GET(DIDerivedType, (unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, + Metadata *SizeInBits, uint32_t AlignInBits, + Metadata *OffsetInBits, std::optional DWARFAddressSpace, std::optional PtrAuthData, DIFlags Flags, Metadata *ExtraData = nullptr, @@ -1230,6 +1345,28 @@ class DIDerivedType : public DIType { (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, DWARFAddressSpace, PtrAuthData, Flags, ExtraData, Annotations)) + DEFINE_MDNODE_GET(DIDerivedType, + (unsigned Tag, StringRef Name, DIFile *File, unsigned Line, + DIScope *Scope, DIType *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, + std::optional DWARFAddressSpace, + std::optional PtrAuthData, DIFlags Flags, + Metadata *ExtraData = nullptr, + DINodeArray Annotations = nullptr), + (Tag, Name, File, Line, Scope, BaseType, SizeInBits, + AlignInBits, OffsetInBits, DWARFAddressSpace, PtrAuthData, + Flags, ExtraData, Annotations)) + DEFINE_MDNODE_GET(DIDerivedType, + (unsigned Tag, MDString *Name, DIFile *File, unsigned Line, + DIScope *Scope, DIType *BaseType, uint64_t SizeInBits, + uint32_t AlignInBits, uint64_t OffsetInBits, + std::optional DWARFAddressSpace, + std::optional PtrAuthData, DIFlags Flags, + Metadata *ExtraData = nullptr, + DINodeArray Annotations = nullptr), + (Tag, Name, File, Line, Scope, 
BaseType, SizeInBits, + AlignInBits, OffsetInBits, DWARFAddressSpace, PtrAuthData, + Flags, ExtraData, Annotations)) DEFINE_MDNODE_GET(DIDerivedType, (unsigned Tag, StringRef Name, DIFile *File, unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits, @@ -1246,7 +1383,7 @@ class DIDerivedType : public DIType { /// Get the base type this is derived from. DIType *getBaseType() const { return cast_or_null(getRawBaseType()); } - Metadata *getRawBaseType() const { return getOperand(3); } + Metadata *getRawBaseType() const { return getOperand(MY_FIRST_OPERAND); } /// \returns The DWARF address space of the memory pointed to or referenced by /// a pointer or reference type respectively. @@ -1266,7 +1403,7 @@ class DIDerivedType : public DIType { /// TODO: Separate out types that need this extra operand: pointer-to-member /// types and member fields (static members and ivars). Metadata *getExtraData() const { return getRawExtraData(); } - Metadata *getRawExtraData() const { return getOperand(4); } + Metadata *getRawExtraData() const { return getOperand(MY_FIRST_OPERAND + 1); } /// Get the template parameters from a template alias. DITemplateParameterArray getTemplateParams() const { @@ -1277,7 +1414,9 @@ class DIDerivedType : public DIType { DINodeArray getAnnotations() const { return cast_or_null(getRawAnnotations()); } - Metadata *getRawAnnotations() const { return getOperand(5); } + Metadata *getRawAnnotations() const { + return getOperand(MY_FIRST_OPERAND + 2); + } /// Get casted version of extra data. 
/// @{ @@ -1321,9 +1460,10 @@ class DISubrangeType : public DIType { friend class LLVMContextImpl; friend class MDNode; + static constexpr unsigned MY_FIRST_OPERAND = DIType::N_OPERANDS; + DISubrangeType(LLVMContext &C, StorageType Storage, unsigned Line, - uint64_t SizeInBits, uint32_t AlignInBits, DIFlags Flags, - ArrayRef Ops); + uint32_t AlignInBits, DIFlags Flags, ArrayRef Ops); ~DISubrangeType() = default; @@ -1333,21 +1473,23 @@ class DISubrangeType : public DIType { DIFlags Flags, DIType *BaseType, Metadata *LowerBound, Metadata *UpperBound, Metadata *Stride, Metadata *Bias, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); return getImpl(Context, getCanonicalMDString(Context, Name), File, Line, - Scope, SizeInBits, AlignInBits, Flags, BaseType, LowerBound, - UpperBound, Stride, Bias, Storage, ShouldCreate); + Scope, SizeInBitsNode, AlignInBits, Flags, BaseType, + LowerBound, UpperBound, Stride, Bias, Storage, ShouldCreate); } LLVM_ABI static DISubrangeType * getImpl(LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line, - Metadata *Scope, uint64_t SizeInBits, uint32_t AlignInBits, + Metadata *Scope, Metadata *SizeInBits, uint32_t AlignInBits, DIFlags Flags, Metadata *BaseType, Metadata *LowerBound, Metadata *UpperBound, Metadata *Stride, Metadata *Bias, StorageType Storage, bool ShouldCreate = true); TempDISubrangeType cloneImpl() const { - return getTemporary(getContext(), getName(), getFile(), getLine(), - getScope(), getSizeInBits(), getAlignInBits(), + return getTemporary(getContext(), getRawName(), getFile(), getLine(), + getScope(), getRawSizeInBits(), getAlignInBits(), getFlags(), getBaseType(), getRawLowerBound(), getRawUpperBound(), getRawStride(), getRawBias()); } @@ -1357,9 +1499,10 @@ class DISubrangeType : public DIType { public: DEFINE_MDNODE_GET(DISubrangeType, (MDString * Name, Metadata *File, unsigned Line, - Metadata 
*Scope, uint64_t SizeInBits, uint32_t AlignInBits, - DIFlags Flags, Metadata *BaseType, Metadata *LowerBound, - Metadata *UpperBound, Metadata *Stride, Metadata *Bias), + Metadata *Scope, Metadata *SizeInBits, + uint32_t AlignInBits, DIFlags Flags, Metadata *BaseType, + Metadata *LowerBound, Metadata *UpperBound, + Metadata *Stride, Metadata *Bias), (Name, File, Line, Scope, SizeInBits, AlignInBits, Flags, BaseType, LowerBound, UpperBound, Stride, Bias)) DEFINE_MDNODE_GET(DISubrangeType, @@ -1374,15 +1517,23 @@ class DISubrangeType : public DIType { /// Get the base type this is derived from. DIType *getBaseType() const { return cast_or_null(getRawBaseType()); } - Metadata *getRawBaseType() const { return getOperand(3); } + Metadata *getRawBaseType() const { return getOperand(MY_FIRST_OPERAND); } - Metadata *getRawLowerBound() const { return getOperand(4).get(); } + Metadata *getRawLowerBound() const { + return getOperand(MY_FIRST_OPERAND + 1).get(); + } - Metadata *getRawUpperBound() const { return getOperand(5).get(); } + Metadata *getRawUpperBound() const { + return getOperand(MY_FIRST_OPERAND + 2).get(); + } - Metadata *getRawStride() const { return getOperand(6).get(); } + Metadata *getRawStride() const { + return getOperand(MY_FIRST_OPERAND + 3).get(); + } - Metadata *getRawBias() const { return getOperand(7).get(); } + Metadata *getRawBias() const { + return getOperand(MY_FIRST_OPERAND + 4).get(); + } BoundType getLowerBound() const { return convertRawToBound(getRawLowerBound()); @@ -1409,31 +1560,30 @@ class DICompositeType : public DIType { friend class LLVMContextImpl; friend class MDNode; + static constexpr unsigned MY_FIRST_OPERAND = DIType::N_OPERANDS; + unsigned RuntimeLang; std::optional EnumKind; DICompositeType(LLVMContext &C, StorageType Storage, unsigned Tag, - unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, - uint32_t AlignInBits, uint64_t OffsetInBits, + unsigned Line, unsigned RuntimeLang, uint32_t AlignInBits, uint32_t 
NumExtraInhabitants, std::optional EnumKind, DIFlags Flags, ArrayRef Ops) - : DIType(C, DICompositeTypeKind, Storage, Tag, Line, SizeInBits, - AlignInBits, OffsetInBits, NumExtraInhabitants, Flags, Ops), + : DIType(C, DICompositeTypeKind, Storage, Tag, Line, AlignInBits, + NumExtraInhabitants, Flags, Ops), RuntimeLang(RuntimeLang), EnumKind(EnumKind) {} ~DICompositeType() = default; /// Change fields in place. void mutate(unsigned Tag, unsigned Line, unsigned RuntimeLang, - uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, - uint32_t NumExtraInhabitants, std::optional EnumKind, - DIFlags Flags) { + uint32_t AlignInBits, uint32_t NumExtraInhabitants, + std::optional EnumKind, DIFlags Flags) { assert(isDistinct() && "Only distinct nodes can mutate"); assert(getRawIdentifier() && "Only ODR-uniqued nodes should mutate"); this->RuntimeLang = RuntimeLang; this->EnumKind = EnumKind; - DIType::mutate(Tag, Line, SizeInBits, AlignInBits, OffsetInBits, - NumExtraInhabitants, Flags); + DIType::mutate(Tag, Line, AlignInBits, NumExtraInhabitants, Flags); } static DICompositeType * @@ -1447,6 +1597,52 @@ class DICompositeType : public DIType { Metadata *DataLocation, Metadata *Associated, Metadata *Allocated, Metadata *Rank, DINodeArray Annotations, Metadata *BitStride, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + auto *OffsetInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), OffsetInBits)); + return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File, + Line, Scope, BaseType, SizeInBitsNode, AlignInBits, + OffsetInBitsNode, Flags, Elements.get(), RuntimeLang, + EnumKind, VTableHolder, TemplateParams.get(), + getCanonicalMDString(Context, Identifier), Discriminator, + DataLocation, Associated, Allocated, Rank, Annotations.get(), + Specification, NumExtraInhabitants, BitStride, Storage, + ShouldCreate); + } 
+ static DICompositeType * + getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, + unsigned Line, Metadata *Scope, Metadata *BaseType, + uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, + std::optional EnumKind, Metadata *VTableHolder, + Metadata *TemplateParams, MDString *Identifier, + Metadata *Discriminator, Metadata *DataLocation, Metadata *Associated, + Metadata *Allocated, Metadata *Rank, Metadata *Annotations, + Metadata *Specification, uint32_t NumExtraInhabitants, + Metadata *BitStride, StorageType Storage, bool ShouldCreate = true) { + auto *SizeInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), SizeInBits)); + auto *OffsetInBitsNode = ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), OffsetInBits)); + return getImpl(Context, Tag, Name, File, Line, Scope, BaseType, + SizeInBitsNode, AlignInBits, OffsetInBitsNode, Flags, + Elements, RuntimeLang, EnumKind, VTableHolder, + TemplateParams, Identifier, Discriminator, DataLocation, + Associated, Allocated, Rank, Annotations, Specification, + NumExtraInhabitants, BitStride, Storage, ShouldCreate); + } + static DICompositeType * + getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, Metadata *File, + unsigned Line, DIScope *Scope, DIType *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, DIType *Specification, + uint32_t NumExtraInhabitants, DIFlags Flags, DINodeArray Elements, + unsigned RuntimeLang, std::optional EnumKind, + DIType *VTableHolder, DITemplateParameterArray TemplateParams, + StringRef Identifier, DIDerivedType *Discriminator, + Metadata *DataLocation, Metadata *Associated, Metadata *Allocated, + Metadata *Rank, DINodeArray Annotations, Metadata *BitStride, + StorageType Storage, bool ShouldCreate = true) { return getImpl( Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope, BaseType, 
SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(), @@ -1458,7 +1654,7 @@ class DICompositeType : public DIType { LLVM_ABI static DICompositeType * getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + Metadata *SizeInBits, uint32_t AlignInBits, Metadata *OffsetInBits, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, std::optional EnumKind, Metadata *VTableHolder, Metadata *TemplateParams, MDString *Identifier, @@ -1469,13 +1665,14 @@ class DICompositeType : public DIType { TempDICompositeType cloneImpl() const { return getTemporary( - getContext(), getTag(), getName(), getFile(), getLine(), getScope(), - getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(), - getFlags(), getElements(), getRuntimeLang(), getEnumKind(), - getVTableHolder(), getTemplateParams(), getIdentifier(), - getDiscriminator(), getRawDataLocation(), getRawAssociated(), - getRawAllocated(), getRawRank(), getAnnotations(), getSpecification(), - getNumExtraInhabitants(), getRawBitStride()); + getContext(), getTag(), getRawName(), getFile(), getLine(), getScope(), + getBaseType(), getRawSizeInBits(), getAlignInBits(), + getRawOffsetInBits(), getFlags(), getRawElements(), getRuntimeLang(), + getEnumKind(), getVTableHolder(), getRawTemplateParams(), + getRawIdentifier(), getDiscriminator(), getRawDataLocation(), + getRawAssociated(), getRawAllocated(), getRawRank(), + getRawAnnotations(), getSpecification(), getNumExtraInhabitants(), + getRawBitStride()); } public: @@ -1515,6 +1712,42 @@ class DICompositeType : public DIType { TemplateParams, Identifier, Discriminator, DataLocation, Associated, Allocated, Rank, Annotations, Specification, NumExtraInhabitants, BitStride)) + DEFINE_MDNODE_GET( + DICompositeType, + (unsigned Tag, StringRef Name, DIFile *File, unsigned Line, + DIScope *Scope, DIType *BaseType, Metadata 
*SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, DIFlags Flags, + DINodeArray Elements, unsigned RuntimeLang, + std::optional EnumKind, DIType *VTableHolder, + DITemplateParameterArray TemplateParams = nullptr, + StringRef Identifier = "", DIDerivedType *Discriminator = nullptr, + Metadata *DataLocation = nullptr, Metadata *Associated = nullptr, + Metadata *Allocated = nullptr, Metadata *Rank = nullptr, + DINodeArray Annotations = nullptr, DIType *Specification = nullptr, + uint32_t NumExtraInhabitants = 0, Metadata *BitStride = nullptr), + (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, + OffsetInBits, Specification, NumExtraInhabitants, Flags, Elements, + RuntimeLang, EnumKind, VTableHolder, TemplateParams, Identifier, + Discriminator, DataLocation, Associated, Allocated, Rank, Annotations, + BitStride)) + DEFINE_MDNODE_GET( + DICompositeType, + (unsigned Tag, MDString *Name, Metadata *File, unsigned Line, + Metadata *Scope, Metadata *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, DIFlags Flags, + Metadata *Elements, unsigned RuntimeLang, + std::optional EnumKind, Metadata *VTableHolder, + Metadata *TemplateParams = nullptr, MDString *Identifier = nullptr, + Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr, + Metadata *Associated = nullptr, Metadata *Allocated = nullptr, + Metadata *Rank = nullptr, Metadata *Annotations = nullptr, + Metadata *Specification = nullptr, uint32_t NumExtraInhabitants = 0, + Metadata *BitStride = nullptr), + (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, + OffsetInBits, Flags, Elements, RuntimeLang, EnumKind, VTableHolder, + TemplateParams, Identifier, Discriminator, DataLocation, Associated, + Allocated, Rank, Annotations, Specification, NumExtraInhabitants, + BitStride)) TempDICompositeType clone() const { return cloneImpl(); } @@ -1528,8 +1761,8 @@ class DICompositeType : public DIType { LLVM_ABI static DICompositeType * 
getODRType(LLVMContext &Context, MDString &Identifier, unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, - Metadata *BaseType, uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, Metadata *Specification, + Metadata *BaseType, Metadata *SizeInBits, uint32_t AlignInBits, + Metadata *OffsetInBits, Metadata *Specification, uint32_t NumExtraInhabitants, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, std::optional EnumKind, Metadata *VTableHolder, Metadata *TemplateParams, @@ -1551,8 +1784,8 @@ class DICompositeType : public DIType { LLVM_ABI static DICompositeType * buildODRType(LLVMContext &Context, MDString &Identifier, unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, - Metadata *BaseType, uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, Metadata *Specification, + Metadata *BaseType, Metadata *SizeInBits, uint32_t AlignInBits, + Metadata *OffsetInBits, Metadata *Specification, uint32_t NumExtraInhabitants, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, std::optional EnumKind, Metadata *VTableHolder, Metadata *TemplateParams, @@ -1570,41 +1803,55 @@ class DICompositeType : public DIType { DITemplateParameterArray getTemplateParams() const { return cast_or_null(getRawTemplateParams()); } - StringRef getIdentifier() const { return getStringOperand(7); } + StringRef getIdentifier() const { + return getStringOperand(MY_FIRST_OPERAND + 4); + } unsigned getRuntimeLang() const { return RuntimeLang; } std::optional getEnumKind() const { return EnumKind; } - Metadata *getRawBaseType() const { return getOperand(3); } - Metadata *getRawElements() const { return getOperand(4); } - Metadata *getRawVTableHolder() const { return getOperand(5); } - Metadata *getRawTemplateParams() const { return getOperand(6); } - MDString *getRawIdentifier() const { return getOperandAs(7); } - Metadata *getRawDiscriminator() const { return getOperand(8); } + Metadata *getRawBaseType() const 
{ return getOperand(MY_FIRST_OPERAND); } + Metadata *getRawElements() const { return getOperand(MY_FIRST_OPERAND + 1); } + Metadata *getRawVTableHolder() const { + return getOperand(MY_FIRST_OPERAND + 2); + } + Metadata *getRawTemplateParams() const { + return getOperand(MY_FIRST_OPERAND + 3); + } + MDString *getRawIdentifier() const { + return getOperandAs(MY_FIRST_OPERAND + 4); + } + Metadata *getRawDiscriminator() const { + return getOperand(MY_FIRST_OPERAND + 5); + } DIDerivedType *getDiscriminator() const { - return getOperandAs(8); + return getOperandAs(MY_FIRST_OPERAND + 5); + } + Metadata *getRawDataLocation() const { + return getOperand(MY_FIRST_OPERAND + 6); } - Metadata *getRawDataLocation() const { return getOperand(9); } DIVariable *getDataLocation() const { return dyn_cast_or_null(getRawDataLocation()); } DIExpression *getDataLocationExp() const { return dyn_cast_or_null(getRawDataLocation()); } - Metadata *getRawAssociated() const { return getOperand(10); } + Metadata *getRawAssociated() const { + return getOperand(MY_FIRST_OPERAND + 7); + } DIVariable *getAssociated() const { return dyn_cast_or_null(getRawAssociated()); } DIExpression *getAssociatedExp() const { return dyn_cast_or_null(getRawAssociated()); } - Metadata *getRawAllocated() const { return getOperand(11); } + Metadata *getRawAllocated() const { return getOperand(MY_FIRST_OPERAND + 8); } DIVariable *getAllocated() const { return dyn_cast_or_null(getRawAllocated()); } DIExpression *getAllocatedExp() const { return dyn_cast_or_null(getRawAllocated()); } - Metadata *getRawRank() const { return getOperand(12); } + Metadata *getRawRank() const { return getOperand(MY_FIRST_OPERAND + 9); } ConstantInt *getRankConst() const { if (auto *MD = dyn_cast_or_null(getRawRank())) return dyn_cast_or_null(MD->getValue()); @@ -1614,17 +1861,23 @@ class DICompositeType : public DIType { return dyn_cast_or_null(getRawRank()); } - Metadata *getRawAnnotations() const { return getOperand(13); } + Metadata 
*getRawAnnotations() const { + return getOperand(MY_FIRST_OPERAND + 10); + } DINodeArray getAnnotations() const { return cast_or_null(getRawAnnotations()); } - Metadata *getRawSpecification() const { return getOperand(14); } + Metadata *getRawSpecification() const { + return getOperand(MY_FIRST_OPERAND + 11); + } DIType *getSpecification() const { return cast_or_null(getRawSpecification()); } - Metadata *getRawBitStride() const { return getOperand(15); } + Metadata *getRawBitStride() const { + return getOperand(MY_FIRST_OPERAND + 12); + } ConstantInt *getBitStrideConst() const { if (auto *MD = dyn_cast_or_null(getRawBitStride())) return dyn_cast_or_null(MD->getValue()); @@ -1643,15 +1896,15 @@ class DICompositeType : public DIType { assert(is_contained(Elements->operands(), Op) && "Lost a member during member list replacement"); #endif - replaceOperandWith(4, Elements.get()); + replaceOperandWith(MY_FIRST_OPERAND + 1, Elements.get()); } void replaceVTableHolder(DIType *VTableHolder) { - replaceOperandWith(5, VTableHolder); + replaceOperandWith(MY_FIRST_OPERAND + 2, VTableHolder); } void replaceTemplateParams(DITemplateParameterArray TemplateParams) { - replaceOperandWith(6, TemplateParams.get()); + replaceOperandWith(MY_FIRST_OPERAND + 3, TemplateParams.get()); } /// @} @@ -1667,6 +1920,8 @@ class DISubroutineType : public DIType { friend class LLVMContextImpl; friend class MDNode; + static constexpr unsigned MY_FIRST_OPERAND = DIType::N_OPERANDS; + /// The calling convention used with DW_AT_calling_convention. Actually of /// type dwarf::CallingConvention. 
uint8_t CC; @@ -1712,7 +1967,7 @@ class DISubroutineType : public DIType { return cast_or_null(getRawTypeArray()); } - Metadata *getRawTypeArray() const { return getOperand(3); } + Metadata *getRawTypeArray() const { return getOperand(MY_FIRST_OPERAND); } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DISubroutineTypeKind; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 7add4a27ce9e9..bd6f94ac1286c 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1842,11 +1842,11 @@ def int_ubsantrap : Intrinsic<[], [llvm_i8_ty], // Return true if ubsan check is allowed. def int_allow_ubsan_check : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i8_ty], - [IntrInaccessibleMemOnly, IntrWriteMem, ImmArg>, NoUndef]>; + [IntrInaccessibleMemOnly, ImmArg>, NoUndef]>; // Return true if runtime check is allowed. def int_allow_runtime_check : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_metadata_ty], - [IntrInaccessibleMemOnly, IntrWriteMem, NoUndef]>, + [IntrInaccessibleMemOnly, NoUndef]>, ClangBuiltin<"__builtin_allow_runtime_check">; // Support for dynamic deoptimization (or de-specialization) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e6f0bf6276086..6f974c97361de 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -588,6 +588,14 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; +def int_amdgcn_cvt_pk_f16_fp8 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_fp8">; + +def int_amdgcn_cvt_pk_f16_bf8 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_pk_f16_bf8">; + class 
AMDGPUCvtScaleF32Intrinsic : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_"#name>; diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 84c26599b5b70..7dd9ff7f08b8b 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -655,6 +655,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". DefaultAttrsIntrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; // BCD intrinsics. + def int_ppc_national2packed: ClangBuiltin<"__builtin_ppc_national2packed">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty],[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; + def int_ppc_packed2national: ClangBuiltin<"__builtin_ppc_packed2national">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty],[llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_packed2zoned: ClangBuiltin<"__builtin_ppc_packed2zoned">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty],[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; + def int_ppc_zoned2packed: ClangBuiltin<"__builtin_ppc_zoned2packed">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty],[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_ppc_cdtbcdd : ClangBuiltin<"__builtin_ppc_cdtbcd">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>; def int_ppc_cbcdtdd: ClangBuiltin<"__builtin_ppc_cbcdtd">, diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 2a095be58a49e..5bd5fd1ce8d3f 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -149,6 +149,8 @@ struct RuntimeLibcallsInfo { return true; } + static bool darwinHasExp10(const Triple &TT); + /// Return true if the target has sincosf/sincos/sincosl functions static bool hasSinCos(const Triple &TT) { return TT.isGNUEnvironment() || TT.isOSFuchsia() || diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h 
index eb71f4581bd61..d41a7412a9830 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -15,6 +15,7 @@ #define LLVM_MC_MCSCHEDULE_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringTable.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -124,7 +125,7 @@ struct MCSchedClassDesc { static const unsigned short VariantNumMicroOps = InvalidNumMicroOps - 1; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - const char* Name; + uint32_t NameOff; #endif uint16_t NumMicroOps : 13; uint16_t BeginGroup : 1; @@ -324,6 +325,7 @@ struct MCSchedModel { const MCSchedClassDesc *SchedClassTable; unsigned NumProcResourceKinds; unsigned NumSchedClasses; + const StringTable *SchedClassNames; // Instruction itinerary tables used by InstrItineraryData. friend class InstrItineraryData; const InstrItinerary *InstrItineraries; @@ -368,6 +370,14 @@ struct MCSchedModel { return &SchedClassTable[SchedClassIdx]; } + StringRef getSchedClassName(unsigned SchedClassIdx) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + return (*SchedClassNames)[SchedClassTable[SchedClassIdx].NameOff]; +#else + return ""; +#endif + } + /// Returns the latency value for the scheduling class. 
LLVM_ABI static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc); diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h index 2ea7829d668a4..9b8d1f3c31121 100644 --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -214,7 +214,7 @@ struct BasicSubtargetSubTypeKV { } }; -std::optional> +LLVM_ABI std::optional> getCPUDefaultTargetFeatures(StringRef CPU, ArrayRef ProcDesc, ArrayRef ProcFeatures); diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index d6fa4537ee3b4..1865be6e95dea 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -581,6 +581,9 @@ class Triple { /// Is this an Apple XROS triple. bool isXROS() const { return getOS() == Triple::XROS; } + /// Is this an Apple BridgeOS triple. + bool isBridgeOS() const { return getOS() == Triple::BridgeOS; } + /// Is this an Apple DriverKit triple. bool isDriverKit() const { return getOS() == Triple::DriverKit; } @@ -591,9 +594,11 @@ class Triple { return (getVendor() == Triple::Apple) && isOSBinFormatMachO(); } - /// Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit). + /// Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or + /// bridgeOS). 
bool isOSDarwin() const { - return isMacOSX() || isiOS() || isWatchOS() || isDriverKit() || isXROS(); + return isMacOSX() || isiOS() || isWatchOS() || isDriverKit() || isXROS() || + isBridgeOS(); } bool isSimulatorEnvironment() const { diff --git a/llvm/include/llvm/Testing/Demangle/DemangleTestCases.inc b/llvm/include/llvm/Testing/Demangle/DemangleTestCases.inc index 1e3f7459deaa2..2721d2aa5504e 100644 --- a/llvm/include/llvm/Testing/Demangle/DemangleTestCases.inc +++ b/llvm/include/llvm/Testing/Demangle/DemangleTestCases.inc @@ -6,6 +6,7 @@ {"_Z1fDU10_", "f(unsigned _BitInt(10))"}, {"_Z1fIfEvDUstPT__", "void f(unsigned _BitInt(sizeof (float*)))"}, {"_Z1fIiEvDBstPT__", "void f(_BitInt(sizeof (int*)))"}, +{"_Z6myfuncRDB8_S0_", "myfunc(_BitInt(8)&, _BitInt(8)&)"}, {"_Z4testI1A1BE1Cv", "C test()"}, {"_Z4testI1A1BET0_T_S3_", "B test(A, A)"}, {"_ZN1SgtEi", "S::operator>(int)"}, diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 31611dfe4fd2f..86a2edbd8bd41 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -839,6 +839,10 @@ MemoryEffects BasicAAResult::getMemoryEffects(const CallBase *Call, FuncME |= MemoryEffects::readOnly(); if (Call->hasClobberingOperandBundles()) FuncME |= MemoryEffects::writeOnly(); + if (Call->isVolatile()) { + // Volatile operations also access inaccessible memory. + FuncME |= MemoryEffects::inaccessibleMemOnly(); + } Min &= FuncME; } diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index b58f9b26a8651..9e3c271f7d93f 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -2553,6 +2553,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, case Intrinsic::cosh: return ConstantFoldFP(cosh, APF, Ty); case Intrinsic::atan: + // Implement optional behavior from C's Annex F for +/-0.0. 
+ if (U.isZero()) + return ConstantFP::get(Ty->getContext(), U); return ConstantFoldFP(atan, APF, Ty); case Intrinsic::sqrt: return ConstantFoldFP(sqrt, APF, Ty); @@ -2606,6 +2609,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, break; case LibFunc_atan: case LibFunc_atanf: + // Implement optional behavior from C's Annex F for +/-0.0. + if (U.isZero()) + return ConstantFP::get(Ty->getContext(), U); if (TLI->has(Func)) return ConstantFoldFP(atan, APF, Ty); break; diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 926dc6211eb8d..792a194aeb499 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4818,6 +4818,34 @@ struct MDSignedOrMDField : MDEitherFieldImpl { } }; +struct MDUnsignedOrMDField : MDEitherFieldImpl { + MDUnsignedOrMDField(uint64_t Default = 0, bool AllowNull = true) + : ImplTy(MDUnsignedField(Default), MDField(AllowNull)) {} + + MDUnsignedOrMDField(uint64_t Default, uint64_t Max, bool AllowNull = true) + : ImplTy(MDUnsignedField(Default, Max), MDField(AllowNull)) {} + + bool isMDUnsignedField() const { return WhatIs == IsTypeA; } + bool isMDField() const { return WhatIs == IsTypeB; } + uint64_t getMDUnsignedValue() const { + assert(isMDUnsignedField() && "Wrong field type"); + return A.Val; + } + Metadata *getMDFieldValue() const { + assert(isMDField() && "Wrong field type"); + return B.Val; + } + + Metadata *getValueAsMetadata(LLVMContext &Context) const { + if (isMDUnsignedField()) + return ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), getMDUnsignedValue())); + if (isMDField()) + return getMDFieldValue(); + return nullptr; + } +}; + } // end anonymous namespace namespace llvm { @@ -5201,6 +5229,29 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, return true; } +template <> +bool LLParser::parseMDField(LocTy Loc, StringRef Name, + MDUnsignedOrMDField &Result) { + // Try to parse an unsigned int. 
+ if (Lex.getKind() == lltok::APSInt) { + MDUnsignedField Res = Result.A; + if (!parseMDField(Loc, Name, Res)) { + Result.assign(Res); + return false; + } + return true; + } + + // Otherwise, try to parse as an MDField. + MDField Res = Result.B; + if (!parseMDField(Loc, Name, Res)) { + Result.assign(Res); + return false; + } + + return true; +} + template <> bool LLParser::parseMDField(LocTy Loc, StringRef Name, MDStringField &Result) { LocTy ValueLoc = Lex.getLoc(); @@ -5382,7 +5433,7 @@ bool LLParser::parseDISubrangeType(MDNode *&Result, bool IsDistinct) { OPTIONAL(line, LineField, ); \ OPTIONAL(scope, MDField, ); \ OPTIONAL(baseType, MDField, ); \ - OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(size, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(flags, DIFlagField, ); \ OPTIONAL(lowerBound, MDSignedOrMDField, ); \ @@ -5406,10 +5457,10 @@ bool LLParser::parseDISubrangeType(MDNode *&Result, bool IsDistinct) { Metadata *Stride = convToMetadata(stride); Metadata *Bias = convToMetadata(bias); - Result = GET_OR_DISTINCT(DISubrangeType, - (Context, name.Val, file.Val, line.Val, scope.Val, - size.Val, align.Val, flags.Val, baseType.Val, - LowerBound, UpperBound, Stride, Bias)); + Result = GET_OR_DISTINCT( + DISubrangeType, (Context, name.Val, file.Val, line.Val, scope.Val, + size.getValueAsMetadata(Context), align.Val, flags.Val, + baseType.Val, LowerBound, UpperBound, Stride, Bias)); return false; } @@ -5517,7 +5568,7 @@ bool LLParser::parseDIBasicType(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \ OPTIONAL(name, MDStringField, ); \ - OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(size, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(encoding, DwarfAttEncodingField, ); \ OPTIONAL(num_extra_inhabitants, MDUnsignedField, (0, 
UINT32_MAX)); \ @@ -5525,7 +5576,8 @@ bool LLParser::parseDIBasicType(MDNode *&Result, bool IsDistinct) { PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT(DIBasicType, (Context, tag.Val, name.Val, size.Val, + Result = GET_OR_DISTINCT(DIBasicType, (Context, tag.Val, name.Val, + size.getValueAsMetadata(Context), align.Val, encoding.Val, num_extra_inhabitants.Val, flags.Val)); return false; @@ -5540,7 +5592,7 @@ bool LLParser::parseDIFixedPointType(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \ OPTIONAL(name, MDStringField, ); \ - OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(size, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(encoding, DwarfAttEncodingField, ); \ OPTIONAL(flags, DIFlagField, ); \ @@ -5552,7 +5604,8 @@ bool LLParser::parseDIFixedPointType(MDNode *&Result, bool IsDistinct) { #undef VISIT_MD_FIELDS Result = GET_OR_DISTINCT(DIFixedPointType, - (Context, tag.Val, name.Val, size.Val, align.Val, + (Context, tag.Val, name.Val, + size.getValueAsMetadata(Context), align.Val, encoding.Val, flags.Val, kind.Val, factor.Val, numerator.Val, denominator.Val)); return false; @@ -5567,7 +5620,7 @@ bool LLParser::parseDIStringType(MDNode *&Result, bool IsDistinct) { OPTIONAL(stringLength, MDField, ); \ OPTIONAL(stringLengthExpression, MDField, ); \ OPTIONAL(stringLocationExpression, MDField, ); \ - OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(size, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(encoding, DwarfAttEncodingField, ); PARSE_MD_FIELDS(); @@ -5576,7 +5629,8 @@ bool LLParser::parseDIStringType(MDNode *&Result, bool IsDistinct) { Result = GET_OR_DISTINCT( DIStringType, (Context, tag.Val, name.Val, stringLength.Val, stringLengthExpression.Val, - stringLocationExpression.Val, size.Val, align.Val, 
encoding.Val)); + stringLocationExpression.Val, size.getValueAsMetadata(Context), + align.Val, encoding.Val)); return false; } @@ -5597,9 +5651,9 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) { OPTIONAL(line, LineField, ); \ OPTIONAL(scope, MDField, ); \ REQUIRED(baseType, MDField, ); \ - OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(size, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ - OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(offset, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(flags, DIFlagField, ); \ OPTIONAL(extraData, MDField, ); \ OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX)); \ @@ -5622,11 +5676,11 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) { (unsigned)ptrAuthExtraDiscriminator.Val, ptrAuthIsaPointer.Val, ptrAuthAuthenticatesNullValues.Val); - Result = GET_OR_DISTINCT(DIDerivedType, - (Context, tag.Val, name.Val, file.Val, line.Val, - scope.Val, baseType.Val, size.Val, align.Val, - offset.Val, DWARFAddressSpace, PtrAuthData, - flags.Val, extraData.Val, annotations.Val)); + Result = GET_OR_DISTINCT( + DIDerivedType, (Context, tag.Val, name.Val, file.Val, line.Val, scope.Val, + baseType.Val, size.getValueAsMetadata(Context), align.Val, + offset.getValueAsMetadata(Context), DWARFAddressSpace, + PtrAuthData, flags.Val, extraData.Val, annotations.Val)); return false; } @@ -5638,9 +5692,9 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) { OPTIONAL(line, LineField, ); \ OPTIONAL(scope, MDField, ); \ OPTIONAL(baseType, MDField, ); \ - OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(size, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ - OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(offset, MDUnsignedOrMDField, (0, UINT64_MAX)); \ OPTIONAL(flags, DIFlagField, ); \ OPTIONAL(elements, MDField, ); \ 
OPTIONAL(runtimeLang, DwarfLangField, ); \ @@ -5675,12 +5729,12 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) { if (identifier.Val) if (auto *CT = DICompositeType::buildODRType( Context, *identifier.Val, tag.Val, name.Val, file.Val, line.Val, - scope.Val, baseType.Val, size.Val, align.Val, offset.Val, - specification.Val, num_extra_inhabitants.Val, flags.Val, - elements.Val, runtimeLang.Val, EnumKind, vtableHolder.Val, - templateParams.Val, discriminator.Val, dataLocation.Val, - associated.Val, allocated.Val, Rank, annotations.Val, - bitStride.Val)) { + scope.Val, baseType.Val, size.getValueAsMetadata(Context), + align.Val, offset.getValueAsMetadata(Context), specification.Val, + num_extra_inhabitants.Val, flags.Val, elements.Val, runtimeLang.Val, + EnumKind, vtableHolder.Val, templateParams.Val, discriminator.Val, + dataLocation.Val, associated.Val, allocated.Val, Rank, + annotations.Val, bitStride.Val)) { Result = CT; return false; } @@ -5690,7 +5744,8 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) { Result = GET_OR_DISTINCT( DICompositeType, (Context, tag.Val, name.Val, file.Val, line.Val, scope.Val, baseType.Val, - size.Val, align.Val, offset.Val, flags.Val, elements.Val, + size.getValueAsMetadata(Context), align.Val, + offset.getValueAsMetadata(Context), flags.Val, elements.Val, runtimeLang.Val, EnumKind, vtableHolder.Val, templateParams.Val, identifier.Val, discriminator.Val, dataLocation.Val, associated.Val, allocated.Val, Rank, annotations.Val, specification.Val, diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 1cd1797c1092d..a9467d16c9a14 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1287,6 +1287,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( return MetadataList.upgradeTypeRef(getMDOrNull(ID)); }; + auto getMetadataOrConstant = [&](bool IsMetadata, + uint64_t Entry) 
-> Metadata * { + if (IsMetadata) + return getMDOrNull(Entry); + return ConstantAsMetadata::get( + ConstantInt::get(Type::getInt64Ty(Context), Entry)); + }; + #define GET_OR_DISTINCT(CLASS, ARGS) \ (IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS) @@ -1525,15 +1533,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Record.size() < 6 || Record.size() > 8) return error("Invalid record"); - IsDistinct = Record[0]; + IsDistinct = Record[0] & 1; + bool SizeIsMetadata = Record[0] & 2; DINode::DIFlags Flags = (Record.size() > 6) ? static_cast(Record[6]) : DINode::FlagZero; uint32_t NumExtraInhabitants = (Record.size() > 7) ? Record[7] : 0; + Metadata *SizeInBits = getMetadataOrConstant(SizeIsMetadata, Record[3]); + MetadataList.assignValue( GET_OR_DISTINCT(DIBasicType, - (Context, Record[1], getMDString(Record[2]), Record[3], + (Context, Record[1], getMDString(Record[2]), SizeInBits, Record[4], Record[5], NumExtraInhabitants, Flags)), NextMetadataNo); NextMetadataNo++; @@ -1543,9 +1554,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Record.size() < 11) return error("Invalid record"); - IsDistinct = Record[0]; + IsDistinct = Record[0] & 1; + bool SizeIsMetadata = Record[0] & 2; DINode::DIFlags Flags = static_cast(Record[6]); + Metadata *SizeInBits = getMetadataOrConstant(SizeIsMetadata, Record[3]); + size_t Offset = 9; auto ReadWideInt = [&]() { @@ -1565,7 +1579,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIFixedPointType, - (Context, Record[1], getMDString(Record[2]), Record[3], + (Context, Record[1], getMDString(Record[2]), SizeInBits, Record[4], Record[5], Flags, Record[7], Record[8], Numerator, Denominator)), NextMetadataNo); @@ -1576,17 +1590,21 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Record.size() > 9 || Record.size() < 8) return error("Invalid record"); - IsDistinct = Record[0]; + IsDistinct = Record[0] & 1; + bool SizeIsMetadata 
= Record[0] & 2; bool SizeIs8 = Record.size() == 8; // StringLocationExp (i.e. Record[5]) is added at a later time // than the other fields. The code here enables backward compatibility. Metadata *StringLocationExp = SizeIs8 ? nullptr : getMDOrNull(Record[5]); unsigned Offset = SizeIs8 ? 5 : 6; + Metadata *SizeInBits = + getMetadataOrConstant(SizeIsMetadata, Record[Offset]); + MetadataList.assignValue( GET_OR_DISTINCT(DIStringType, (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), getMDOrNull(Record[4]), - StringLocationExp, Record[Offset], Record[Offset + 1], + StringLocationExp, SizeInBits, Record[Offset + 1], Record[Offset + 2])), NextMetadataNo); NextMetadataNo++; @@ -1615,15 +1633,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( PtrAuthData.emplace(Record[14]); } - IsDistinct = Record[0]; + IsDistinct = Record[0] & 1; + bool SizeIsMetadata = Record[0] & 2; DINode::DIFlags Flags = static_cast(Record[10]); + + Metadata *SizeInBits = getMetadataOrConstant(SizeIsMetadata, Record[7]); + Metadata *OffsetInBits = getMetadataOrConstant(SizeIsMetadata, Record[9]); + MetadataList.assignValue( GET_OR_DISTINCT(DIDerivedType, (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], getDITypeRefOrNull(Record[5]), - getDITypeRefOrNull(Record[6]), Record[7], Record[8], - Record[9], DWARFAddressSpace, PtrAuthData, Flags, + getDITypeRefOrNull(Record[6]), SizeInBits, Record[8], + OffsetInBits, DWARFAddressSpace, PtrAuthData, Flags, getDITypeRefOrNull(Record[11]), Annotations)), NextMetadataNo); NextMetadataNo++; @@ -1633,13 +1656,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Record.size() != 13) return error("Invalid record"); - IsDistinct = Record[0]; + IsDistinct = Record[0] & 1; + bool SizeIsMetadata = Record[0] & 2; DINode::DIFlags Flags = static_cast(Record[7]); + + Metadata *SizeInBits = getMetadataOrConstant(SizeIsMetadata, Record[5]); + MetadataList.assignValue( 
GET_OR_DISTINCT(DISubrangeType, (Context, getMDString(Record[1]), getMDOrNull(Record[2]), Record[3], - getMDOrNull(Record[4]), Record[5], Record[6], Flags, + getMDOrNull(Record[4]), SizeInBits, Record[6], Flags, getDITypeRefOrNull(Record[8]), getMDOrNull(Record[9]), getMDOrNull(Record[10]), getMDOrNull(Record[11]), getMDOrNull(Record[12]))), @@ -1654,18 +1681,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // If we have a UUID and this is not a forward declaration, lookup the // mapping. IsDistinct = Record[0] & 0x1; - bool IsNotUsedInTypeRef = Record[0] >= 2; + bool IsNotUsedInTypeRef = Record[0] & 2; + bool SizeIsMetadata = Record[0] & 4; unsigned Tag = Record[1]; MDString *Name = getMDString(Record[2]); Metadata *File = getMDOrNull(Record[3]); unsigned Line = Record[4]; Metadata *Scope = getDITypeRefOrNull(Record[5]); Metadata *BaseType = nullptr; - uint64_t SizeInBits = Record[7]; if (Record[8] > (uint64_t)std::numeric_limits::max()) return error("Alignment value is too large"); uint32_t AlignInBits = Record[8]; - uint64_t OffsetInBits = 0; + Metadata *OffsetInBits = nullptr; uint32_t NumExtraInhabitants = (Record.size() > 22) ? 
Record[22] : 0; DINode::DIFlags Flags = static_cast(Record[10]); Metadata *Elements = nullptr; @@ -1712,7 +1739,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( TemplateParams = getMDOrNull(Record[14]); } else { BaseType = getDITypeRefOrNull(Record[6]); - OffsetInBits = Record[9]; + + OffsetInBits = getMetadataOrConstant(SizeIsMetadata, Record[9]); + Elements = getMDOrNull(Record[11]); VTableHolder = getDITypeRefOrNull(Record[13]); TemplateParams = getMDOrNull(Record[14]); @@ -1740,6 +1769,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Record.size() > 24 && Record[24] != dwarf::DW_APPLE_ENUM_KIND_invalid) EnumKind = Record[24]; + Metadata *SizeInBits = getMetadataOrConstant(SizeIsMetadata, Record[7]); + DICompositeType *CT = nullptr; if (Identifier) CT = DICompositeType::buildODRType( diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 628b939af19ce..2a2dd085a9461 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1899,10 +1899,11 @@ void ModuleBitcodeWriter::writeDIEnumerator(const DIEnumerator *N, void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + const unsigned SizeIsMetadata = 0x2; + Record.push_back(SizeIsMetadata | (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); - Record.push_back(N->getSizeInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawSizeInBits())); Record.push_back(N->getAlignInBits()); Record.push_back(N->getEncoding()); Record.push_back(N->getFlags()); @@ -1915,10 +1916,11 @@ void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N, void ModuleBitcodeWriter::writeDIFixedPointType( const DIFixedPointType *N, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + const unsigned SizeIsMetadata = 
0x2; + Record.push_back(SizeIsMetadata | (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); - Record.push_back(N->getSizeInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawSizeInBits())); Record.push_back(N->getAlignInBits()); Record.push_back(N->getEncoding()); Record.push_back(N->getFlags()); @@ -1944,13 +1946,14 @@ void ModuleBitcodeWriter::writeDIFixedPointType( void ModuleBitcodeWriter::writeDIStringType(const DIStringType *N, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + const unsigned SizeIsMetadata = 0x2; + Record.push_back(SizeIsMetadata | (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getStringLength())); Record.push_back(VE.getMetadataOrNullID(N->getStringLengthExp())); Record.push_back(VE.getMetadataOrNullID(N->getStringLocationExp())); - Record.push_back(N->getSizeInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawSizeInBits())); Record.push_back(N->getAlignInBits()); Record.push_back(N->getEncoding()); @@ -1961,16 +1964,17 @@ void ModuleBitcodeWriter::writeDIStringType(const DIStringType *N, void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + const unsigned SizeIsMetadata = 0x2; + Record.push_back(SizeIsMetadata | (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); - Record.push_back(N->getSizeInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawSizeInBits())); Record.push_back(N->getAlignInBits()); - Record.push_back(N->getOffsetInBits()); + 
Record.push_back(VE.getMetadataOrNullID(N->getRawOffsetInBits())); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getExtraData())); @@ -1995,12 +1999,13 @@ void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N, void ModuleBitcodeWriter::writeDISubrangeType(const DISubrangeType *N, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + const unsigned SizeIsMetadata = 0x2; + Record.push_back(SizeIsMetadata | (unsigned)N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); - Record.push_back(N->getSizeInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawSizeInBits())); Record.push_back(N->getAlignInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); @@ -2017,16 +2022,18 @@ void ModuleBitcodeWriter::writeDICompositeType( const DICompositeType *N, SmallVectorImpl &Record, unsigned Abbrev) { const unsigned IsNotUsedInOldTypeRef = 0x2; - Record.push_back(IsNotUsedInOldTypeRef | (unsigned)N->isDistinct()); + const unsigned SizeIsMetadata = 0x4; + Record.push_back(SizeIsMetadata | IsNotUsedInOldTypeRef | + (unsigned)N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); - Record.push_back(N->getSizeInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawSizeInBits())); Record.push_back(N->getAlignInBits()); - Record.push_back(N->getOffsetInBits()); + Record.push_back(VE.getMetadataOrNullID(N->getRawOffsetInBits())); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); 
Record.push_back(N->getRuntimeLang()); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3b96225236cd6..754dba73673c2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2142,16 +2142,20 @@ void AsmPrinter::emitFunctionBody() { } /// Compute the number of Global Variables that uses a Constant. -static unsigned getNumGlobalVariableUses(const Constant *C) { - if (!C) +static unsigned getNumGlobalVariableUses(const Constant *C, + bool &HasNonGlobalUsers) { + if (!C) { + HasNonGlobalUsers = true; return 0; + } if (isa(C)) return 1; unsigned NumUses = 0; for (const auto *CU : C->users()) - NumUses += getNumGlobalVariableUses(dyn_cast(CU)); + NumUses += + getNumGlobalVariableUses(dyn_cast(CU), HasNonGlobalUsers); return NumUses; } @@ -2162,7 +2166,8 @@ static unsigned getNumGlobalVariableUses(const Constant *C) { /// candidates are skipped and are emitted later in case at least one cstexpr /// isn't replaced by a PC relative GOT entry access. static bool isGOTEquivalentCandidate(const GlobalVariable *GV, - unsigned &NumGOTEquivUsers) { + unsigned &NumGOTEquivUsers, + bool &HasNonGlobalUsers) { // Global GOT equivalents are unnamed private globals with a constant // pointer initializer to another global symbol. They must point to a // GlobalVariable or Function, i.e., as GlobalValue. @@ -2174,7 +2179,8 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, // To be a got equivalent, at least one of its users need to be a constant // expression used by another global variable. 
for (const auto *U : GV->users()) - NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast(U)); + NumGOTEquivUsers += + getNumGlobalVariableUses(dyn_cast(U), HasNonGlobalUsers); return NumGOTEquivUsers > 0; } @@ -2192,9 +2198,13 @@ void AsmPrinter::computeGlobalGOTEquivs(Module &M) { for (const auto &G : M.globals()) { unsigned NumGOTEquivUsers = 0; - if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + bool HasNonGlobalUsers = false; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers, HasNonGlobalUsers)) continue; - + // If non-global variables use it, we still need to emit it. + // Add 1 here, then emit it in `emitGlobalGOTEquivs`. + if (HasNonGlobalUsers) + NumGOTEquivUsers += 1; const MCSymbol *GOTEquivSym = getSymbol(&G); GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 5442fb15202ea..171fb8394990d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -374,6 +374,18 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, DbgValueHistoryMap &HistMap) { EntryIndex NewIndex; if (HistMap.startDbgValue(Var, DV, NewIndex)) { + // As we already need to iterate all LiveEntries when handling a DbgValue, + // we use this map to avoid a more expensive check against RegVars. There + // is an assert that we handle this correctly in addRegDescribedVar. + // + // In other terms, the presence in this map indicates the presence of a + // corresponding entry in RegVars. + // + // The bool value then tracks whether an entry is to be retained (true) or + // removed (false); as we end previous entries we speculatively assume they + // can be dropped from RegVars, but we then also visit the new entry whose + // set of debug register operands may overlap and "save" a reg from being + // dropped. 
SmallDenseMap TrackedRegs; // If we have created a new debug value entry, close all preceding @@ -467,9 +479,6 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, for (const auto &MI : MBB) { if (MI.isDebugValue()) { assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); - // Use the base variable (without any DW_OP_piece expressions) - // as index into History. The full variables including the - // piece expressions are attached to the MI. const DILocalVariable *RawVar = MI.getDebugVariable(); assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && "Expected inlined-at fields to agree"); @@ -493,8 +502,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, if (MI.isMetaInstruction()) continue; - // Not a DBG_VALUE instruction. It may clobber registers which describe - // some variables. + // Other instructions may clobber registers which describe some variables. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg()) { // Ignore call instructions that claim to clobber SP. The AArch64 diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index bfe6e7d6a802a..9bd337a962b86 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1013,7 +1013,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // Add name if not anonymous or intermediate type. StringRef Name = CTy->getName(); - uint64_t Size = CTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); switch (Tag) { @@ -1176,15 +1175,28 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { - // Add size if non-zero (derived types might be zero-sized.) - // Ignore the size if it's a non-enum forward decl. 
- // TODO: Do we care about size for enum forward declarations? - if (Size && - (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) - addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); - else if (!CTy->isForwardDecl()) - // Add zero size if it is not a forward declaration. - addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, 0); + if (auto *Var = dyn_cast_or_null(CTy->getRawSizeInBits())) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(Buffer, dwarf::DW_AT_bit_size, *VarDIE); + } else if (auto *Exp = + dyn_cast_or_null(CTy->getRawSizeInBits())) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Exp); + addBlock(Buffer, dwarf::DW_AT_bit_size, DwarfExpr.finalize()); + } else { + uint64_t Size = CTy->getSizeInBits() >> 3; + // Add size if non-zero (derived types might be zero-sized.) + // Ignore the size if it's a non-enum forward decl. + // TODO: Do we care about size for enum forward declarations? + if (Size && + (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); + else if (!CTy->isForwardDecl()) + // Add zero size if it is not a forward declaration. + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, 0); + } // If we're a forward decl, say so. if (CTy->isForwardDecl()) @@ -1864,74 +1876,117 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { - uint64_t Size = DT->getSizeInBits(); - uint64_t FieldSize = DD->getBaseTypeSize(DT); - uint32_t AlignInBytes = DT->getAlignInBytes(); - uint64_t OffsetInBytes; + uint64_t Size = 0; + uint64_t FieldSize = 0; bool IsBitfield = DT->isBitField(); - if (IsBitfield) { - // Handle bitfield, assume bytes are 8 bits. 
- if (DD->useDWARF2Bitfields()) - addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8); - addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size); - - assert(DT->getOffsetInBits() <= - (uint64_t)std::numeric_limits::max()); - int64_t Offset = DT->getOffsetInBits(); - // We can't use DT->getAlignInBits() here: AlignInBits for member type - // is non-zero if and only if alignment was forced (e.g. _Alignas()), - // which can't be done with bitfields. Thus we use FieldSize here. - uint32_t AlignInBits = FieldSize; - uint32_t AlignMask = ~(AlignInBits - 1); - // The bits from the start of the storage unit to the start of the field. - uint64_t StartBitOffset = Offset - (Offset & AlignMask); - // The byte offset of the field's aligned storage unit inside the struct. - OffsetInBytes = (Offset - StartBitOffset) / 8; - - if (DD->useDWARF2Bitfields()) { - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getDataLayout().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - - if (Offset < 0) - addSInt(MemberDie, dwarf::DW_AT_bit_offset, dwarf::DW_FORM_sdata, + + // Handle the size. + if (auto *Var = dyn_cast_or_null(DT->getRawSizeInBits())) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(MemberDie, dwarf::DW_AT_bit_size, *VarDIE); + } else if (auto *Exp = + dyn_cast_or_null(DT->getRawSizeInBits())) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Exp); + addBlock(MemberDie, dwarf::DW_AT_bit_size, DwarfExpr.finalize()); + } else { + Size = DT->getSizeInBits(); + FieldSize = DD->getBaseTypeSize(DT); + if (IsBitfield) { + // Handle bitfield, assume bytes are 8 bits. 
+ if (DD->useDWARF2Bitfields()) + addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, + FieldSize / 8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size); + } + } + + // Handle the location. DW_AT_data_bit_offset won't allow an + // expression until DWARF 6, but it can be used as an extension. + // See https://dwarfstd.org/issues/250501.1.html + if (auto *Var = dyn_cast_or_null(DT->getRawOffsetInBits())) { + if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 6) { + if (auto *VarDIE = getDIE(Var)) + addDIEEntry(MemberDie, dwarf::DW_AT_data_bit_offset, *VarDIE); + } + } else if (auto *Expr = + dyn_cast_or_null(DT->getRawOffsetInBits())) { + if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 6) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(Expr); + addBlock(MemberDie, dwarf::DW_AT_data_bit_offset, DwarfExpr.finalize()); + } + } else { + uint32_t AlignInBytes = DT->getAlignInBytes(); + uint64_t OffsetInBytes; + + if (IsBitfield) { + assert(DT->getOffsetInBits() <= + (uint64_t)std::numeric_limits::max()); + int64_t Offset = DT->getOffsetInBits(); + // We can't use DT->getAlignInBits() here: AlignInBits for member type + // is non-zero if and only if alignment was forced (e.g. _Alignas()), + // which can't be done with bitfields. Thus we use FieldSize here. + uint32_t AlignInBits = FieldSize; + uint32_t AlignMask = ~(AlignInBits - 1); + // The bits from the start of the storage unit to the start of the + // field. + uint64_t StartBitOffset = Offset - (Offset & AlignMask); + // The byte offset of the field's aligned storage unit inside the + // struct. 
+ OffsetInBytes = (Offset - StartBitOffset) / 8; + + if (DD->useDWARF2Bitfields()) { + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + + if (Offset < 0) + addSInt(MemberDie, dwarf::DW_AT_bit_offset, dwarf::DW_FORM_sdata, + Offset); + else + addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, + (uint64_t)Offset); + OffsetInBytes = FieldOffset >> 3; + } else { + addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset); - else - addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, - (uint64_t)Offset); - OffsetInBytes = FieldOffset >> 3; + } } else { - addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset); + // This is not a bitfield. + OffsetInBytes = DT->getOffsetInBits() / 8; + if (AlignInBytes) + addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); } - } else { - // This is not a bitfield. - OffsetInBytes = DT->getOffsetInBits() / 8; - if (AlignInBytes) - addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, - AlignInBytes); - } - if (DD->getDwarfVersion() <= 2) { - DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc; - addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); - addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); - } else if (!IsBitfield || DD->useDWARF2Bitfields()) { - // In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are - // interpreted as location-list pointers. Interpreting constants as - // pointers is not expected, so we use DW_FORM_udata to encode the - // constants here. 
- if (DD->getDwarfVersion() == 3) - addUInt(MemberDie, dwarf::DW_AT_data_member_location, - dwarf::DW_FORM_udata, OffsetInBytes); - else - addUInt(MemberDie, dwarf::DW_AT_data_member_location, std::nullopt, - OffsetInBytes); + if (DD->getDwarfVersion() <= 2) { + DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc; + addUInt(*MemLocationDie, dwarf::DW_FORM_data1, + dwarf::DW_OP_plus_uconst); + addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else if (!IsBitfield || DD->useDWARF2Bitfields()) { + // In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are + // interpreted as location-list pointers. Interpreting constants as + // pointers is not expected, so we use DW_FORM_udata to encode the + // constants here. + if (DD->getDwarfVersion() == 3) + addUInt(MemberDie, dwarf::DW_AT_data_member_location, + dwarf::DW_FORM_udata, OffsetInBytes); + else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, std::nullopt, + OffsetInBytes); + } } } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 5f5af5cad778c..461fc35337eac 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -469,6 +469,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(COSH_F); case TargetOpcode::G_FTANH: RTLIBCASE(TANH_F); + case TargetOpcode::G_FSINCOS: + RTLIBCASE(SINCOS_F); case TargetOpcode::G_FLOG10: RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: @@ -648,6 +650,54 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, LocObserver, &MI); } +LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall( + MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, + LostDebugLocObserver &LocObserver) { + MachineFunction &MF = *MI.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + 
Register DstSin = MI.getOperand(0).getReg(); + Register DstCos = MI.getOperand(1).getReg(); + Register Src = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstSin); + + int MemSize = DstTy.getSizeInBytes(); + Align Alignment = getStackTemporaryAlignment(DstTy); + const DataLayout &DL = MIRBuilder.getDataLayout(); + unsigned AddrSpace = DL.getAllocaAddrSpace(); + MachinePointerInfo PtrInfo; + + Register StackPtrSin = + createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo) + .getReg(0); + Register StackPtrCos = + createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo) + .getReg(0); + + auto &Ctx = MF.getFunction().getContext(); + auto LibcallResult = + createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), + {{0}, Type::getVoidTy(Ctx), 0}, + {{Src, OpType, 0}, + {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1}, + {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}}, + LocObserver, &MI); + + if (LibcallResult != LegalizeResult::Legalized) + return LegalizerHelper::UnableToLegalize; + + MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment); + MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment); + + MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin); + MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos); + MI.eraseFromParent(); + + return LegalizerHelper::Legalized; +} + LegalizerHelper::LegalizeResult llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) { @@ -1275,6 +1325,16 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { return Status; break; } + case TargetOpcode::G_FSINCOS: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); + Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) 
{ + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + return UnableToLegalize; + } + return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); + } case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: case TargetOpcode::G_INTRINSIC_LRINT: diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 84dc4ab0a5522..92ecfadf97c99 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/CGData/CodeGenData.h" #include "llvm/CGData/CodeGenDataWriter.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/StructuralHash.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 5265bd74d2dbf..f80e1e8b683b3 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -117,7 +117,11 @@ namespace { /// IsAnalyzed - True if BB has been analyzed (info is still valid). /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. /// IsBrAnalyzable - True if analyzeBranch() returns false. - /// HasFallThrough - True if BB may fallthrough to the following BB. + /// HasFallThrough - True if BB has fallthrough to the following BB. + /// Note that BB may have a fallthrough if both + /// !HasFallThrough and !IsBrAnalyzable is true. Also note + /// that blockNeverFallThrough() can be used to prove that + /// there is no fall through. /// IsUnpredicable - True if BB is known to be unpredicable. /// ClobbersPred - True if BB could modify predicates (e.g. has /// cmp, call, etc.) @@ -125,7 +129,10 @@ namespace { /// ExtraCost - Extra cost for multi-cycle instructions. /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. - /// TrueBB / FalseBB- See analyzeBranch(). 
+ /// TrueBB / FalseBB- See analyzeBranch(), but note that FalseBB can be set + by AnalyzeBranches even if there is a fallthrough. So + it doesn't correspond exactly to the result from + TII::analyzeBranch. /// BrCond - Conditions for end of block conditional branches. /// Predicate - Predicate used in the BB. struct BBInfo { @@ -397,6 +404,21 @@ namespace { return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; } + /// Returns true if Block is known not to fallthrough to the following BB. + bool blockNeverFallThrough(BBInfo &BBI) const { + // Trust "HasFallThrough" if we could analyze branches. + if (BBI.IsBrAnalyzable) + return !BBI.HasFallThrough; + // If this is the last MBB in the function, or if the textual successor + // isn't in the successor list, then there is no fallthrough. + MachineFunction::iterator PI = BBI.BB->getIterator(); + MachineFunction::iterator I = std::next(PI); + if (I == BBI.BB->getParent()->end() || !PI->isSuccessor(&*I)) + return true; + // Could not prove that there is no fallthrough. + return false; + } + /// Used to sort if-conversion candidates. static bool IfcvtTokenCmp(const std::unique_ptr &C1, const std::unique_ptr &C2) { @@ -1715,9 +1737,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Only merge them if the true block does not fallthrough to the false // block. By not merging them, we make it possible to iteratively // ifcvt the blocks.
- if (!HasEarlyExit && - NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough && - !NextMBB.hasAddressTaken()) { + if (!HasEarlyExit && NextMBB.pred_size() == 1 && + blockNeverFallThrough(*NextBBI) && !NextMBB.hasAddressTaken()) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { @@ -2052,8 +2073,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->removeSuccessor(FalseBBI.BB, true); BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; - bool CanMergeTail = !TailBBI.HasFallThrough && - !TailBBI.BB->hasAddressTaken(); + bool CanMergeTail = + blockNeverFallThrough(TailBBI) && !TailBBI.BB->hasAddressTaken(); // The if-converted block can still have a predicated terminator // (e.g. a predicated return). If that is the case, we cannot merge // it with the tail block. diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 49f1504d244ed..9c4c86cebe7e5 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -629,173 +629,12 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) { } } -// For an (de)interleave tree like this: -// -// A C B D -// |___| |___| -// |_____| -// | -// A B C D -// -// We will get ABCD at the end while the leaf operands/results -// are ACBD, which are also what we initially collected in -// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI -// hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need -// to reorder them by interleaving these values. -static void interleaveLeafValues(MutableArrayRef SubLeaves) { - unsigned NumLeaves = SubLeaves.size(); - assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1); - if (NumLeaves == 2) - return; - - const unsigned HalfLeaves = NumLeaves / 2; - // Visit the sub-trees. 
- interleaveLeafValues(SubLeaves.take_front(HalfLeaves)); - interleaveLeafValues(SubLeaves.drop_front(HalfLeaves)); - - SmallVector Buffer; - // a0 a1 a2 a3 b0 b1 b2 b3 - // -> a0 b0 a1 b1 a2 b2 a3 b3 - for (unsigned i = 0U; i < NumLeaves; ++i) - Buffer.push_back(SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0)]); - - llvm::copy(Buffer, SubLeaves.begin()); -} - -static bool -getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl &Operands, - SmallVectorImpl &DeadInsts) { - assert(isInterleaveIntrinsic(II->getIntrinsicID())); - - // Visit with BFS - SmallVector Queue; - Queue.push_back(II); - while (!Queue.empty()) { - IntrinsicInst *Current = Queue.front(); - Queue.erase(Queue.begin()); - - // All the intermediate intrinsics will be deleted. - DeadInsts.push_back(Current); - - for (unsigned I = 0; I < getIntrinsicFactor(Current); ++I) { - Value *Op = Current->getOperand(I); - if (auto *OpII = dyn_cast(Op)) - if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) { - Queue.push_back(OpII); - continue; - } - - // If this is not a perfectly balanced tree, the leaf - // result types would be different. - if (!Operands.empty() && Op->getType() != Operands.back()->getType()) - return false; - - Operands.push_back(Op); - } - } - - const unsigned Factor = Operands.size(); - // Currently we only recognize factors 2...8 and other powers of 2. - // FIXME: should we assert here instead? 
- if (Factor <= 1 || - (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II))) - return false; - - // Recursively interleaved factors need to have their values reordered - // TODO: Remove once the loop vectorizer no longer recursively interleaves - // factors 4 + 8 - if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2) - interleaveLeafValues(Operands); - return true; -} - -static bool -getVectorDeinterleaveFactor(IntrinsicInst *II, - SmallVectorImpl &Results, - SmallVectorImpl &DeadInsts) { - assert(isDeinterleaveIntrinsic(II->getIntrinsicID())); - using namespace PatternMatch; - if (!II->hasNUses(getIntrinsicFactor(II))) - return false; - - // Visit with BFS - SmallVector Queue; - Queue.push_back(II); - while (!Queue.empty()) { - IntrinsicInst *Current = Queue.front(); - Queue.erase(Queue.begin()); - assert(Current->hasNUses(getIntrinsicFactor(Current))); - - // All the intermediate intrinsics will be deleted from the bottom-up. - DeadInsts.insert(DeadInsts.begin(), Current); - - SmallVector EVs(getIntrinsicFactor(Current), nullptr); - for (User *Usr : Current->users()) { - if (!isa(Usr)) - return 0; - - auto *EV = cast(Usr); - // Intermediate ExtractValue instructions will also be deleted. - DeadInsts.insert(DeadInsts.begin(), EV); - ArrayRef Indices = EV->getIndices(); - if (Indices.size() != 1) - return false; - - if (!EVs[Indices[0]]) - EVs[Indices[0]] = EV; - else - return false; - } - - // We have legal indices. At this point we're either going - // to continue the traversal or push the leaf values into Results. - for (ExtractValueInst *EV : EVs) { - // Continue the traversal. We're playing safe here and matching only the - // expression consisting of a perfectly balanced binary tree in which all - // intermediate values are only used once. 
- if (EV->hasOneUse() && - match(EV->user_back(), - m_Intrinsic()) && - EV->user_back()->hasNUses(2)) { - auto *EVUsr = cast(EV->user_back()); - Queue.push_back(EVUsr); - continue; - } - - // If this is not a perfectly balanced tree, the leaf - // result types would be different. - if (!Results.empty() && EV->getType() != Results.back()->getType()) - return false; - - // Save the leaf value. - Results.push_back(EV); - } - } - - const unsigned Factor = Results.size(); - // Currently we only recognize factors of 2...8 and other powers of 2. - // FIXME: should we assert here instead? - if (Factor <= 1 || - (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II))) - return 0; - - // Recursively interleaved factors need to have their values reordered - // TODO: Remove once the loop vectorizer no longer recursively interleaves - // factors 4 + 8 - if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2) - interleaveLeafValues(Results); - return true; -} - static Value *getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC) { if (auto *IMI = dyn_cast(WideMask)) { - SmallVector Operands; - SmallVector DeadInsts; - if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) { - assert(!Operands.empty()); - if (Operands.size() == Factor && llvm::all_equal(Operands)) - return Operands[0]; + if (isInterleaveIntrinsic(IMI->getIntrinsicID()) && + getIntrinsicFactor(IMI) == Factor && llvm::all_equal(IMI->args())) { + return IMI->getArgOperand(0); } } @@ -830,13 +669,19 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( if (!LoadedVal->hasOneUse() || !isa(LoadedVal)) return false; - SmallVector DeinterleaveValues; - SmallVector DeinterleaveDeadInsts; - if (!getVectorDeinterleaveFactor(DI, DeinterleaveValues, - DeinterleaveDeadInsts)) + const unsigned Factor = getIntrinsicFactor(DI); + if (!DI->hasNUses(Factor)) return false; - - const unsigned Factor = DeinterleaveValues.size(); + SmallVector DeinterleaveValues(Factor); + for (auto *User : DI->users()) { + 
auto *Extract = dyn_cast(User); + if (!Extract || Extract->getNumIndices() != 1) + return false; + unsigned Idx = Extract->getIndices()[0]; + if (DeinterleaveValues[Idx]) + return false; + DeinterleaveValues[Idx] = Extract; + } if (auto *VPLoad = dyn_cast(LoadedVal)) { if (VPLoad->getIntrinsicID() != Intrinsic::vp_load) @@ -869,7 +714,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( return false; } - DeadInsts.insert_range(DeinterleaveDeadInsts); + for (Value *V : DeinterleaveValues) + DeadInsts.insert(cast(V)); + DeadInsts.insert(DI); // We now have a target-specific load, so delete the old one. DeadInsts.insert(cast(LoadedVal)); return true; @@ -883,12 +730,8 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( if (!isa(StoredBy)) return false; - SmallVector InterleaveValues; - SmallVector InterleaveDeadInsts; - if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts)) - return false; - - const unsigned Factor = InterleaveValues.size(); + SmallVector InterleaveValues(II->args()); + const unsigned Factor = getIntrinsicFactor(II); if (auto *VPStore = dyn_cast(StoredBy)) { if (VPStore->getIntrinsicID() != Intrinsic::vp_store) @@ -922,7 +765,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( // We now have a target-specific store, so delete the old one. 
DeadInsts.insert(cast(StoredBy)); - DeadInsts.insert_range(InterleaveDeadInsts); + DeadInsts.insert(II); return true; } diff --git a/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp b/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp index 72e4be0165bf8..15a175d6391dd 100644 --- a/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp +++ b/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp @@ -14,21 +14,22 @@ #include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/Analysis/GenericDomTreeUpdaterImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Support/Compiler.h" namespace llvm { -template class GenericDomTreeUpdater< +template class LLVM_EXPORT_TEMPLATE GenericDomTreeUpdater< MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>; -template void +template LLVM_EXPORT_TEMPLATE void GenericDomTreeUpdater::recalculate(MachineFunction &MF); -template void GenericDomTreeUpdater< +template LLVM_EXPORT_TEMPLATE void GenericDomTreeUpdater< MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>::applyUpdatesImpl(); -template void GenericDomTreeUpdater< +template LLVM_EXPORT_TEMPLATE void GenericDomTreeUpdater< MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>::applyUpdatesImpl(); diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp index 917519f12a039..b221fa8b6de84 100644 --- a/llvm/lib/CodeGen/MachineDominators.cpp +++ b/llvm/lib/CodeGen/MachineDominators.cpp @@ -17,6 +17,7 @@ #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/GenericDomTreeConstruction.h" using namespace llvm; @@ -35,24 +36,29 @@ static cl::opt VerifyMachineDomInfoX( cl::desc("Verify machine dominator info (time consuming)")); namespace llvm { -template class DomTreeNodeBase; -template class DominatorTreeBase; // DomTreeBase +template class LLVM_EXPORT_TEMPLATE DomTreeNodeBase; +template class LLVM_EXPORT_TEMPLATE + 
DominatorTreeBase; // DomTreeBase namespace DomTreeBuilder { -template void Calculate(MBBDomTree &DT); -template void CalculateWithUpdates(MBBDomTree &DT, MBBUpdates U); +template LLVM_EXPORT_TEMPLATE void Calculate(MBBDomTree &DT); +template LLVM_EXPORT_TEMPLATE void +CalculateWithUpdates(MBBDomTree &DT, MBBUpdates U); -template void InsertEdge(MBBDomTree &DT, MachineBasicBlock *From, - MachineBasicBlock *To); +template LLVM_EXPORT_TEMPLATE void +InsertEdge(MBBDomTree &DT, MachineBasicBlock *From, + MachineBasicBlock *To); -template void DeleteEdge(MBBDomTree &DT, MachineBasicBlock *From, - MachineBasicBlock *To); +template LLVM_EXPORT_TEMPLATE void +DeleteEdge(MBBDomTree &DT, MachineBasicBlock *From, + MachineBasicBlock *To); -template void ApplyUpdates(MBBDomTree &DT, MBBDomTreeGraphDiff &, - MBBDomTreeGraphDiff *); +template LLVM_EXPORT_TEMPLATE void +ApplyUpdates(MBBDomTree &DT, MBBDomTreeGraphDiff &, + MBBDomTreeGraphDiff *); -template bool Verify(const MBBDomTree &DT, - MBBDomTree::VerificationLevel VL); +template LLVM_EXPORT_TEMPLATE bool +Verify(const MBBDomTree &DT, MBBDomTree::VerificationLevel VL); } // namespace DomTreeBuilder } diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 1c97e5c9063e4..fdb1a470493ce 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -22,13 +22,16 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/GenericLoopInfoImpl.h" using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. 
-template class llvm::LoopBase; -template class llvm::LoopInfoBase; +template class LLVM_EXPORT_TEMPLATE + llvm::LoopBase; +template class LLVM_EXPORT_TEMPLATE + llvm::LoopInfoBase; AnalysisKey MachineLoopAnalysis::Key; diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp index bbe386507fcd2..6e445f6787903 100644 --- a/llvm/lib/CodeGen/MachinePassManager.cpp +++ b/llvm/lib/CodeGen/MachinePassManager.cpp @@ -17,20 +17,21 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManagerImpl.h" +#include "llvm/Support/Compiler.h" using namespace llvm; AnalysisKey FunctionAnalysisManagerMachineFunctionProxy::Key; namespace llvm { -template class AnalysisManager; +template class LLVM_EXPORT_TEMPLATE AnalysisManager; template class PassManager; -template class InnerAnalysisManagerProxy; -template class InnerAnalysisManagerProxy; -template class OuterAnalysisManagerProxy; +template class LLVM_EXPORT_TEMPLATE + InnerAnalysisManagerProxy; +template class LLVM_EXPORT_TEMPLATE + InnerAnalysisManagerProxy; +template class LLVM_EXPORT_TEMPLATE + OuterAnalysisManagerProxy; } // namespace llvm bool FunctionAnalysisManagerMachineFunctionProxy::Result::invalidate( diff --git a/llvm/lib/CodeGen/MachinePostDominators.cpp b/llvm/lib/CodeGen/MachinePostDominators.cpp index 51637130addc4..1cb7e465881a2 100644 --- a/llvm/lib/CodeGen/MachinePostDominators.cpp +++ b/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -18,22 +18,25 @@ using namespace llvm; namespace llvm { -template class DominatorTreeBase; // PostDomTreeBase +template class LLVM_EXPORT_TEMPLATE + DominatorTreeBase; // PostDomTreeBase namespace DomTreeBuilder { -template void Calculate(MBBPostDomTree &DT); -template void InsertEdge(MBBPostDomTree &DT, - MachineBasicBlock *From, - MachineBasicBlock *To); -template void DeleteEdge(MBBPostDomTree &DT, - MachineBasicBlock *From, - MachineBasicBlock *To); -template void ApplyUpdates(MBBPostDomTree &DT, - 
MBBPostDomTreeGraphDiff &, - MBBPostDomTreeGraphDiff *); -template bool Verify(const MBBPostDomTree &DT, - MBBPostDomTree::VerificationLevel VL); +template LLVM_EXPORT_TEMPLATE void +Calculate(MBBPostDomTree &DT); +template LLVM_EXPORT_TEMPLATE void +InsertEdge(MBBPostDomTree &DT, MachineBasicBlock *From, + MachineBasicBlock *To); +template LLVM_EXPORT_TEMPLATE void +DeleteEdge(MBBPostDomTree &DT, MachineBasicBlock *From, + MachineBasicBlock *To); +template LLVM_EXPORT_TEMPLATE void +ApplyUpdates(MBBPostDomTree &DT, MBBPostDomTreeGraphDiff &, + MBBPostDomTreeGraphDiff *); +template LLVM_EXPORT_TEMPLATE bool +Verify(const MBBPostDomTree &DT, + MBBPostDomTree::VerificationLevel VL); } // namespace DomTreeBuilder extern bool VerifyMachineDomInfo; diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp index 8c140261c11ca..b86647dbe0a48 100644 --- a/llvm/lib/CodeGen/RegAllocScore.cpp +++ b/llvm/lib/CodeGen/RegAllocScore.cpp @@ -23,13 +23,16 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -cl::opt CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden); -cl::opt LoadWeight("regalloc-load-weight", cl::init(4.0), cl::Hidden); -cl::opt StoreWeight("regalloc-store-weight", cl::init(1.0), cl::Hidden); -cl::opt CheapRematWeight("regalloc-cheap-remat-weight", cl::init(0.2), - cl::Hidden); -cl::opt ExpensiveRematWeight("regalloc-expensive-remat-weight", - cl::init(1.0), cl::Hidden); +LLVM_ABI cl::opt CopyWeight("regalloc-copy-weight", cl::init(0.2), + cl::Hidden); +LLVM_ABI cl::opt LoadWeight("regalloc-load-weight", cl::init(4.0), + cl::Hidden); +LLVM_ABI cl::opt StoreWeight("regalloc-store-weight", cl::init(1.0), + cl::Hidden); +LLVM_ABI cl::opt CheapRematWeight("regalloc-cheap-remat-weight", + cl::init(0.2), cl::Hidden); +LLVM_ABI cl::opt ExpensiveRematWeight("regalloc-expensive-remat-weight", + cl::init(1.0), cl::Hidden); #define DEBUG_TYPE "regalloc-score" RegAllocScore &RegAllocScore::operator+=(const RegAllocScore &Other) 
{ diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 66717135c9adf..a0b5f67c2e6c7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3587,6 +3587,19 @@ bool TargetLowering::SimplifyDemandedVectorElts( DemandedRHS.setBit(M - NumElts); } + // If either side isn't demanded, replace it by UNDEF. We handle this + // explicitly here to also simplify in case of multiple uses (on the + // contrary to the SimplifyDemandedVectorElts calls below). + bool FoldLHS = !DemandedLHS && !LHS.isUndef(); + bool FoldRHS = !DemandedRHS && !RHS.isUndef(); + if (FoldLHS || FoldRHS) { + LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS; + RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS; + SDValue NewOp = + TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask); + return TLO.CombineTo(Op, NewOp); + } + // See if we can simplify either shuffle operand. 
APInt UndefLHS, ZeroLHS; APInt UndefRHS, ZeroRHS; diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp index 45cb28af56050..854e6d7135860 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp @@ -11,6 +11,7 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/DefaultHostBootstrapValues.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h" +#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Process.h" #include "llvm/TargetParser/Host.h" diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ddc9c5392f922..d4f95be083a47 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -824,8 +824,12 @@ void OpenMPIRBuilder::finalize(Function *Fn) { M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")}; emitUsed("llvm.compiler.used", LLVMCompilerUsed); } + + IsFinalized = true; } +bool OpenMPIRBuilder::isFinalized() { return IsFinalized; } + OpenMPIRBuilder::~OpenMPIRBuilder() { assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index fd8c2d7bb5cc3..6001ed421183b 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -325,21 +325,22 @@ DIStringType *DIBuilder::createStringType(StringRef Name, } DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) { - return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0, - 0, 0, std::nullopt, std::nullopt, DINode::FlagZero); + return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, + (uint64_t)0, 0, (uint64_t)0, std::nullopt, + std::nullopt, DINode::FlagZero); } DIDerivedType *DIBuilder::createPtrAuthQualifiedType( DIType *FromTy, 
unsigned Key, bool IsAddressDiscriminated, unsigned ExtraDiscriminator, bool IsaPointer, bool AuthenticatesNullValues) { - return DIDerivedType::get(VMContext, dwarf::DW_TAG_LLVM_ptrauth_type, "", - nullptr, 0, nullptr, FromTy, 0, 0, 0, std::nullopt, - std::optional( - std::in_place, Key, IsAddressDiscriminated, - ExtraDiscriminator, IsaPointer, - AuthenticatesNullValues), - DINode::FlagZero); + return DIDerivedType::get( + VMContext, dwarf::DW_TAG_LLVM_ptrauth_type, "", nullptr, 0, nullptr, + FromTy, (uint64_t)0, 0, (uint64_t)0, std::nullopt, + std::optional( + std::in_place, Key, IsAddressDiscriminated, ExtraDiscriminator, + IsaPointer, AuthenticatesNullValues), + DINode::FlagZero); } DIDerivedType * @@ -381,9 +382,9 @@ DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name, DINode::DIFlags Flags, DINodeArray Annotations) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File, - LineNo, getNonCompileUnitScope(Context), Ty, 0, - AlignInBits, 0, std::nullopt, std::nullopt, Flags, - nullptr, Annotations); + LineNo, getNonCompileUnitScope(Context), Ty, + (uint64_t)0, AlignInBits, (uint64_t)0, std::nullopt, + std::nullopt, Flags, nullptr, Annotations); } DIDerivedType * @@ -392,17 +393,17 @@ DIBuilder::createTemplateAlias(DIType *Ty, StringRef Name, DIFile *File, DINodeArray TParams, uint32_t AlignInBits, DINode::DIFlags Flags, DINodeArray Annotations) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_template_alias, Name, File, - LineNo, getNonCompileUnitScope(Context), Ty, 0, - AlignInBits, 0, std::nullopt, std::nullopt, Flags, - TParams.get(), Annotations); + LineNo, getNonCompileUnitScope(Context), Ty, + (uint64_t)0, AlignInBits, (uint64_t)0, std::nullopt, + std::nullopt, Flags, TParams.get(), Annotations); } DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) { assert(Ty && "Invalid type!"); assert(FriendTy && "Invalid friend type!"); return DIDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, Ty, - 
FriendTy, 0, 0, 0, std::nullopt, std::nullopt, - DINode::FlagZero); + FriendTy, (uint64_t)0, 0, (uint64_t)0, std::nullopt, + std::nullopt, DINode::FlagZero); } DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy, @@ -427,6 +428,16 @@ DIDerivedType *DIBuilder::createMemberType( std::nullopt, Flags, nullptr, Annotations); } +DIDerivedType *DIBuilder::createMemberType( + DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber, + Metadata *SizeInBits, uint32_t AlignInBits, Metadata *OffsetInBits, + DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) { + return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, + LineNumber, getNonCompileUnitScope(Scope), Ty, + SizeInBits, AlignInBits, OffsetInBits, std::nullopt, + std::nullopt, Flags, nullptr, Annotations); +} + static ConstantAsMetadata *getConstantOrNull(Constant *C) { if (C) return ConstantAsMetadata::get(C); @@ -451,14 +462,29 @@ DIDerivedType *DIBuilder::createVariantMemberType(DIScope *Scope, Constant *Discriminant, DIType *Ty) { auto *V = DICompositeType::get(VMContext, dwarf::DW_TAG_variant, {}, nullptr, - 0, getNonCompileUnitScope(Scope), {}, 0, 0, 0, - DINode::FlagZero, Elements, 0, {}, nullptr); + 0, getNonCompileUnitScope(Scope), {}, + (uint64_t)0, 0, (uint64_t)0, DINode::FlagZero, + Elements, 0, {}, nullptr); trackIfUnresolved(V); return createVariantMemberType(Scope, {}, nullptr, 0, 0, 0, 0, Discriminant, DINode::FlagZero, V); } +DIDerivedType *DIBuilder::createBitFieldMemberType( + DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber, + Metadata *SizeInBits, Metadata *OffsetInBits, uint64_t StorageOffsetInBits, + DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) { + Flags |= DINode::FlagBitField; + return DIDerivedType::get( + VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, + getNonCompileUnitScope(Scope), Ty, SizeInBits, /*AlignInBits=*/0, + OffsetInBits, std::nullopt, std::nullopt, Flags, + 
ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64), + StorageOffsetInBits)), + Annotations); +} + DIDerivedType *DIBuilder::createBitFieldMemberType( DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber, uint64_t SizeInBits, uint64_t OffsetInBits, uint64_t StorageOffsetInBits, @@ -480,9 +506,9 @@ DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name, DIFile *File, unsigned Tag, uint32_t AlignInBits) { Flags |= DINode::FlagStaticMember; return DIDerivedType::get(VMContext, Tag, Name, File, LineNumber, - getNonCompileUnitScope(Scope), Ty, 0, AlignInBits, - 0, std::nullopt, std::nullopt, Flags, - getConstantOrNull(Val)); + getNonCompileUnitScope(Scope), Ty, (uint64_t)0, + AlignInBits, (uint64_t)0, std::nullopt, + std::nullopt, Flags, getConstantOrNull(Val)); } DIDerivedType * @@ -563,6 +589,22 @@ DICompositeType *DIBuilder::createClassType( return R; } +DICompositeType *DIBuilder::createStructType( + DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber, + Metadata *SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags, + DIType *DerivedFrom, DINodeArray Elements, unsigned RunTimeLang, + DIType *VTableHolder, StringRef UniqueIdentifier, DIType *Specification, + uint32_t NumExtraInhabitants) { + auto *R = DICompositeType::get( + VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, + getNonCompileUnitScope(Context), DerivedFrom, SizeInBits, AlignInBits, 0, + Flags, Elements, RunTimeLang, /*EnumKind=*/std::nullopt, VTableHolder, + nullptr, UniqueIdentifier, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, Specification, NumExtraInhabitants); + trackIfUnresolved(R); + return R; +} + DICompositeType *DIBuilder::createStructType( DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber, uint64_t SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags, diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 473114b99225b..44b0f0d50067c 100644 --- 
a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -825,25 +825,23 @@ DIGenericSubrange::BoundType DIGenericSubrange::getStride() const { } DISubrangeType::DISubrangeType(LLVMContext &C, StorageType Storage, - unsigned Line, uint64_t SizeInBits, - uint32_t AlignInBits, DIFlags Flags, - ArrayRef Ops) + unsigned Line, uint32_t AlignInBits, + DIFlags Flags, ArrayRef Ops) : DIType(C, DISubrangeTypeKind, Storage, dwarf::DW_TAG_subrange_type, Line, - SizeInBits, AlignInBits, 0, 0, Flags, Ops) {} + AlignInBits, 0, Flags, Ops) {} DISubrangeType *DISubrangeType::getImpl( LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line, - Metadata *Scope, uint64_t SizeInBits, uint32_t AlignInBits, DIFlags Flags, + Metadata *Scope, Metadata *SizeInBits, uint32_t AlignInBits, DIFlags Flags, Metadata *BaseType, Metadata *LowerBound, Metadata *UpperBound, Metadata *Stride, Metadata *Bias, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(DISubrangeType, (Name, File, Line, Scope, SizeInBits, AlignInBits, Flags, BaseType, LowerBound, UpperBound, Stride, Bias)); - Metadata *Ops[] = {File, Scope, Name, BaseType, - LowerBound, UpperBound, Stride, Bias}; - DEFINE_GETIMPL_STORE(DISubrangeType, (Line, SizeInBits, AlignInBits, Flags), - Ops); + Metadata *Ops[] = {File, Scope, Name, SizeInBits, nullptr, + BaseType, LowerBound, UpperBound, Stride, Bias}; + DEFINE_GETIMPL_STORE(DISubrangeType, (Line, AlignInBits, Flags), Ops); } DISubrangeType::BoundType @@ -883,18 +881,17 @@ DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, const APInt &Value, } DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag, - MDString *Name, uint64_t SizeInBits, + MDString *Name, Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, uint32_t NumExtraInhabitants, DIFlags Flags, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); 
DEFINE_GETIMPL_LOOKUP(DIBasicType, (Tag, Name, SizeInBits, AlignInBits, Encoding, NumExtraInhabitants, Flags)); - Metadata *Ops[] = {nullptr, nullptr, Name}; - DEFINE_GETIMPL_STORE( - DIBasicType, - (Tag, SizeInBits, AlignInBits, Encoding, NumExtraInhabitants, Flags), - Ops); + Metadata *Ops[] = {nullptr, nullptr, Name, SizeInBits, nullptr}; + DEFINE_GETIMPL_STORE(DIBasicType, + (Tag, AlignInBits, Encoding, NumExtraInhabitants, Flags), + Ops); } std::optional DIBasicType::getSignedness() const { @@ -914,18 +911,18 @@ std::optional DIBasicType::getSignedness() const { DIFixedPointType * DIFixedPointType::getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, - uint64_t SizeInBits, uint32_t AlignInBits, + Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, APInt Denominator, StorageType Storage, bool ShouldCreate) { DEFINE_GETIMPL_LOOKUP(DIFixedPointType, (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags, Kind, Factor, Numerator, Denominator)); - Metadata *Ops[] = {nullptr, nullptr, Name}; - DEFINE_GETIMPL_STORE(DIFixedPointType, - (Tag, SizeInBits, AlignInBits, Encoding, Flags, Kind, - Factor, Numerator, Denominator), - Ops); + Metadata *Ops[] = {nullptr, nullptr, Name, SizeInBits, nullptr}; + DEFINE_GETIMPL_STORE( + DIFixedPointType, + (Tag, AlignInBits, Encoding, Flags, Kind, Factor, Numerator, Denominator), + Ops); } bool DIFixedPointType::isSigned() const { @@ -957,17 +954,17 @@ DIStringType *DIStringType::getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *StringLength, Metadata *StringLengthExp, Metadata *StringLocationExp, - uint64_t SizeInBits, uint32_t AlignInBits, + Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(DIStringType, (Tag, Name, StringLength, StringLengthExp, StringLocationExp, SizeInBits, AlignInBits, Encoding)); - 
Metadata *Ops[] = {nullptr, nullptr, Name, - StringLength, StringLengthExp, StringLocationExp}; - DEFINE_GETIMPL_STORE(DIStringType, (Tag, SizeInBits, AlignInBits, Encoding), - Ops); + Metadata *Ops[] = {nullptr, nullptr, Name, + SizeInBits, nullptr, StringLength, + StringLengthExp, StringLocationExp}; + DEFINE_GETIMPL_STORE(DIStringType, (Tag, AlignInBits, Encoding), Ops); } DIType *DIDerivedType::getClassType() const { assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); @@ -1004,8 +1001,8 @@ Constant *DIDerivedType::getDiscriminantValue() const { DIDerivedType *DIDerivedType::getImpl( LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, - unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits, - uint32_t AlignInBits, uint64_t OffsetInBits, + unsigned Line, Metadata *Scope, Metadata *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, std::optional DWARFAddressSpace, std::optional PtrAuthData, DIFlags Flags, Metadata *ExtraData, Metadata *Annotations, StorageType Storage, bool ShouldCreate) { @@ -1014,11 +1011,11 @@ DIDerivedType *DIDerivedType::getImpl( (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, DWARFAddressSpace, PtrAuthData, Flags, ExtraData, Annotations)); - Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData, Annotations}; - DEFINE_GETIMPL_STORE(DIDerivedType, - (Tag, Line, SizeInBits, AlignInBits, OffsetInBits, - DWARFAddressSpace, PtrAuthData, Flags), - Ops); + Metadata *Ops[] = {File, Scope, Name, SizeInBits, + OffsetInBits, BaseType, ExtraData, Annotations}; + DEFINE_GETIMPL_STORE( + DIDerivedType, + (Tag, Line, AlignInBits, DWARFAddressSpace, PtrAuthData, Flags), Ops); } std::optional @@ -1030,8 +1027,8 @@ DIDerivedType::getPtrAuthData() const { DICompositeType *DICompositeType::getImpl( LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, - unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits, - uint32_t AlignInBits, 
uint64_t OffsetInBits, DIFlags Flags, + unsigned Line, Metadata *Scope, Metadata *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, std::optional EnumKind, Metadata *VTableHolder, Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator, Metadata *DataLocation, Metadata *Associated, @@ -1047,20 +1044,21 @@ DICompositeType *DICompositeType::getImpl( OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams, Identifier, Discriminator, DataLocation, Associated, Allocated, Rank, Annotations, Specification, NumExtraInhabitants, BitStride)); - Metadata *Ops[] = {File, Scope, Name, BaseType, - Elements, VTableHolder, TemplateParams, Identifier, - Discriminator, DataLocation, Associated, Allocated, - Rank, Annotations, Specification, BitStride}; + Metadata *Ops[] = {File, Scope, Name, SizeInBits, + OffsetInBits, BaseType, Elements, VTableHolder, + TemplateParams, Identifier, Discriminator, DataLocation, + Associated, Allocated, Rank, Annotations, + Specification, BitStride}; DEFINE_GETIMPL_STORE(DICompositeType, - (Tag, Line, RuntimeLang, SizeInBits, AlignInBits, - OffsetInBits, NumExtraInhabitants, EnumKind, Flags), + (Tag, Line, RuntimeLang, AlignInBits, + NumExtraInhabitants, EnumKind, Flags), Ops); } DICompositeType *DICompositeType::buildODRType( LLVMContext &Context, MDString &Identifier, unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + Metadata *SizeInBits, uint32_t AlignInBits, Metadata *OffsetInBits, Metadata *Specification, uint32_t NumExtraInhabitants, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, std::optional EnumKind, Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator, @@ -1086,12 +1084,13 @@ DICompositeType *DICompositeType::buildODRType( return CT; // Mutate CT in place. 
Keep this in sync with getImpl. - CT->mutate(Tag, Line, RuntimeLang, SizeInBits, AlignInBits, OffsetInBits, - NumExtraInhabitants, EnumKind, Flags); - Metadata *Ops[] = {File, Scope, Name, BaseType, - Elements, VTableHolder, TemplateParams, &Identifier, - Discriminator, DataLocation, Associated, Allocated, - Rank, Annotations, Specification, BitStride}; + CT->mutate(Tag, Line, RuntimeLang, AlignInBits, NumExtraInhabitants, EnumKind, + Flags); + Metadata *Ops[] = {File, Scope, Name, SizeInBits, + OffsetInBits, BaseType, Elements, VTableHolder, + TemplateParams, &Identifier, Discriminator, DataLocation, + Associated, Allocated, Rank, Annotations, + Specification, BitStride}; assert((std::end(Ops) - std::begin(Ops)) == (int)CT->getNumOperands() && "Mismatched number of operands"); for (unsigned I = 0, E = CT->getNumOperands(); I != E; ++I) @@ -1103,7 +1102,7 @@ DICompositeType *DICompositeType::buildODRType( DICompositeType *DICompositeType::getODRType( LLVMContext &Context, MDString &Identifier, unsigned Tag, MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + Metadata *SizeInBits, uint32_t AlignInBits, Metadata *OffsetInBits, Metadata *Specification, uint32_t NumExtraInhabitants, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, std::optional EnumKind, Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator, @@ -1138,7 +1137,7 @@ DISubroutineType::DISubroutineType(LLVMContext &C, StorageType Storage, DIFlags Flags, uint8_t CC, ArrayRef Ops) : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type, 0, - 0, 0, 0, 0, Flags, Ops), + 0, 0, Flags, Ops), CC(CC) {} DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags, @@ -1146,7 +1145,7 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags, StorageType Storage, bool ShouldCreate) { DEFINE_GETIMPL_LOOKUP(DISubroutineType, (Flags, 
CC, TypeArray)); - Metadata *Ops[] = {nullptr, nullptr, nullptr, TypeArray}; + Metadata *Ops[] = {nullptr, nullptr, nullptr, nullptr, nullptr, TypeArray}; DEFINE_GETIMPL_STORE(DISubroutineType, (Flags, CC), Ops); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 2d89ec1b0a8d3..8d9b545d4134f 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -643,6 +643,10 @@ MemoryEffects CallBase::getMemoryEffects() const { if (hasClobberingOperandBundles()) FnME |= MemoryEffects::writeOnly(); } + if (isVolatile()) { + // Volatile operations also access inaccessible memory. + FnME |= MemoryEffects::inaccessibleMemOnly(); + } ME &= FnME; } return ME; diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index ef279721b9643..4446f47d323d2 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -491,27 +491,28 @@ template <> struct MDNodeKeyImpl { template <> struct MDNodeKeyImpl { unsigned Tag; MDString *Name; - uint64_t SizeInBits; + Metadata *SizeInBits; uint32_t AlignInBits; unsigned Encoding; uint32_t NumExtraInhabitants; unsigned Flags; - MDNodeKeyImpl(unsigned Tag, MDString *Name, uint64_t SizeInBits, + MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, uint32_t NumExtraInhabitants, unsigned Flags) : Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits), Encoding(Encoding), NumExtraInhabitants(NumExtraInhabitants), Flags(Flags) {} MDNodeKeyImpl(const DIBasicType *N) - : Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()), - AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), + : Tag(N->getTag()), Name(N->getRawName()), + SizeInBits(N->getRawSizeInBits()), AlignInBits(N->getAlignInBits()), + Encoding(N->getEncoding()), NumExtraInhabitants(N->getNumExtraInhabitants()), Flags(N->getFlags()) { } bool isKeyOf(const DIBasicType *RHS) const { return Tag == RHS->getTag() && Name 
== RHS->getRawName() && - SizeInBits == RHS->getSizeInBits() && + SizeInBits == RHS->getRawSizeInBits() && AlignInBits == RHS->getAlignInBits() && Encoding == RHS->getEncoding() && NumExtraInhabitants == RHS->getNumExtraInhabitants() && @@ -526,7 +527,7 @@ template <> struct MDNodeKeyImpl { template <> struct MDNodeKeyImpl { unsigned Tag; MDString *Name; - uint64_t SizeInBits; + Metadata *SizeInBits; uint32_t AlignInBits; unsigned Encoding; unsigned Flags; @@ -535,20 +536,21 @@ template <> struct MDNodeKeyImpl { APInt Numerator; APInt Denominator; - MDNodeKeyImpl(unsigned Tag, MDString *Name, uint64_t SizeInBits, + MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding, unsigned Flags, unsigned Kind, int Factor, APInt Numerator, APInt Denominator) : Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits), Encoding(Encoding), Flags(Flags), Kind(Kind), Factor(Factor), Numerator(Numerator), Denominator(Denominator) {} MDNodeKeyImpl(const DIFixedPointType *N) - : Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()), - AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), - Flags(N->getFlags()), Kind(N->getKind()), Factor(N->getFactorRaw()), - Numerator(N->getNumeratorRaw()), Denominator(N->getDenominatorRaw()) {} + : Tag(N->getTag()), Name(N->getRawName()), + SizeInBits(N->getRawSizeInBits()), AlignInBits(N->getAlignInBits()), + Encoding(N->getEncoding()), Flags(N->getFlags()), Kind(N->getKind()), + Factor(N->getFactorRaw()), Numerator(N->getNumeratorRaw()), + Denominator(N->getDenominatorRaw()) {} bool isKeyOf(const DIFixedPointType *RHS) const { - return Name == RHS->getRawName() && SizeInBits == RHS->getSizeInBits() && + return Name == RHS->getRawName() && SizeInBits == RHS->getRawSizeInBits() && AlignInBits == RHS->getAlignInBits() && Kind == RHS->getKind() && (RHS->isRational() ? 
(Numerator == RHS->getNumerator() && Denominator == RHS->getDenominator()) @@ -566,13 +568,13 @@ template <> struct MDNodeKeyImpl { Metadata *StringLength; Metadata *StringLengthExp; Metadata *StringLocationExp; - uint64_t SizeInBits; + Metadata *SizeInBits; uint32_t AlignInBits; unsigned Encoding; MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *StringLength, Metadata *StringLengthExp, Metadata *StringLocationExp, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding) + Metadata *SizeInBits, uint32_t AlignInBits, unsigned Encoding) : Tag(Tag), Name(Name), StringLength(StringLength), StringLengthExp(StringLengthExp), StringLocationExp(StringLocationExp), SizeInBits(SizeInBits), AlignInBits(AlignInBits), Encoding(Encoding) {} @@ -581,7 +583,7 @@ template <> struct MDNodeKeyImpl { StringLength(N->getRawStringLength()), StringLengthExp(N->getRawStringLengthExp()), StringLocationExp(N->getRawStringLocationExp()), - SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()), + SizeInBits(N->getRawSizeInBits()), AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()) {} bool isKeyOf(const DIStringType *RHS) const { @@ -589,7 +591,7 @@ template <> struct MDNodeKeyImpl { StringLength == RHS->getRawStringLength() && StringLengthExp == RHS->getRawStringLengthExp() && StringLocationExp == RHS->getRawStringLocationExp() && - SizeInBits == RHS->getSizeInBits() && + SizeInBits == RHS->getRawSizeInBits() && AlignInBits == RHS->getAlignInBits() && Encoding == RHS->getEncoding(); } @@ -609,8 +611,8 @@ template <> struct MDNodeKeyImpl { unsigned Line; Metadata *Scope; Metadata *BaseType; - uint64_t SizeInBits; - uint64_t OffsetInBits; + Metadata *SizeInBits; + Metadata *OffsetInBits; uint32_t AlignInBits; std::optional DWARFAddressSpace; std::optional PtrAuthData; @@ -619,8 +621,8 @@ template <> struct MDNodeKeyImpl { Metadata *Annotations; MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line, - Metadata *Scope, Metadata *BaseType, 
uint64_t SizeInBits, - uint32_t AlignInBits, uint64_t OffsetInBits, + Metadata *Scope, Metadata *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, std::optional DWARFAddressSpace, std::optional PtrAuthData, unsigned Flags, Metadata *ExtraData, Metadata *Annotations) @@ -632,8 +634,8 @@ template <> struct MDNodeKeyImpl { MDNodeKeyImpl(const DIDerivedType *N) : Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()), Line(N->getLine()), Scope(N->getRawScope()), - BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()), - OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()), + BaseType(N->getRawBaseType()), SizeInBits(N->getRawSizeInBits()), + OffsetInBits(N->getRawOffsetInBits()), AlignInBits(N->getAlignInBits()), DWARFAddressSpace(N->getDWARFAddressSpace()), PtrAuthData(N->getPtrAuthData()), Flags(N->getFlags()), ExtraData(N->getRawExtraData()), Annotations(N->getRawAnnotations()) {} @@ -642,9 +644,9 @@ template <> struct MDNodeKeyImpl { return Tag == RHS->getTag() && Name == RHS->getRawName() && File == RHS->getRawFile() && Line == RHS->getLine() && Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() && - SizeInBits == RHS->getSizeInBits() && + SizeInBits == RHS->getRawSizeInBits() && AlignInBits == RHS->getAlignInBits() && - OffsetInBits == RHS->getOffsetInBits() && + OffsetInBits == RHS->getRawOffsetInBits() && DWARFAddressSpace == RHS->getDWARFAddressSpace() && PtrAuthData == RHS->getPtrAuthData() && Flags == RHS->getFlags() && ExtraData == RHS->getRawExtraData() && @@ -673,7 +675,7 @@ template <> struct MDNodeKeyImpl { Metadata *File; unsigned Line; Metadata *Scope; - uint64_t SizeInBits; + Metadata *SizeInBits; uint32_t AlignInBits; unsigned Flags; Metadata *BaseType; @@ -683,7 +685,7 @@ template <> struct MDNodeKeyImpl { Metadata *Bias; MDNodeKeyImpl(MDString *Name, Metadata *File, unsigned Line, Metadata *Scope, - uint64_t SizeInBits, uint32_t AlignInBits, unsigned Flags, + Metadata 
*SizeInBits, uint32_t AlignInBits, unsigned Flags, Metadata *BaseType, Metadata *LowerBound, Metadata *UpperBound, Metadata *Stride, Metadata *Bias) : Name(Name), File(File), Line(Line), Scope(Scope), @@ -692,7 +694,7 @@ template <> struct MDNodeKeyImpl { Stride(Stride), Bias(Bias) {} MDNodeKeyImpl(const DISubrangeType *N) : Name(N->getRawName()), File(N->getRawFile()), Line(N->getLine()), - Scope(N->getRawScope()), SizeInBits(N->getSizeInBits()), + Scope(N->getRawScope()), SizeInBits(N->getRawSizeInBits()), AlignInBits(N->getAlignInBits()), Flags(N->getFlags()), BaseType(N->getRawBaseType()), LowerBound(N->getRawLowerBound()), UpperBound(N->getRawUpperBound()), Stride(N->getRawStride()), @@ -716,7 +718,7 @@ template <> struct MDNodeKeyImpl { return Name == RHS->getRawName() && File == RHS->getRawFile() && Line == RHS->getLine() && Scope == RHS->getRawScope() && - SizeInBits == RHS->getSizeInBits() && + SizeInBits == RHS->getRawSizeInBits() && AlignInBits == RHS->getAlignInBits() && Flags == RHS->getFlags() && BaseType == RHS->getRawBaseType() && BoundsEqual(LowerBound, RHS->getRawLowerBound()) && @@ -784,8 +786,8 @@ template <> struct MDNodeKeyImpl { unsigned Line; Metadata *Scope; Metadata *BaseType; - uint64_t SizeInBits; - uint64_t OffsetInBits; + Metadata *SizeInBits; + Metadata *OffsetInBits; uint32_t AlignInBits; unsigned Flags; Metadata *Elements; @@ -804,8 +806,8 @@ template <> struct MDNodeKeyImpl { Metadata *BitStride; MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line, - Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits, - uint32_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, + Metadata *Scope, Metadata *BaseType, Metadata *SizeInBits, + uint32_t AlignInBits, Metadata *OffsetInBits, unsigned Flags, Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder, Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator, @@ -825,8 +827,8 @@ template <> struct MDNodeKeyImpl { MDNodeKeyImpl(const 
DICompositeType *N) : Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()), Line(N->getLine()), Scope(N->getRawScope()), - BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()), - OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()), + BaseType(N->getRawBaseType()), SizeInBits(N->getRawSizeInBits()), + OffsetInBits(N->getRawOffsetInBits()), AlignInBits(N->getAlignInBits()), Flags(N->getFlags()), Elements(N->getRawElements()), RuntimeLang(N->getRuntimeLang()), VTableHolder(N->getRawVTableHolder()), TemplateParams(N->getRawTemplateParams()), @@ -843,10 +845,10 @@ template <> struct MDNodeKeyImpl { return Tag == RHS->getTag() && Name == RHS->getRawName() && File == RHS->getRawFile() && Line == RHS->getLine() && Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() && - SizeInBits == RHS->getSizeInBits() && + SizeInBits == RHS->getRawSizeInBits() && AlignInBits == RHS->getAlignInBits() && - OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() && - Elements == RHS->getRawElements() && + OffsetInBits == RHS->getRawOffsetInBits() && + Flags == RHS->getFlags() && Elements == RHS->getRawElements() && RuntimeLang == RHS->getRuntimeLang() && VTableHolder == RHS->getRawVTableHolder() && TemplateParams == RHS->getRawTemplateParams() && diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 702e0a51357f5..5c01d8595d0f9 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -432,19 +432,11 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); // Some darwins have an optimized __bzero/bzero function. 
- switch (TT.getArch()) { - case Triple::x86: - case Triple::x86_64: + if (TT.isX86()) { if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) setLibcallName(RTLIB::BZERO, "__bzero"); - break; - case Triple::aarch64: - case Triple::aarch64_32: + } else if (TT.isAArch64()) setLibcallName(RTLIB::BZERO, "bzero"); - break; - default: - break; - } if (darwinHasSinCosStret(TT)) { setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret"); @@ -457,37 +449,13 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, } } - switch (TT.getOS()) { - case Triple::MacOSX: - if (TT.isMacOSXVersionLT(10, 9)) { - setLibcallName(RTLIB::EXP10_F32, nullptr); - setLibcallName(RTLIB::EXP10_F64, nullptr); - } else { - setLibcallName(RTLIB::EXP10_F32, "__exp10f"); - setLibcallName(RTLIB::EXP10_F64, "__exp10"); - } - break; - case Triple::IOS: - if (TT.isOSVersionLT(7, 0)) { - setLibcallName(RTLIB::EXP10_F32, nullptr); - setLibcallName(RTLIB::EXP10_F64, nullptr); - break; - } - [[fallthrough]]; - case Triple::DriverKit: - case Triple::TvOS: - case Triple::WatchOS: - case Triple::XROS: + if (darwinHasExp10(TT)) { setLibcallName(RTLIB::EXP10_F32, "__exp10f"); setLibcallName(RTLIB::EXP10_F64, "__exp10"); - break; - default: - break; + } else { + setLibcallName(RTLIB::EXP10_F32, nullptr); + setLibcallName(RTLIB::EXP10_F64, nullptr); } - } else if (TT.getOS() == Triple::BridgeOS) { - // TODO: BridgeOS should be included in isOSDarwin. 
- setLibcallName(RTLIB::EXP10_F32, "__exp10f"); - setLibcallName(RTLIB::EXP10_F64, "__exp10"); } if (hasSinCos(TT)) { @@ -665,3 +633,22 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, if (TT.getArch() == Triple::ArchType::msp430) setMSP430Libcalls(*this, TT); } + +bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) { + assert(TT.isOSDarwin() && "should be called with darwin triple"); + + switch (TT.getOS()) { + case Triple::MacOSX: + return !TT.isMacOSXVersionLT(10, 9); + case Triple::IOS: + return !TT.isOSVersionLT(7, 0); + case Triple::DriverKit: + case Triple::TvOS: + case Triple::WatchOS: + case Triple::XROS: + case Triple::BridgeOS: + return true; + default: + return false; + } +} diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 71261343b3482..e7bb6d9a3e32d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1172,6 +1172,10 @@ void Verifier::visitDISubrangeType(const DISubrangeType &N) { CheckDI(!Bias || isa(Bias) || isa(Bias) || isa(Bias), "Bias must be signed constant or DIVariable or DIExpression", &N); + // Subrange types currently only support constant size. + auto *Size = N.getRawSizeInBits(); + CheckDI(!Size || isa(Size), + "SizeInBits must be a constant"); } void Verifier::visitDISubrange(const DISubrange &N) { @@ -1233,6 +1237,10 @@ void Verifier::visitDIBasicType(const DIBasicType &N) { N.getTag() == dwarf::DW_TAG_unspecified_type || N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N); + // Basic types currently only support constant size. 
+ auto *Size = N.getRawSizeInBits(); + CheckDI(!Size || isa(Size), + "SizeInBits must be a constant"); } void Verifier::visitDIFixedPointType(const DIFixedPointType &N) { @@ -1313,6 +1321,11 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) { "DWARF address space only applies to pointer or reference types", &N); } + + auto *Size = N.getRawSizeInBits(); + CheckDI(!Size || isa(Size) || isa(Size) || + isa(Size), + "SizeInBits must be a constant or DIVariable or DIExpression"); } /// Detect mutually exclusive flags. @@ -1400,6 +1413,11 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { if (N.getTag() == dwarf::DW_TAG_array_type) { CheckDI(N.getRawBaseType(), "array types must have a base type", &N); } + + auto *Size = N.getRawSizeInBits(); + CheckDI(!Size || isa(Size) || isa(Size) || + isa(Size), + "SizeInBits must be a constant or DIVariable or DIExpression"); } void Verifier::visitDISubroutineType(const DISubroutineType &N) { @@ -5008,6 +5026,9 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { Check(mdconst::dyn_extract(MDO), "!prof brunch_weights operand is not a const int"); } + } else { + Check(ProfName == "VP", "expected either branch_weights or VP profile name", + MD); } } @@ -5517,7 +5538,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Call.getOperand(Elem.Begin + 1)->getType()->isPointerTy(), "arguments to separate_storage assumptions should be pointers", Call); - return; + continue; } Check(Elem.Tag->getKey() == "ignore" || Attribute::isExistingAttribute(Elem.Tag->getKey()), @@ -5534,7 +5555,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { if (ArgCount == 3) Check(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(), "third argument should be an integer if present", Call); - return; + continue; } Check(ArgCount <= 2, "too many arguments", Call); if (Kind == Attribute::None) diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp index 
8aea08919f469..527ccf3fc36e0 100644 --- a/llvm/lib/MC/MCSchedule.cpp +++ b/llvm/lib/MC/MCSchedule.cpp @@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth, 0, 0, nullptr, + nullptr, nullptr}; int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI, diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 5f1fd57802c7b..6cd6b4abdd327 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1453,10 +1453,9 @@ static VersionTuple getMachoBuildVersionSupportedOS(const Triple &Target) { case Triple::WatchOS: return VersionTuple(5); case Triple::DriverKit: - // DriverKit always uses the build version load command. - return VersionTuple(); + case Triple::BridgeOS: case Triple::XROS: - // XROS always uses the build version load command. + // DriverKit/BridgeOS/XROS always use the build version load command. return VersionTuple(); default: break; @@ -1487,6 +1486,8 @@ getMachoBuildVersionPlatformType(const Triple &Target) { case Triple::XROS: return Target.isSimulatorEnvironment() ? 
MachO::PLATFORM_XROS_SIMULATOR : MachO::PLATFORM_XROS; + case Triple::BridgeOS: + return MachO::PLATFORM_BRIDGEOS; default: break; } @@ -1520,6 +1521,7 @@ void MCStreamer::emitVersionForTarget( Version = Target.getDriverKitVersion(); break; case Triple::XROS: + case Triple::BridgeOS: Version = Target.getOSVersion(); break; default: diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 2bac99b6309af..cad25a6ddd3f5 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -75,7 +75,8 @@ static void initializeUsedResources(InstrDesc &ID, WithColor::warning() << "Ignoring invalid write of zero cycles on processor resource " << PR.Name << "\n"; - WithColor::note() << "found in scheduling class " << SCDesc.Name + WithColor::note() << "found in scheduling class " + << SM.getSchedClassName(ID.SchedClassID) << " (write index #" << I << ")\n"; #endif continue; diff --git a/llvm/lib/Option/Arg.cpp b/llvm/lib/Option/Arg.cpp index 2d52b947aaede..3aab7c0768e14 100644 --- a/llvm/lib/Option/Arg.cpp +++ b/llvm/lib/Option/Arg.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Option/Arg.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/InterleavedRange.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -72,13 +73,7 @@ std::string Arg::getAsString(const ArgList &Args) const { ArgStringList ASL; render(Args, ASL); - for (ArgStringList::iterator - it = ASL.begin(), ie = ASL.end(); it != ie; ++it) { - if (it != ASL.begin()) - OS << ' '; - OS << *it; - } - + OS << llvm::interleaved(ASL, " "); return std::string(OS.str()); } @@ -100,11 +95,7 @@ void Arg::render(const ArgList &Args, ArgStringList &Output) const { case Option::RenderCommaJoinedStyle: 
{ SmallString<256> Res; raw_svector_ostream OS(Res); - OS << getSpelling(); - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ','; - OS << getValue(i); - } + OS << getSpelling() << llvm::interleaved(getValues(), ","); Output.push_back(Args.MakeArgString(OS.str())); break; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 356c2b71d2018..13835747c91e5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13404,30 +13404,6 @@ static bool isUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { return true; } -/// isDUPQMask - matches a splat of equivalent lanes within 128b segments in -/// the first vector operand. -static std::optional isDUPQMask(ArrayRef M, EVT VT) { - assert(VT.getFixedSizeInBits() % 128 == 0 && "Unsupported SVE vector size"); - unsigned Lane = (unsigned)M[0]; - unsigned Segments = VT.getFixedSizeInBits() / 128; - unsigned SegmentElts = VT.getVectorNumElements() / Segments; - - // Make sure there's no size changes. - if (SegmentElts * Segments != M.size()) - return std::nullopt; - - // Check the first index corresponds to one of the lanes in the first segment. - if (Lane >= SegmentElts) - return std::nullopt; - - // Check that all lanes match the first, adjusted for segment. - for (unsigned I = 0; I < M.size(); ++I) - if ((unsigned)M[I] != (Lane + ((I / SegmentElts) * SegmentElts))) - return std::nullopt; - - return Lane; -} - /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 
@@ -30029,8 +30005,15 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1)); } - if (Subtarget->hasSVE2p1()) { - if (std::optional Lane = isDUPQMask(ShuffleMask, VT)) { + if ((Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()) && + Subtarget->isSVEorStreamingSVEAvailable()) { + assert(VT.getFixedSizeInBits() % AArch64::SVEBitsPerBlock == 0 && + "Unsupported SVE vector size"); + + unsigned Segments = VT.getFixedSizeInBits() / AArch64::SVEBitsPerBlock; + unsigned SegmentElts = VT.getVectorNumElements() / Segments; + if (std::optional Lane = + isDUPQMask(ShuffleMask, Segments, SegmentElts)) { SDValue IID = DAG.getConstant(Intrinsic::aarch64_sve_dup_laneq, DL, MVT::i64); return convertFromScalableVector( diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h index 7b044cf7c238f..e9bc6d947b0d9 100644 --- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h +++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { @@ -6723,6 +6724,32 @@ inline bool isREVMask(ArrayRef M, unsigned EltSize, unsigned NumElts, return true; } +/// isDUPQMask - matches a splat of equivalent lanes within segments of a given +/// number of elements. +inline std::optional isDUPQMask(ArrayRef Mask, unsigned Segments, + unsigned SegmentSize) { + unsigned Lane = unsigned(Mask[0]); + + // Make sure there's no size changes. + if (SegmentSize * Segments != Mask.size()) + return std::nullopt; + + // Check the first index corresponds to one of the lanes in the first segment. + if (Lane >= SegmentSize) + return std::nullopt; + + // Check that all lanes match the first, adjusted for segment. + // Undef/poison lanes (<0) are also accepted. 
+ if (all_of(enumerate(Mask), [&](auto P) { + const unsigned SegmentIndex = P.index() / SegmentSize; + return P.value() < 0 || + unsigned(P.value()) == Lane + SegmentIndex * SegmentSize; + })) + return Lane; + + return std::nullopt; +} + } // namespace llvm #endif diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 15e38e6cb2408..3387dee8aa4c8 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5599,6 +5599,23 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, SrcTy = DstTy; } + // Segmented shuffle matching. + if ((ST->hasSVE2p1() || ST->hasSME2p1()) && + ST->isSVEorStreamingSVEAvailable() && Kind == TTI::SK_PermuteSingleSrc && + isa(SrcTy) && !Mask.empty() && + SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf( + AArch64::SVEBitsPerBlock)) { + + FixedVectorType *VTy = cast(SrcTy); + unsigned Segments = + VTy->getPrimitiveSizeInBits() / AArch64::SVEBitsPerBlock; + unsigned SegmentElts = VTy->getNumElements() / Segments; + + // dupq zd.t, zn.t[idx] + if (isDUPQMask(Mask, Segments, SegmentElts)) + return LT.first; + } + // Check for broadcast loads, which are supported by the LD1R instruction. // In terms of code-size, the shuffle vector is free when a load + dup get // folded into a LD1R. That's what we check and return here. For performance diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index bbe83821eca8e..3c8b5712c1f0c 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1784,6 +1784,10 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + // Do not print the numeric target address when symbolizing. 
+ if (SymbolizeOperands) + return; + const MCOperand &Op = MI->getOperand(OpNum); // If the label has already been resolved to an immediate offset (say, when @@ -1813,6 +1817,12 @@ void AArch64InstPrinter::printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + // Do not print the numeric target address when symbolizing. + // However, do print for ADRP, as this is typically used together with an ADD + // or an immediate-offset ldr/str and the label is likely at the wrong point. + if (SymbolizeOperands && MI->getOpcode() != AArch64::ADRP) + return; + const MCOperand &Op = MI->getOperand(OpNum); // If the label has already been resolved to an immediate offset (say, when diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 18a253b4d9f48..1f634d21df51a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2364,7 +2364,7 @@ def HasExportInsts : Predicate<"Subtarget->hasExportInsts()">, AssemblerPredicate<(all_of (not FeatureGFX90AInsts), (not FeatureGFX1250Insts))>; def HasVINTERPEncoding : Predicate<"Subtarget->hasVINTERPEncoding()">, - AssemblerPredicate<(all_of FeatureGFX11Insts)>; + AssemblerPredicate<(all_of FeatureGFX11Insts, (not FeatureGFX1250Insts))>; def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, AssemblerPredicate<(all_of FeatureGFX9Insts)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index b2ddc6e88966b..6a59a28b1d32c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -171,8 +171,7 @@ void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) { MI.eraseFromParent(); } -const std::pair -RegBankLegalizeHelper::unpackZExt(Register Reg) { +std::pair RegBankLegalizeHelper::unpackZExt(Register Reg) { auto PackedS32 = 
B.buildBitcast(SgprRB_S32, Reg); auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff); auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask); @@ -180,16 +179,14 @@ RegBankLegalizeHelper::unpackZExt(Register Reg) { return {Lo.getReg(0), Hi.getReg(0)}; } -const std::pair -RegBankLegalizeHelper::unpackSExt(Register Reg) { +std::pair RegBankLegalizeHelper::unpackSExt(Register Reg) { auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg); auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16); auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16)); return {Lo.getReg(0), Hi.getReg(0)}; } -const std::pair -RegBankLegalizeHelper::unpackAExt(Register Reg) { +std::pair RegBankLegalizeHelper::unpackAExt(Register Reg) { auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg); auto Lo = PackedS32; auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index 50bd86dc15a1f..08cc7d43bd78e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -111,9 +111,9 @@ class RegBankLegalizeHelper { SmallSet &SgprWaterfallOperandRegs); void lowerVccExtToSel(MachineInstr &MI); - const std::pair unpackZExt(Register Reg); - const std::pair unpackSExt(Register Reg); - const std::pair unpackAExt(Register Reg); + std::pair unpackZExt(Register Reg); + std::pair unpackSExt(Register Reg); + std::pair unpackAExt(Register Reg); void lowerUnpackBitShift(MachineInstr &MI); void lowerV_BFE(MachineInstr &MI); void lowerS_BFE(MachineInstr &MI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index a7b08794fdf1b..b20760c356263 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4541,6 +4541,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr 
&MI) const { case Intrinsic::amdgcn_cvt_pknorm_u16: case Intrinsic::amdgcn_cvt_pk_i16: case Intrinsic::amdgcn_cvt_pk_u16: + case Intrinsic::amdgcn_cvt_pk_f16_fp8: + case Intrinsic::amdgcn_cvt_pk_f16_bf8: case Intrinsic::amdgcn_fmed3: case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td index 383e3371993d6..d9d7a650dfc21 100644 --- a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td @@ -141,7 +141,7 @@ def : GCNPat < } // End SubtargetPredicate = isGFX11Only -let SubtargetPredicate = isGFX12Plus in { +let SubtargetPredicate = isGFX12PlusNot12_50 in { def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>; def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>; @@ -156,7 +156,7 @@ def : GCNPat < (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1) >; -} // End SubtargetPredicate = isGFX12Only +} // End SubtargetPredicate = isGFX12PlusNot12_50. 
//===----------------------------------------------------------------------===// // GFX11 diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 27b3d6bc9440c..59c72fcbff18a 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -599,6 +599,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, DecW, Address, CS)) break; + if (isGFX1250() && + tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI, + DecW, Address, CS)) + break; + if (isGFX12() && tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI, DecW, Address, CS)) @@ -661,9 +666,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS)) break; - // FIXME: DecoderTableGFX125064 is not defined yet. if (isGFX1250() && - tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS)) + tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI, + QW, Address, CS)) break; if (isGFX12() && @@ -722,10 +727,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Address, CS)) break; - // FIXME: Should use DecoderTableGFX1250_FAKE1632, but it is not generated - // yet. 
if (isGFX1250() && - tryDecodeInst(DecoderTableGFX125032, MI, DW, Address, CS)) + tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI, + DW, Address, CS)) break; if (isGFX12() && diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 927af726a8664..89574fdd0ef3f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -697,9 +697,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return !hasGFX940Insts() && !hasGFX1250Insts(); } - bool hasVINTERPEncoding() const { - return GFX11Insts; - } + bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); } // DS_ADD_F64/DS_ADD_RTN_F64 bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8827de2b2a537..0cca7a4fe9197 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8736,11 +8736,15 @@ SDValue SITargetLowering::lowerImage(SDValue Op, : False); if (IsGFX10Plus) Ops.push_back(IsA16 ? 
True : False); - if (!Subtarget->hasGFX90AInsts()) { + + if (!Subtarget->hasGFX90AInsts()) Ops.push_back(TFE); // tfe - } else if (TFE->getAsZExtVal()) { - report_fatal_error("TFE is not supported on this GPU"); + else if (TFE->getAsZExtVal()) { + DAG.getContext()->diagnose(DiagnosticInfoUnsupported( + DAG.getMachineFunction().getFunction(), + "TFE is not supported on this GPU", DL.getDebugLoc())); } + if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA) Ops.push_back(LWE); // lwe if (!IsGFX10Plus) @@ -8771,9 +8775,23 @@ SDValue SITargetLowering::lowerImage(SDValue Op, if (Subtarget->hasGFX90AInsts()) { Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a, NumVDataDwords, NumVAddrDwords); - if (Opcode == -1) - report_fatal_error( - "requested image instruction is not supported on this GPU"); + if (Opcode == -1) { + DAG.getContext()->diagnose(DiagnosticInfoUnsupported( + DAG.getMachineFunction().getFunction(), + "requested image instruction is not supported on this GPU", + DL.getDebugLoc())); + + unsigned Idx = 0; + SmallVector RetValues(OrigResultTypes.size()); + for (EVT VT : OrigResultTypes) { + if (VT == MVT::Other) + RetValues[Idx++] = Op.getOperand(0); // Chain + else + RetValues[Idx++] = DAG.getPOISON(VT); + } + + return DAG.getMergeValues(RetValues, DL); + } } if (Opcode == -1 && Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index f43831016952a..fdd5834e3b9a7 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -302,12 +302,8 @@ class WaitcntBrackets { } unsigned getSgprScoresIdx(InstCounterType T) const { - if (T == SmemAccessCounter) - return 0; - if (T == X_CNT) - return 1; - - llvm_unreachable("Invalid SMEM counter"); + assert(isSmemCounter(T) && "Invalid SMEM counter"); + return T == X_CNT ? 
1 : 0; } unsigned getScoreLB(InstCounterType T) const { @@ -325,10 +321,8 @@ class WaitcntBrackets { } unsigned getRegScore(int GprNo, InstCounterType T) const { - if (GprNo < NUM_ALL_VGPRS) { + if (GprNo < NUM_ALL_VGPRS) return VgprScores[T][GprNo]; - } - assert(isSmemCounter(T)); return SgprScores[getSgprScoresIdx(T)][GprNo - NUM_ALL_VGPRS]; } @@ -866,7 +860,6 @@ void WaitcntBrackets::setScoreByInterval(RegInterval Interval, VgprUB = std::max(VgprUB, RegNo); VgprScores[CntTy][RegNo] = Score; } else { - assert(isSmemCounter(CntTy)); SgprUB = std::max(SgprUB, RegNo - NUM_ALL_VGPRS); SgprScores[getSgprScoresIdx(CntTy)][RegNo - NUM_ALL_VGPRS] = Score; } @@ -1006,12 +999,8 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } } } else if (T == X_CNT) { - for (const MachineOperand &Op : Inst.all_uses()) { - RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op); - for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) { - setRegScore(RegNo, T, CurrScore); - } - } + for (const MachineOperand &Op : Inst.all_uses()) + setScoreByOperand(&Inst, TRI, MRI, Op, T, CurrScore); } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ { // Match the score to the destination registers. 
// @@ -1353,7 +1342,13 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt( MachineInstr *WaitcntInstr = nullptr; MachineInstr *WaitcntVsCntInstr = nullptr; - LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It); + LLVM_DEBUG({ + dbgs() << "PreGFX12::applyPreexistingWaitcnt at: "; + if (It == OldWaitcntInstr.getParent()->instr_end()) + dbgs() << "end of block\n"; + else + dbgs() << *It; + }); for (auto &II : make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) { @@ -1507,7 +1502,13 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt( MachineInstr *CombinedStoreDsCntInstr = nullptr; MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {}; - LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It); + LLVM_DEBUG({ + dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: "; + if (It == OldWaitcntInstr.getParent()->instr_end()) + dbgs() << "end of block\n"; + else + dbgs() << *It; + }); for (auto &II : make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) { diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 02b912bcfb9e0..d504c8134202d 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -259,6 +259,12 @@ foreach vt = Reg32Types.types in { >; } +let HasOMod = 0, HasClamp = 0 in { + def VOPProfile_CVT_F32_BF16_gfx1250_t16 : VOPProfile_True16 ; + let HasOpSel = 1, EmitDstSel = 0 in + def VOPProfile_CVT_F32_BF16_gfx1250_fake16 : VOPProfile_Fake16 ; +} // End HasOMod = 0, HasClamp = 0 + let isReMaterializable = 1 in { let SchedRW = [WriteDoubleCvt] in { // OMod clears exceptions when set in this instruction @@ -309,8 +315,14 @@ let OtherPredicates = [UseRealTrue16Insts] in let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16, any_fpextend>; -let SubtargetPredicate = HasBF16ConversionInsts in -defm V_CVT_F32_BF16 : VOP1Inst_t16 
<"v_cvt_f32_bf16", VOP_F32_BF16>; +let SubtargetPredicate = HasGFX950Insts, OtherPredicates = [HasBF16ConversionInsts] in { + defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>; +} +let SubtargetPredicate = isGFX1250Plus, OtherPredicates = [HasBF16ConversionInsts] in { + defm V_CVT_F32_BF16_gfx1250 : VOP1Inst_t16_with_profiles <"v_cvt_f32_bf16_gfx1250", VOP_F32_BF16, + VOPProfile_CVT_F32_BF16_gfx1250_t16, + VOPProfile_CVT_F32_BF16_gfx1250_fake16>; +} let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; @@ -717,6 +729,24 @@ let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] } } +// FIXME-TRUE16: True16 versions of these instructions are untested. +let HasExtSDWA = 0, HasOpSel = 1, EmitDstSel = 0, HasOMod = 0, HasModifiers = 1 in { +def VOPProfile_CVT_PK_F16_F8 : VOPProfile<[v2f16, i16, untyped, untyped]>; +def VOPProfile_CVT_PK_F16_F8_true16 : VOP3_Profile_True16; +def VOPProfile_CVT_PK_F16_F8_fake16 : VOP3_Profile_Fake16; +} + +let SubtargetPredicate = isGFX1250Plus in { + let mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { + defm V_CVT_PK_F16_FP8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_fp8", + VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16, + int_amdgcn_cvt_pk_f16_fp8>; + defm V_CVT_PK_F16_BF8 : VOP1Inst_t16_with_profiles<"v_cvt_pk_f16_bf8", + VOPProfile_CVT_PK_F16_F8, VOPProfile_CVT_PK_F16_F8_true16, VOPProfile_CVT_PK_F16_F8_fake16, + int_amdgcn_cvt_pk_f16_bf8>; + } +} // End SubtargetPredicate = isGFX1250Plus + let SubtargetPredicate = isGFX10Plus in { defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT>; @@ -980,6 +1010,13 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name op, VOP1_Real_e32_with_name, VOP3_Real_with_name; +multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250< + bits<9> op, string asmName = !tolower(NAME), string opName = NAME> { + defm opName#"_t16" : + 
VOP1_Real_FULL_with_name; + defm opName#"_fake16": + VOP1_Real_FULL_with_name; +} defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name; @@ -1042,6 +1079,10 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>; defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>; defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>; +defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">; +defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>; +defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>; + //===----------------------------------------------------------------------===// // GFX10. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 0b64b504466c8..1e47acb5fde4f 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1679,6 +1679,7 @@ class Base_VOP3_DPP8_t16 op, VOP_Pseudo ps, string opName = ps.OpName> let SchedRW = ps.SchedRW; let Uses = ps.Uses; + let SubtargetPredicate = ps.SubtargetPredicate; let OtherPredicates = ps.OtherPredicates; let True16Predicate = ps.True16Predicate; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index b67161b060638..c106835bdf3a8 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -6186,7 +6186,7 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), Read.getValue(1))); - Results.push_back(Read.getOperand(0)); + Results.push_back(Read.getValue(2)); // Chain } /// \p BC is a bitcast that is about to be turned into a VMOVDRR. 
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 0a9b2bb99f7eb..71eb1349314ea 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -308,9 +308,8 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { NeedsTransform = true; } else if (AllocaInst *Alloca = dyn_cast(PtrOperand)) { Type *AllocatedType = Alloca->getAllocatedType(); - // OrigGEPType might just be a pointer lets make sure - // to add the allocated type so we have a size - if (AllocatedType != OrigGEPType) { + // Only transform if the allocated type is an array + if (AllocatedType != OrigGEPType && isa(AllocatedType)) { NewGEPType = AllocatedType; NeedsTransform = true; } diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index cb58f4833631d..c8866bfefdfc5 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -11,6 +11,7 @@ /// Language (DXIL). //===----------------------------------------------------------------------===// +#include "DXILRootSignature.h" #include "DXILShaderFlags.h" #include "DirectX.h" #include "DirectXIRPasses/PointerTypeAnalysis.h" @@ -286,12 +287,21 @@ class DXILPrepareModule : public ModulePass { } // Remove flags not for DXIL. 
cleanModuleFlags(M); + + // dx.rootsignatures will have been parsed from its metadata form as its + // binary form as part of the RootSignatureAnalysisWrapper, so safely + // remove it as it is not recognized in DXIL + if (NamedMDNode *RootSignature = M.getNamedMetadata("dx.rootsignatures")) + RootSignature->eraseFromParent(); + return true; } DXILPrepareModule() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -305,6 +315,7 @@ char DXILPrepareModule::ID = 0; INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) +INITIALIZE_PASS_DEPENDENCY(RootSignatureAnalysisWrapper) INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false, false) diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index c5d176596d8c6..616640152c8d3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -319,3 +319,19 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; let Predicates = [HasBasicD, IsLA64] in { def : PatFpr; } // Predicates = [HasBasicD, IsLA64] + +/// Pseudo-instructions needed for the soft-float ABI with LA32D + +let Predicates = [HasBasicD, IsLA32] in { +// Moves two GPRs to an FPR. +let usesCustomInserter = 1 in +def BuildPairF64Pseudo + : Pseudo<(outs FPR64:$dst), (ins GPR:$src1, GPR:$src2), + [(set FPR64:$dst, (loongarch_build_pair_f64 GPR:$src1, GPR:$src2))]>; + +// Moves an FPR to two GPRs. 
+let usesCustomInserter = 1 in +def SplitPairF64Pseudo + : Pseudo<(outs GPR:$dst1, GPR:$dst2), (ins FPR64:$src), + [(set GPR:$dst1, GPR:$dst2, (loongarch_split_pair_f64 FPR64:$src))]>; +} // Predicates = [HasBasicD, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 6946ed554a7e5..cab1d83ddac4a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -169,6 +169,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + if (Subtarget.hasBasicD()) + setOperationAction(ISD::BITCAST, MVT::i64, Custom); } setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -2713,13 +2715,20 @@ SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); + EVT VT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); - if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && + if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && Subtarget.hasBasicF()) { SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); } + if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); + return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi); + } return Op; } @@ -4006,6 +4015,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( SDValue Dst = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); + } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) 
{ + SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL, + DAG.getVTList(MVT::i32, MVT::i32), Src); + SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, + NewReg.getValue(0), NewReg.getValue(1)); + Results.push_back(RetReg); } break; } @@ -5649,6 +5664,37 @@ static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue +performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + SDValue Op0 = N->getOperand(0); + SDLoc DL(N); + + // If the input to SplitPairF64 is just BuildPairF64 then the operation is + // redundant. Instead, use BuildPairF64's operands directly. + if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64) + return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); + + if (Op0->isUndef()) { + SDValue Lo = DAG.getUNDEF(MVT::i32); + SDValue Hi = DAG.getUNDEF(MVT::i32); + return DCI.CombineTo(N, Lo, Hi); + } + + // It's cheaper to materialise two 32-bit integers than to load a double + // from the constant pool and transfer it to integer registers through the + // stack. 
+ if (ConstantFPSDNode *C = dyn_cast(Op0)) { + APInt V = C->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); + SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); + return DCI.CombineTo(N, Lo, Hi); + } + + return SDValue(); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -5676,6 +5722,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, case LoongArchISD::VMSKLTZ: case LoongArchISD::XVMSKLTZ: return performVMSKLTZCombine(N, DAG, DCI, Subtarget); + case LoongArchISD::SPLIT_PAIR_F64: + return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget); } return SDValue(); } @@ -6072,6 +6120,50 @@ emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, return BB; } +static MachineBasicBlock * +emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo && + "Unexpected instruction"); + + MachineFunction &MF = *BB->getParent(); + DebugLoc DL = MI.getDebugLoc(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); + + BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg); + BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg) + .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill())); + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} + +static MachineBasicBlock * +emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo && + "Unexpected instruction"); + + MachineFunction &MF = *BB->getParent(); + DebugLoc DL = MI.getDebugLoc(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass); + Register DstReg = MI.getOperand(0).getReg(); + Register LoReg = MI.getOperand(1).getReg(); + Register HiReg = MI.getOperand(2).getReg(); + + BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg) + .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())); + BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg) + .addReg(TmpReg, RegState::Kill) + .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())); + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; +} + static bool isSelectPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { default: @@ -6252,6 +6344,10 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( } case LoongArch::Select_GPR_Using_CC_GPR: return emitSelectPseudo(MI, BB, Subtarget); + case LoongArch::BuildPairF64Pseudo: + return emitBuildPairF64Pseudo(MI, BB, Subtarget); + case LoongArch::SplitPairF64Pseudo: + return emitSplitPairF64Pseudo(MI, BB, Subtarget); case LoongArch::PseudoVBZ: case LoongArch::PseudoVBZ_B: case LoongArch::PseudoVBZ_H: @@ -6348,6 +6444,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MOVGR2FR_W_LA64) NODE_NAME_CASE(MOVFR2GR_S_LA64) NODE_NAME_CASE(FTINT) + NODE_NAME_CASE(BUILD_PAIR_F64) + NODE_NAME_CASE(SPLIT_PAIR_F64) NODE_NAME_CASE(REVB_2H) NODE_NAME_CASE(REVB_2W) NODE_NAME_CASE(BITREV_4B) @@ -6527,21 +6625,6 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, break; } - // FPR32 and FPR64 alias each other. - if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) - UseGPRForFloat = true; - - if (UseGPRForFloat && ValVT == MVT::f32) { - LocVT = GRLenVT; - LocInfo = CCValAssign::BCvt; - } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { - LocVT = MVT::i64; - LocInfo = CCValAssign::BCvt; - } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { - // TODO: Handle passing f64 on LA32 with D feature. - report_fatal_error("Passing f64 with GPR on LA32 is undefined"); - } - // If this is a variadic argument, the LoongArch calling convention requires // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 // byte alignment. An aligned register should be used regardless of whether @@ -6564,6 +6647,45 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, assert(PendingLocs.size() == PendingArgFlags.size() && "PendingLocs and PendingArgFlags out of sync"); + // FPR32 and FPR64 alias each other. 
+ if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) + UseGPRForFloat = true; + + if (UseGPRForFloat && ValVT == MVT::f32) { + LocVT = GRLenVT; + LocInfo = CCValAssign::BCvt; + } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { + // Handle passing f64 on LA32D with a soft float ABI or when floating point + // registers are exhausted. + assert(PendingLocs.empty() && "Can't lower f64 if it is split"); + // Depending on available argument GPRS, f64 may be passed in a pair of + // GPRs, split between a GPR and the stack, or passed completely on the + // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these + // cases. + MCRegister Reg = State.AllocateReg(ArgGPRs); + if (!Reg) { + int64_t StackOffset = State.AllocateStack(8, Align(8)); + State.addLoc( + CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + return false; + } + LocVT = MVT::i32; + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + MCRegister HiReg = State.AllocateReg(ArgGPRs); + if (HiReg) { + State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); + } else { + int64_t StackOffset = State.AllocateStack(4, Align(4)); + State.addLoc( + CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + } + return false; + } + // Split arguments might be passed indirectly, so keep track of the pending // values. 
if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { @@ -6764,6 +6886,38 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); } +static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, + const CCValAssign &HiVA, + const SDLoc &DL) { + assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && + "Unexpected VA"); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + assert(VA.isRegLoc() && "Expected register VA assignment"); + + Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass); + RegInfo.addLiveIn(VA.getLocReg(), LoVReg); + SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); + SDValue Hi; + if (HiVA.isMemLoc()) { + // Second half of f64 is passed on the stack. + int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), + /*IsImmutable=*/true); + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(MF, FI)); + } else { + // Second half of f64 is passed in another GPR. 
+ Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass); + RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); + Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); + } + return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi); +} + static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL) { EVT LocVT = VA.getLocVT(); @@ -6861,11 +7015,16 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( else analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { CCValAssign &VA = ArgLocs[i]; SDValue ArgValue; - if (VA.isRegLoc()) - ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this); + // Passing f64 on LA32D with a soft float ABI must be handled as a special + // case. + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.needsCustom()); + ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); + } else if (VA.isRegLoc()) + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); else ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); if (VA.getLocInfo() == CCValAssign::Indirect) { @@ -6873,17 +7032,18 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( // load all parts of it here (using the same address). 
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo())); - unsigned ArgIndex = Ins[i].OrigArgIndex; - unsigned ArgPartOffset = Ins[i].PartOffset; + unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; + unsigned ArgPartOffset = Ins[InsIdx].PartOffset; assert(ArgPartOffset == 0); - while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { + while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { CCValAssign &PartVA = ArgLocs[i + 1]; - unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; + unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, MachinePointerInfo())); ++i; + ++InsIdx; } continue; } @@ -7112,31 +7272,67 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVector> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; - for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; + ++i, ++OutIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue ArgValue = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; + SDValue ArgValue = OutVals[OutIdx]; + ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; + + // Handle passing f64 on LA32D with a soft float ABI as a special case. + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.isRegLoc() && "Expected register VA assignment"); + assert(VA.needsCustom()); + SDValue SplitF64 = + DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL, + DAG.getVTList(MVT::i32, MVT::i32), ArgValue); + SDValue Lo = SplitF64.getValue(0); + SDValue Hi = SplitF64.getValue(1); + + Register RegLo = VA.getLocReg(); + RegsToPass.push_back(std::make_pair(RegLo, Lo)); + + // Get the CCValAssign for the Hi part. 
+ CCValAssign &HiVA = ArgLocs[++i]; + + if (HiVA.isMemLoc()) { + // Second half of f64 is passed on the stack. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT); + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); + // Emit the store. + MemOpChains.push_back(DAG.getStore( + Chain, DL, Hi, Address, + MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset()))); + } else { + // Second half of f64 is passed in another GPR. + Register RegHigh = HiVA.getLocReg(); + RegsToPass.push_back(std::make_pair(RegHigh, Hi)); + } + continue; + } // Promote the value if needed. // For now, only handle fully promoted and indirect arguments. if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. Align StackAlign = - std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), + std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), getPrefTypeAlign(ArgValue.getValueType(), DAG)); TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); // If the original argument was split and passed by reference, we need to // store the required parts of it here (and pass just one address). - unsigned ArgIndex = Outs[i].OrigArgIndex; - unsigned ArgPartOffset = Outs[i].PartOffset; + unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; + unsigned ArgPartOffset = Outs[OutIdx].PartOffset; assert(ArgPartOffset == 0); // Calculate the total size to store. We don't have access to what we're // actually storing other than performing the loop and collecting the // info. 
SmallVector> Parts; - while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { - SDValue PartValue = OutVals[i + 1]; - unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; + while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[OutIdx + 1]; + unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); EVT PartVT = PartValue.getValueType(); @@ -7144,6 +7340,7 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); Parts.push_back(std::make_pair(PartValue, Offset)); ++i; + ++OutIdx; } SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); int FI = cast(SpillSlot)->getIndex(); @@ -7279,7 +7476,8 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); // Copy all of the result registers out of their specified physreg. - for (auto &VA : RVLocs) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + auto &VA = RVLocs[i]; // Copy the value out. 
SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); @@ -7287,7 +7485,16 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); - RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + assert(VA.needsCustom()); + SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), + MVT::i32, Glue); + Chain = RetValue2.getValue(1); + Glue = RetValue2.getValue(2); + RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, + RetValue, RetValue2); + } else + RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); InVals.push_back(RetValue); } @@ -7333,17 +7540,37 @@ SDValue LoongArchTargetLowering::LowerReturn( SmallVector RetOps(1, Chain); // Copy the result values into the output registers. - for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { + for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { + SDValue Val = OutVals[OutIdx]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - // Handle a 'normal' return. - SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { + // Handle returning f64 on LA32D with a soft float ABI. 
+ assert(VA.isRegLoc() && "Expected return via registers"); + assert(VA.needsCustom()); + SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL, + DAG.getVTList(MVT::i32, MVT::i32), Val); + SDValue Lo = SplitF64.getValue(0); + SDValue Hi = SplitF64.getValue(1); + Register RegLo = VA.getLocReg(); + Register RegHi = RVLocs[++i].getLocReg(); + + Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); + Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); + } else { + // Handle a 'normal' return. + Val = convertValVTToLocVT(DAG, Val, VA, DL); + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); - // Guarantee that all emitted copies are stuck together. - Glue = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + // Guarantee that all emitted copies are stuck together. + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } } RetOps[0] = Chain; // Update chain. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 79aa89726191b..60dc2b385a75c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -60,6 +60,10 @@ enum NodeType : unsigned { FTINT, + // Build and split F64 pair + BUILD_PAIR_F64, + SPLIT_PAIR_F64, + // Bit counting operations CLZ_W, CTZ_W, diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index b6552ed33f5b1..2b94e65cac0e5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -58,6 +58,13 @@ def SDT_LoongArchMovgr2fcsr : SDTypeProfile<0, 2, [SDTCisVT<0, GRLenVT>, def SDT_LoongArchMovfcsr2gr : SDTypeProfile<1, 1, [SDTCisVT<0, GRLenVT>, SDTCisSameAs<0, 1>]>; +def SDT_LoongArchBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, + SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; +def SDT_LoongArchSplitPairF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisVT<2, f64>]>; + // TODO: Add LoongArch specific DAG Nodes // Target-independent nodes, but with target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, @@ -165,6 +172,11 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D", def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp, [SDNPHasChain]>; +def loongarch_build_pair_f64 : SDNode<"LoongArchISD::BUILD_PAIR_F64", + SDT_LoongArchBuildPairF64>; +def loongarch_split_pair_f64 : SDNode<"LoongArchISD::SPLIT_PAIR_F64", + SDT_LoongArchSplitPairF64>; + def to_fclass_mask: SDNodeXFormgetZExtValue(); unsigned Mask = 0; diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index cc79257fb9c86..28f6968ee6caf 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -457,3 +457,25 @@ void NVPTXInstPrinter::printCTAGroup(const MCInst *MI, int OpNum, } llvm_unreachable("Invalid cta_group in printCTAGroup"); } + +void NVPTXInstPrinter::printCallOperand(const MCInst *MI, int OpNum, + raw_ostream &O, StringRef Modifier) { + const MCOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Invalid operand"); + const auto Imm = MO.getImm(); + + if (Modifier == "RetList") { + assert((Imm == 1 || Imm == 0) && "Invalid return list"); + if (Imm) + O << " (retval0),"; + return; + } + + if (Modifier == "ParamList") { + assert(Imm >= 0 && "Invalid parameter list"); + interleaveComma(llvm::seq(Imm), O, + [&](const auto &I) { O << "param" << I; }); + return; + } + llvm_unreachable("Invalid modifier"); +} diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index f73af7a3f2c6e..6189284e8a58c 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -52,6 +52,8 @@ class NVPTXInstPrinter : public MCInstPrinter { void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O); void printTmaReductionMode(const MCInst *MI, 
int OpNum, raw_ostream &O); void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O); + void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O, + StringRef Modifier = {}); }; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index ff10eea371049..61fe8a53cb63a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -160,15 +160,9 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreParam: case NVPTXISD::StoreParamV2: case NVPTXISD::StoreParamV4: - case NVPTXISD::StoreParamS32: - case NVPTXISD::StoreParamU32: if (tryStoreParam(N)) return; break; - case ISD::INTRINSIC_WO_CHAIN: - if (tryIntrinsicNoChain(N)) - return; - break; case ISD::INTRINSIC_W_CHAIN: if (tryIntrinsicChain(N)) return; @@ -904,25 +898,6 @@ NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain, return {InstructionOrdering, Scope}; } -bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { - unsigned IID = N->getConstantOperandVal(0); - switch (IID) { - default: - return false; - case Intrinsic::nvvm_texsurf_handle_internal: - SelectTexSurfHandle(N); - return true; - } -} - -void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { - // Op 0 is the intrinsic ID - SDValue Wrapper = N->getOperand(1); - SDValue GlobalVal = Wrapper.getOperand(0); - ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), - MVT::i64, GlobalVal)); -} - void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { SDValue Src = N->getOperand(0); AddrSpaceCastSDNode *CastN = cast(N); @@ -1717,8 +1692,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode"); - case NVPTXISD::StoreParamU32: - case NVPTXISD::StoreParamS32: case NVPTXISD::StoreParam: NumElts = 1; break; @@ -1796,27 +1769,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { } } break; - // Special case: if we have a 
sign-extend/zero-extend node, insert the - // conversion instruction first, and use that as the value operand to - // the selected StoreParam node. - case NVPTXISD::StoreParamU32: { - Opcode = NVPTX::StoreParamI32_r; - SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, - MVT::i32); - SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, - MVT::i32, Ops[0], CvtNone); - Ops[0] = SDValue(Cvt, 0); - break; - } - case NVPTXISD::StoreParamS32: { - Opcode = NVPTX::StoreParamI32_r; - SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, - MVT::i32); - SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, - MVT::i32, Ops[0], CvtNone); - Ops[0] = SDValue(Cvt, 0); - break; - } } SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); @@ -2105,22 +2057,14 @@ static inline bool isAddLike(const SDValue V) { // selectBaseADDR - Match a dag node which will serve as the base address for an // ADDR operand pair. static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) { - // Return true if TGA or ES. 
- if (N.getOpcode() == ISD::TargetGlobalAddress || - N.getOpcode() == ISD::TargetExternalSymbol) - return N; - - if (N.getOpcode() == NVPTXISD::Wrapper) - return N.getOperand(0); - - // addrspacecast(Wrapper(arg_symbol) to addrspace(PARAM)) -> arg_symbol - if (AddrSpaceCastSDNode *CastN = dyn_cast(N)) - if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC && - CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM && - CastN->getOperand(0).getOpcode() == NVPTXISD::Wrapper) - return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG); - - if (auto *FIN = dyn_cast(N)) + if (const auto *GA = dyn_cast(N)) + return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), + GA->getValueType(0), GA->getOffset(), + GA->getTargetFlags()); + if (const auto *ES = dyn_cast(N)) + return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), + ES->getTargetFlags()); + if (const auto *FIN = dyn_cast(N)) return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0)); return N; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index ff58e4486a222..92b5c773258ed 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -69,7 +69,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { #include "NVPTXGenDAGISel.inc" void Select(SDNode *N) override; - bool tryIntrinsicNoChain(SDNode *N); bool tryIntrinsicChain(SDNode *N); bool tryIntrinsicVoid(SDNode *N); void SelectTexSurfHandle(SDNode *N); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index d2fafe854e9e4..b924a1f5ac93c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -702,9 +702,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::BRIND, MVT::Other, Expand); - 
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - // We want to legalize constant related memmove and memcopy // intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -1055,45 +1052,24 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::FIRST_NUMBER: break; - MAKE_CASE(NVPTXISD::CALL) MAKE_CASE(NVPTXISD::RET_GLUE) - MAKE_CASE(NVPTXISD::LOAD_PARAM) - MAKE_CASE(NVPTXISD::Wrapper) MAKE_CASE(NVPTXISD::DeclareParam) MAKE_CASE(NVPTXISD::DeclareScalarParam) MAKE_CASE(NVPTXISD::DeclareRet) - MAKE_CASE(NVPTXISD::DeclareScalarRet) MAKE_CASE(NVPTXISD::DeclareRetParam) - MAKE_CASE(NVPTXISD::PrintCall) - MAKE_CASE(NVPTXISD::PrintConvergentCall) - MAKE_CASE(NVPTXISD::PrintCallUni) - MAKE_CASE(NVPTXISD::PrintConvergentCallUni) + MAKE_CASE(NVPTXISD::CALL) MAKE_CASE(NVPTXISD::LoadParam) MAKE_CASE(NVPTXISD::LoadParamV2) MAKE_CASE(NVPTXISD::LoadParamV4) MAKE_CASE(NVPTXISD::StoreParam) MAKE_CASE(NVPTXISD::StoreParamV2) MAKE_CASE(NVPTXISD::StoreParamV4) - MAKE_CASE(NVPTXISD::StoreParamS32) - MAKE_CASE(NVPTXISD::StoreParamU32) - MAKE_CASE(NVPTXISD::CallArgBegin) - MAKE_CASE(NVPTXISD::CallArg) - MAKE_CASE(NVPTXISD::LastCallArg) - MAKE_CASE(NVPTXISD::CallArgEnd) - MAKE_CASE(NVPTXISD::CallVoid) - MAKE_CASE(NVPTXISD::CallVal) - MAKE_CASE(NVPTXISD::CallSymbol) - MAKE_CASE(NVPTXISD::Prototype) MAKE_CASE(NVPTXISD::MoveParam) MAKE_CASE(NVPTXISD::StoreRetval) MAKE_CASE(NVPTXISD::StoreRetvalV2) MAKE_CASE(NVPTXISD::StoreRetvalV4) - MAKE_CASE(NVPTXISD::PseudoUseParam) MAKE_CASE(NVPTXISD::UNPACK_VECTOR) MAKE_CASE(NVPTXISD::BUILD_VECTOR) - MAKE_CASE(NVPTXISD::RETURN) - MAKE_CASE(NVPTXISD::CallSeqBegin) - MAKE_CASE(NVPTXISD::CallSeqEnd) MAKE_CASE(NVPTXISD::CallPrototype) MAKE_CASE(NVPTXISD::ProxyReg) MAKE_CASE(NVPTXISD::LoadV2) @@ -1115,7 +1091,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(NVPTXISD::STACKSAVE) 
MAKE_CASE(NVPTXISD::SETP_F16X2) MAKE_CASE(NVPTXISD::SETP_BF16X2) - MAKE_CASE(NVPTXISD::Dummy) MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED) MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED) MAKE_CASE(NVPTXISD::BrxEnd) @@ -1189,15 +1164,6 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, } } -SDValue -NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - const GlobalAddressSDNode *GAN = cast(Op); - auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); - Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); - return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); -} - std::string NVPTXTargetLowering::getPrototype( const DataLayout &DL, Type *retTy, const ArgListTy &Args, const SmallVectorImpl &Outs, MaybeAlign RetAlign, @@ -1601,9 +1567,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ? promoteScalarArgumentSize(TypeSize * 8) : TypeSize * 8; - Chain = DAG.getNode( - NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue}, - {Chain, GetI32(ArgI), GetI32(PromotedSize), GetI32(0), InGlue}); + Chain = + DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue}, + {Chain, GetI32(ArgI), GetI32(PromotedSize), InGlue}); } InGlue = Chain.getValue(1); @@ -1740,16 +1706,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy); if (!shouldPassAsArray(RetTy)) { const unsigned PromotedResultSize = promoteScalarArgumentSize(ResultSize); - SDValue DeclareRetOps[] = {Chain, GetI32(1), GetI32(PromotedResultSize), - GetI32(0), InGlue}; Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, {MVT::Other, MVT::Glue}, - DeclareRetOps); + {Chain, GetI32(PromotedResultSize), InGlue}); InGlue = Chain.getValue(1); } else { - SDValue DeclareRetOps[] = {Chain, GetI32(RetAlign->value()), - GetI32(ResultSize / 8), GetI32(0), InGlue}; - Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, - 
{MVT::Other, MVT::Glue}, DeclareRetOps); + Chain = DAG.getNode( + NVPTXISD::DeclareRetParam, dl, {MVT::Other, MVT::Glue}, + {Chain, GetI32(RetAlign->value()), GetI32(ResultSize / 8), InGlue}); InGlue = Chain.getValue(1); } } @@ -1800,25 +1763,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB, UniqueCallSite); const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); - SDValue ProtoOps[] = { - Chain, - DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), - InGlue, - }; - Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue}, - ProtoOps); + Chain = DAG.getNode( + NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue}, + {Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InGlue}); InGlue = Chain.getValue(1); } - // Op to just print "call" - SDValue PrintCallOps[] = {Chain, GetI32(Ins.empty() ? 0 : 1), InGlue}; - // We model convergent calls as separate opcodes. - unsigned Opcode = - IsIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni; - if (CLI.IsConvergent) - Opcode = Opcode == NVPTXISD::PrintCallUni ? 
NVPTXISD::PrintConvergentCallUni - : NVPTXISD::PrintConvergentCall; - Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, PrintCallOps); - InGlue = Chain.getValue(1); if (ConvertToIndirectCall) { // Copy the function ptr to a ptx register and use the register to call the @@ -1832,38 +1781,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT); } - // Ops to print out the function name - SDValue CallVoidOps[] = { Chain, Callee, InGlue }; - Chain = - DAG.getNode(NVPTXISD::CallVoid, dl, {MVT::Other, MVT::Glue}, CallVoidOps); - InGlue = Chain.getValue(1); - - // Ops to print out the param list - SDValue CallArgBeginOps[] = { Chain, InGlue }; - Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, {MVT::Other, MVT::Glue}, - CallArgBeginOps); + const unsigned Proto = IsIndirectCall ? UniqueCallSite : 0; + const unsigned NumArgs = + std::min(CLI.NumFixedArgs + 1, Args.size()); + /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns, + /// NumParams, Callee, Proto, InGlue) + Chain = DAG.getNode(NVPTXISD::CALL, dl, {MVT::Other, MVT::Glue}, + {Chain, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall), + GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee, + GetI32(Proto), InGlue}); InGlue = Chain.getValue(1); - const unsigned E = std::min(CLI.NumFixedArgs + 1, Args.size()); - for (const unsigned I : llvm::seq(E)) { - const unsigned Opcode = - I == (E - 1) ? NVPTXISD::LastCallArg : NVPTXISD::CallArg; - SDValue CallArgOps[] = {Chain, GetI32(1), GetI32(I), InGlue}; - Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, CallArgOps); - InGlue = Chain.getValue(1); - } - SDValue CallArgEndOps[] = {Chain, GetI32(IsIndirectCall ? 
0 : 1), InGlue}; - Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, {MVT::Other, MVT::Glue}, - CallArgEndOps); - InGlue = Chain.getValue(1); - - if (IsIndirectCall) { - SDValue PrototypeOps[] = {Chain, GetI32(UniqueCallSite), InGlue}; - Chain = DAG.getNode(NVPTXISD::Prototype, dl, {MVT::Other, MVT::Glue}, - PrototypeOps); - InGlue = Chain.getValue(1); - } - SmallVector ProxyRegOps; // An item of the vector is filled if the element does not need a ProxyReg // operation on it and should be added to InVals as is. ProxyRegOps and @@ -2919,8 +2847,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return SDValue(); case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return Op; case ISD::INTRINSIC_WO_CHAIN: @@ -3129,8 +3055,7 @@ SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout()); // Store the address of unsized array _vararg[] in the ap object. 
- SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT); - SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg); + SDValue VAReg = getParamSymbol(DAG, /* vararg */ -1, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1), @@ -3370,7 +3295,7 @@ SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { StringRef SavedStr = nvTM->getStrPool().save( getParamName(&DAG.getMachineFunction().getFunction(), idx)); - return DAG.getTargetExternalSymbol(SavedStr.data(), v); + return DAG.getExternalSymbol(SavedStr.data(), v); } SDValue NVPTXTargetLowering::LowerFormalArguments( @@ -3438,7 +3363,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( SDValue P; if (isKernelFunction(*F)) { - P = DAG.getNode(NVPTXISD::Wrapper, dl, ByvalIn.VT, ArgSymbol); + P = ArgSymbol; P.getNode()->setIROrder(Arg.getArgNo() + 1); } else { P = DAG.getNode(NVPTXISD::MoveParam, dl, ByvalIn.VT, ArgSymbol); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 0a54a8fd71f32..5efdd1582214a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -24,32 +24,19 @@ namespace NVPTXISD { enum NodeType : unsigned { // Start the numbering from where ISD NodeType finishes. FIRST_NUMBER = ISD::BUILTIN_OP_END, - Wrapper, - CALL, RET_GLUE, - LOAD_PARAM, DeclareParam, DeclareScalarParam, DeclareRetParam, DeclareRet, - DeclareScalarRet, - PrintCall, - PrintConvergentCall, - PrintCallUni, - PrintConvergentCallUni, - CallArgBegin, - CallArg, - LastCallArg, - CallArgEnd, - CallVoid, - CallVal, - CallSymbol, - Prototype, + + /// This node represents a PTX call instruction. 
It's operands are as follows: + /// + /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns, + /// NumParams, Callee, Proto, InGlue) + CALL, + MoveParam, - PseudoUseParam, - RETURN, - CallSeqBegin, - CallSeqEnd, CallPrototype, ProxyReg, FSHL_CLAMP, @@ -83,7 +70,6 @@ enum NodeType : unsigned { CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X, CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y, CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z, - Dummy, FIRST_MEMORY_OPCODE, LoadV2 = FIRST_MEMORY_OPCODE, @@ -100,8 +86,6 @@ enum NodeType : unsigned { StoreParam, StoreParamV2, StoreParamV4, - StoreParamS32, // to sext and store a <32bit value, not used currently - StoreParamU32, // to zext and store a <32bit value, not used currently StoreRetval, StoreRetvalV2, StoreRetvalV4, @@ -120,8 +104,6 @@ class NVPTXTargetLowering : public TargetLowering { const NVPTXSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - const char *getTargetNodeName(unsigned Opcode) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index bf84d1dca4ed5..e218ef17bb09b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -190,22 +190,4 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; -} - -bool NVPTXInstrInfo::isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - // Prevent the scheduler from reordering & splitting up MachineInstrs - // which must stick together (in initially set order) to - // comprise a valid PTX function call sequence. 
- switch (MI.getOpcode()) { - case NVPTX::CallUniPrintCallRetInst1: - case NVPTX::CallArgBeginInst: - case NVPTX::CallArgParam: - case NVPTX::LastCallArgParam: - case NVPTX::CallArgEndInst1: - return true; - } - - return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); -} +} \ No newline at end of file diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index 95464dbbd176d..4e9dc9d3b4686 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -66,9 +66,6 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded = nullptr) const override; - bool isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 5979054764647..1ea6d98a1df8e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1700,17 +1700,6 @@ def Offseti32imm : Operand { let PrintMethod = "printOffseti32imm"; } -def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; -def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; - -// Load a memory address into a u32 or u64 register. -def MOV_ADDR : BasicNVPTXInst<(outs B32:$dst), (ins ADDR_base:$a), - "mov.b32", - [(set i32:$dst, (Wrapper tglobaladdr:$a))]>; -def MOV_ADDR64 : BasicNVPTXInst<(outs B64:$dst), (ins ADDR_base:$a), - "mov.b64", - [(set i64:$dst, (Wrapper tglobaladdr:$a))]>; - // Get pointer to local stack. 
let hasSideEffects = false in { def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num), @@ -1750,8 +1739,27 @@ def BFMOV16i : MOVi; def FMOV32i : MOVi; def FMOV64i : MOVi; -def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>; -def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>; + +def to_tglobaladdr : SDNodeXFormgetTargetGlobalAddress(N->getGlobal(), SDLoc(N), + N->getValueType(0), N->getOffset(), + N->getTargetFlags()); +}]>; + +def to_texternsym : SDNodeXFormgetTargetExternalSymbol(N->getSymbol(), N->getValueType(0), + N->getTargetFlags()); +}]>; + +def to_tframeindex : SDNodeXFormgetTargetFrameIndex(N->getIndex(), N->getValueType(0)); +}]>; + +def : Pat<(i32 globaladdr:$dst), (IMOV32i (to_tglobaladdr $dst))>; +def : Pat<(i64 globaladdr:$dst), (IMOV64i (to_tglobaladdr $dst))>; + +def : Pat<(i32 externalsym:$dst), (IMOV32i (to_texternsym $dst))>; +def : Pat<(i64 externalsym:$dst), (IMOV64i (to_texternsym $dst))>; //---- Copy Frame Index ---- def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr), @@ -1759,10 +1767,6 @@ def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr), def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr), "add.u64 \t$dst, ${addr:add};", []>; -def to_tframeindex : SDNodeXFormgetTargetFrameIndex(N->getIndex(), N->getValueType(0)); -}]>; - def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>; def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>; @@ -1975,26 +1979,19 @@ defm FSetNUM : FSET_FORMAT; defm FSetNAN : FSET_FORMAT; def SDTDeclareParamProfile : - SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; + SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; def SDTDeclareScalarParamProfile : - SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; + SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; def 
SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>; def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>; -def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>; -def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; -def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; -def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; -def SDTCallValProfile : SDTypeProfile<1, 0, []>; -def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; -def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; def SDTProxyRegProfile : SDTypeProfile<1, 1, []>; def DeclareParam : @@ -2004,10 +2001,12 @@ def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def DeclareRetParam : - SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile, + SDNode<"NVPTXISD::DeclareRetParam", + SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def DeclareRet : - SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, + SDNode<"NVPTXISD::DeclareRet", + SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, @@ -2018,18 
+2017,6 @@ def LoadParamV2 : def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def PrintCall : - SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintConvergentCall : - SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintCallUni : - SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintConvergentCallUni : - SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; @@ -2039,33 +2026,6 @@ def StoreParamV2 : def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamU32 : - SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamS32 : - SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArgBegin : - SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArg : - SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def LastCallArg : - SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArgEnd : - SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallVoid : - SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def Prototype : - 
SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallVal : - SDNode<"NVPTXISD::CallVal", SDTCallValProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; def StoreRetval : @@ -2077,16 +2037,19 @@ def StoreRetvalV2 : def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, [SDNPHasChain, SDNPSideEffect]>; -def PseudoUseParam : - SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def RETURNNode : - SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, - [SDNPHasChain, SDNPSideEffect]>; def ProxyReg : SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; + /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns, + /// NumParams, Callee, Proto, InGlue) +def SDTCallProfile : SDTypeProfile<0, 6, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>, SDTCisVT<5, i32>]>; +def call : + SDNode<"NVPTXISD::CALL", SDTCallProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; + let mayLoad = true in { class LoadParamMemInst : NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b), @@ -2107,11 +2070,6 @@ let mayLoad = true in { []>; } -class LoadParamRegInst : - NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), - !strconcat("mov", opstr, " \t$dst, retval$b;"), - [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; - let mayStore = true in { multiclass StoreParamInst { @@ -2174,23 +2132,42 @@ let mayStore = true in { []>; } -let isCall=1 in { - multiclass CALL { - def PrintCallNoRetInst : NVPTXInst<(outs), (ins), - OpcStr # " ", [(OpNode 0)]>; - def PrintCallRetInst1 : NVPTXInst<(outs), (ins), - OpcStr # " (retval0), ", [(OpNode 1)]>; +/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns, +/// NumParams, Callee, Proto, InGlue) + 
+def CallOperand : Operand { let PrintMethod = "printCallOperand"; } + +foreach is_convergent = [0, 1] in { + defvar convergent_suffix = !if(is_convergent, "_conv", ""); + + let isCall = 1, isConvergent = is_convergent in { + def CALL # convergent_suffix : + NVPTXInst<(outs), + (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, + i32imm:$proto), + "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>; + + def CALL_UNI # convergent_suffix : + NVPTXInst<(outs), + (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params), + "call.uni${rets:RetList} $addr, (${params:ParamList});", []>; } -} -defm Call : CALL<"call", PrintCall>; -defm CallUni : CALL<"call.uni", PrintCallUni>; + defvar call_inst = !cast("CALL" # convergent_suffix); + def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto), + (call_inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>; + def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto), + (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>; + def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto), + (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>; -// Convergent call instructions. These are identical to regular calls, except -// they have the isConvergent bit set. 
-let isConvergent=1 in { - defm ConvergentCall : CALL<"call", PrintConvergentCall>; - defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>; + defvar call_uni_inst = !cast("CALL_UNI" # convergent_suffix); + def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0), + (call_uni_inst (to_tglobaladdr $addr), imm:$rets, imm:$params)>; + def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0), + (call_uni_inst $addr, imm:$rets, imm:$params)>; + def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0), + (call_uni_inst $addr, imm:$rets, imm:$params)>; } def LoadParamMemI64 : LoadParamMemInst; @@ -2244,69 +2221,30 @@ def StoreRetvalV4I32 : StoreRetvalV4Inst; def StoreRetvalV4I16 : StoreRetvalV4Inst; def StoreRetvalV4I8 : StoreRetvalV4Inst; -def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; -def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; -def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; -def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; - -def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", - [(CallArg 1, imm:$a)]>; -def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", - [(LastCallArg 1, imm:$a)]>; - -def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", - [(CallVoid (Wrapper tglobaladdr:$addr))]>; -def CallVoidInstReg : NVPTXInst<(outs), (ins B32:$addr), "$addr, ", - [(CallVoid i32:$addr)]>; -def CallVoidInstReg64 : NVPTXInst<(outs), (ins B64:$addr), "$addr, ", - [(CallVoid i64:$addr)]>; -def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", - [(Prototype (i32 imm:$val))]>; - def DeclareRetMemInst : - NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num), - ".param .align $align .b8 retval$num[$size];", - [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; + NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size), + 
".param .align $align .b8 retval0[$size];", + [(DeclareRetParam imm:$align, imm:$size)]>; def DeclareRetScalarInst : - NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), - ".param .b$size retval$num;", - [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; -def DeclareRetRegInst : - NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), - ".reg .b$size retval$num;", - [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; + NVPTXInst<(outs), (ins i32imm:$size), + ".param .b$size retval0;", + [(DeclareRet imm:$size)]>; def DeclareParamInst : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size), ".param .align $align .b8 param$a[$size];", - [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; + [(DeclareParam imm:$align, imm:$a, imm:$size)]>; def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), ".param .b$size param$a;", - [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; -def DeclareScalarRegInst : - NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), - ".reg .b$size param$a;", - [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; - -class MoveParamSymbolInst : - BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), - "mov.b" # t.Size, - [(set t.Ty:$dst, (MoveParam texternalsym:$src))]>; - -def MOV64_PARAM : MoveParamSymbolInst; -def MOV32_PARAM : MoveParamSymbolInst; - -class PseudoUseParamInst : - NVPTXInst<(outs), (ins regclass:$src), - "// Pseudo use of $src", - [(PseudoUseParam vt:$src)]>; + [(DeclareScalarParam imm:$a, imm:$size)]>; -def PseudoUseParamI64 : PseudoUseParamInst; -def PseudoUseParamI32 : PseudoUseParamInst; -def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamF64 : PseudoUseParamInst; -def PseudoUseParamF32 : PseudoUseParamInst; +foreach t = [I32RT, I64RT] in { + defvar inst_name = "MOV" # t.Size # "_PARAM"; + def inst_name : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), "mov.b" # t.Size>; + def : Pat<(MoveParam (t.Ty externalsym:$src)), + (!cast(inst_name) 
(t.Ty (to_texternsym $src)))>; +} multiclass ProxyRegInst { def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src), @@ -2861,21 +2799,6 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>; -def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def calltarget : Operand; -let isCall=1 in { - def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>; -} - -def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; -def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>; - -// Pseudo instructions. -class Pseudo pattern> - : NVPTXInst; - def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), "\\{ // callseq $amt1, $amt2", diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 10d7f04d8d937..cc1fd027d8515 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2224,6 +2224,8 @@ def nvvm_move_sym64 : NVPTXInst<(outs B64:$r), (ins ADDR_base:$s), def texsurf_handles : BasicNVPTXInst<(outs B64:$result), (ins ADDR_base:$src), "mov.u64">; +def : Pat<(int_nvvm_texsurf_handle_internal globaladdr:$src), + (texsurf_handles (to_tglobaladdr $src))>; //----------------------------------- // Compiler Error Warn diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index f921032356d65..415164fc9e2cb 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4961,6 +4961,21 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { // If this is just a masked value where the input is not handled, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { + // The result of LBARX/LHARX 
do not need to be cleared as the instructions + // implicitly clear the upper bits. + unsigned AlreadyCleared = 0; + if (Val.getOpcode() == ISD::INTRINSIC_W_CHAIN) { + auto IntrinsicID = Val.getConstantOperandVal(1); + if (IntrinsicID == Intrinsic::ppc_lbarx) + AlreadyCleared = 24; + else if (IntrinsicID == Intrinsic::ppc_lharx) + AlreadyCleared = 16; + if (AlreadyCleared != 0 && AlreadyCleared == MB && ME == 31) { + ReplaceUses(SDValue(N, 0), N->getOperand(0)); + return true; + } + } + SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 386c94a324996..24287a95ecb05 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1617,10 +1617,14 @@ class VX_VT5_EO5_VB5_XO9_o eo, bits<9> xo, string opc, } // Decimal Convert From/to National/Zoned/Signed-QWord -def BCDCFN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>; -def BCDCFZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>; -def BCDCTN_rec : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>; -def BCDCTZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>; +def BCDCFN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , + [(set v16i8:$VD, (int_ppc_national2packed v16i8:$VB, timm:$PS))]>; +def BCDCFZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , + [(set v16i8:$VD, (int_ppc_zoned2packed v16i8:$VB, timm:$PS))]>; +def BCDCTN_rec : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , + [(set v16i8:$VD, (int_ppc_packed2national v16i8:$VB))]>; +def BCDCTZ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." 
, + [(set v16i8:$VD, (int_ppc_packed2zoned v16i8:$VB, timm:$PS))]>; def BCDCFSQ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>; def BCDCTSQ_rec : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>; diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 0e59861b8a786..b078b9268c984 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -292,7 +292,7 @@ void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) { NontemporalMode += 0b10; MCInst Hint; - if (STI->hasStdExtZca() && STI->enableRVCHintInstrs()) + if (STI->hasStdExtZca()) Hint.setOpcode(RISCV::C_ADD_HINT); else Hint.setOpcode(RISCV::ADD); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 36b3aff51cda9..dc80432fcb738 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -373,13 +373,6 @@ def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">, def NoStdExtZhinx : Predicate<"!Subtarget->hasStdExtZhinx()">; // Compressed Extensions -def FeatureNoRVCHints - : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false", - "Disable RVC Hint Instructions.">; -def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">, - AssemblerPredicate<(all_of(not FeatureNoRVCHints)), - "RVC Hint Instructions">; - def FeatureStdExtZca : RISCVExtension<1, 0, "part of the C extension, excluding compressed " diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 712f6154732a2..90376b375e275 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -756,8 +756,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT, - ISD::VP_LLRINT, 
ISD::EXPERIMENTAL_VP_REVERSE, - ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM, + ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT}; static const unsigned IntegerVecReduceOps[] = { @@ -1112,6 +1111,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); setOperationAction(FloatingPointVPOps, VT, Custom); @@ -1420,6 +1421,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE, ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS}, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); + setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); setOperationAction({ISD::VECTOR_INTERLEAVE, ISD::VECTOR_DEINTERLEAVE}, VT, Custom); @@ -13241,6 +13244,8 @@ SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op, SDValue RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const { + using namespace SDPatternMatch; + SDLoc DL(Op); SDValue Op1 = Op.getOperand(0); @@ -13285,6 +13290,42 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); } + auto getVectorFirstEle = [](SDValue Vec) { + SDValue FirstEle; + if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero()))) + return FirstEle; + + if (Vec.getOpcode() == ISD::SPLAT_VECTOR || + Vec.getOpcode() == ISD::BUILD_VECTOR) + return Vec.getOperand(0); + + return SDValue(); + }; + + if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1)) + if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) { + MVT EltVT = ContainerVT.getVectorElementType(); + SDValue Result; + if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) || + EltVT 
== MVT::bf16) { + EltVT = EltVT.changeTypeToInteger(); + ContainerVT = ContainerVT.changeVectorElementType(EltVT); + Op2 = DAG.getBitcast(ContainerVT, Op2); + FirstEle = + DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT); + } + Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL + : RISCVISD::VSLIDE1UP_VL, + DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2, + FirstEle, Mask, EVL2); + Result = DAG.getBitcast( + ContainerVT.changeVectorElementType(VT.getVectorElementType()), + Result); + return VT.isFixedLengthVector() + ? convertFromScalableVector(VT, Result, DAG, Subtarget) + : Result; + } + int64_t ImmValue = cast(Offset)->getSExtValue(); SDValue DownOffset, UpOffset; if (ImmValue >= 0) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 5711f0077b12d..898cd85a55297 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1726,7 +1726,7 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { if (!MI.memoperands_empty()) { MachineMemOperand *MMO = *(MI.memoperands_begin()); if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) { - if (STI.hasStdExtZca() && STI.enableRVCHintInstrs()) { + if (STI.hasStdExtZca()) { if (isCompressibleInst(MI, STI)) return 4; // c.ntl.all + c.load/c.store return 6; // c.ntl.all + load/store diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index 8b1eca45d82d8..8252a9b170eb3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -608,7 +608,7 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>, // HINT Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZca, HasRVCHints], hasSideEffects = 0, mayLoad = 0, +let Predicates = [HasStdExtZca], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { def 
C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), @@ -691,24 +691,24 @@ def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd), let Inst{12} = 0; } -} // Predicates = [HasStdExtZca, HasRVCHints], hasSideEffects = 0, mayLoad = 0, +} // Predicates = [HasStdExtZca], hasSideEffects = 0, mayLoad = 0, // mayStore = 0 //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZca, HasRVCHints] in { +let Predicates = [HasStdExtZca] in { // Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. def : InstAlias<"c.addi x0, $imm", (C_NOP_HINT simm6nonzero:$imm), 0>; } -let Predicates = [HasStdExtC, HasRVCHints, HasStdExtZihintntl] in { +let Predicates = [HasStdExtC, HasStdExtZihintntl] in { def : InstAlias<"c.ntl.p1", (C_ADD_HINT X0, X2)>; def : InstAlias<"c.ntl.pall", (C_ADD_HINT X0, X3)>; def : InstAlias<"c.ntl.s1", (C_ADD_HINT X0, X4)>; def : InstAlias<"c.ntl.all", (C_ADD_HINT X0, X5)>; -} // Predicates = [HasStdExtC, HasRVCHints, HasStdExtZihintntl] +} // Predicates = [HasStdExtC, HasStdExtZihintntl] let EmitPriority = 0 in { let Predicates = [HasStdExtZca] in { diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 78a176fcf18d9..6600a00d4e098 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -1201,7 +1201,7 @@ multiclass SiFive7ReadAdvance { def : ReadAdvance; def : ReadAdvance; - // 12. Vector Integer Arithmetic Instructions + // 11. Vector Integer Arithmetic Instructions defm : LMULReadAdvance<"ReadVIALUV", 0>; defm : LMULReadAdvance<"ReadVIALUX", 0>; defm : LMULReadAdvanceW<"ReadVIWALUV", 0>; @@ -1232,7 +1232,7 @@ multiclass SiFive7ReadAdvance { defm : LMULReadAdvance<"ReadVIMovV", 0>; defm : LMULReadAdvance<"ReadVIMovX", 0>; - // 13. 
Vector Fixed-Point Arithmetic Instructions + // 12. Vector Fixed-Point Arithmetic Instructions defm : LMULReadAdvance<"ReadVSALUV", 0>; defm : LMULReadAdvance<"ReadVSALUX", 0>; defm : LMULReadAdvance<"ReadVAALUV", 0>; @@ -1244,7 +1244,7 @@ multiclass SiFive7ReadAdvance { defm : LMULReadAdvanceW<"ReadVNClipV", 0>; defm : LMULReadAdvanceW<"ReadVNClipX", 0>; - // 14. Vector Floating-Point Instructions + // 13. Vector Floating-Point Instructions defm : LMULSEWReadAdvanceF<"ReadVFALUV", 0>; defm : LMULSEWReadAdvanceF<"ReadVFALUF", 0>; defm : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>; @@ -1280,7 +1280,7 @@ multiclass SiFive7ReadAdvance { defm : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; defm : LMULSEWReadAdvanceFW<"ReadVFNCvtFToFV", 0>; - // 15. Vector Reduction Operations + // 14. Vector Reduction Operations def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; @@ -1294,14 +1294,14 @@ multiclass SiFive7ReadAdvance { def : ReadAdvance; def : ReadAdvance; - // 16. Vector Mask Instructions + // 15. Vector Mask Instructions defm : LMULReadAdvance<"ReadVMALUV", 0>; defm : LMULReadAdvance<"ReadVMPopV", 0>; defm : LMULReadAdvance<"ReadVMFFSV", 0>; defm : LMULReadAdvance<"ReadVMSFSV", 0>; defm : LMULReadAdvance<"ReadVIotaV", 0>; - // 17. Vector Permutation Instructions + // 16. 
Vector Permutation Instructions def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 3cd923c0ba058..ec77154d17caa 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -3239,52 +3239,79 @@ static SDValue performBitcastCombine(SDNode *N, return SDValue(); } -static SDValue performSETCCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - auto &DAG = DCI.DAG; - +template +static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - ISD::CondCode Cond = cast(N->getOperand(2))->get(); + SDValue Cond = N->getOperand(2); + if (MatchCond != cast(Cond)->get()) + return SDValue(); + + if (MatchRHS != cast(RHS)->getSExtValue()) + return SDValue(); + SDLoc DL(N); + SDValue Ret = DAG.getZExtOrTrunc( + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + {DAG.getConstant(Intrin, DL, MVT::i32), + DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}), + DL, MVT::i1); + if (RequiresNegate) + Ret = DAG.getNOT(DL, Ret, MVT::i1); + return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0)); +} + +static SDValue performSETCCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + if (!DCI.isBeforeLegalize()) + return SDValue(); + EVT VT = N->getValueType(0); + if (!VT.isScalarInteger()) + return SDValue(); + SDValue LHS = N->getOperand(0); + if (LHS->getOpcode() != ISD::BITCAST) + return SDValue(); + + EVT FromVT = LHS->getOperand(0).getValueType(); + if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1) + return SDValue(); + + unsigned NumElts = FromVT.getVectorNumElements(); + if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + return SDValue(); + + if (!cast(N->getOperand(1))) + return SDValue(); + + EVT VecVT = 
FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts)); + auto &DAG = DCI.DAG; // setcc (iN (bitcast (vNi1 X))), 0, ne // ==> any_true (vNi1 X) + if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>( + N, VecVT, DAG)) { + return Match; + } // setcc (iN (bitcast (vNi1 X))), 0, eq // ==> xor (any_true (vNi1 X)), -1 + if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>( + N, VecVT, DAG)) { + return Match; + } // setcc (iN (bitcast (vNi1 X))), -1, eq // ==> all_true (vNi1 X) + if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>( + N, VecVT, DAG)) { + return Match; + } // setcc (iN (bitcast (vNi1 X))), -1, ne // ==> xor (all_true (vNi1 X)), -1 - if (DCI.isBeforeLegalize() && VT.isScalarInteger() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (isNullConstant(RHS) || isAllOnesConstant(RHS)) && - LHS->getOpcode() == ISD::BITCAST) { - EVT FromVT = LHS->getOperand(0).getValueType(); - if (FromVT.isFixedLengthVector() && - FromVT.getVectorElementType() == MVT::i1) { - int Intrin = isNullConstant(RHS) ? 
Intrinsic::wasm_anytrue - : Intrinsic::wasm_alltrue; - unsigned NumElts = FromVT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) - return SDValue(); - EVT Width = MVT::getIntegerVT(128 / NumElts); - SDValue Ret = DAG.getZExtOrTrunc( - DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - {DAG.getConstant(Intrin, DL, MVT::i32), - DAG.getSExtOrTrunc(LHS->getOperand(0), DL, - FromVT.changeVectorElementType(Width))}), - DL, MVT::i1); - if ((isNullConstant(RHS) && (Cond == ISD::SETEQ)) || - (isAllOnesConstant(RHS) && (Cond == ISD::SETNE))) { - Ret = DAG.getNOT(DL, Ret, MVT::i1); - } - return DAG.getZExtOrTrunc(Ret, DL, VT); - } + if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>( + N, VecVT, DAG)) { + return Match; } - return SDValue(); } diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 3090ad313b90d..9b0dd0562cde3 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -95,6 +95,8 @@ class X86InstructionSelector : public InstructionSelector { MachineFunction &MF) const; bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectFAbs(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const; @@ -391,6 +393,8 @@ bool X86InstructionSelector::select(MachineInstr &I) { switch (I.getOpcode()) { default: return false; + case TargetOpcode::G_FABS: + return selectFAbs(I, MRI, MF); case TargetOpcode::G_STORE: case TargetOpcode::G_LOAD: return selectLoadStoreOp(I, MRI, MF); @@ -1050,6 +1054,35 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, I.eraseFromParent(); return true; } +bool X86InstructionSelector::selectFAbs(MachineInstr &I, + 
MachineRegisterInfo &MRI, + MachineFunction &MF) const { + assert((I.getOpcode() == TargetOpcode::G_FABS) && "unexpected instruction"); + Register SrcReg = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(SrcReg); + unsigned OpAbs; + const TargetRegisterClass *DstRC; + switch (Ty.getSizeInBits()) { + default: + return false; + case 32: + OpAbs = X86::ABS_Fp32; + DstRC = &X86::FR32RegClass; + break; + case 64: + OpAbs = X86::ABS_Fp64; + DstRC = &X86::FR64RegClass; + break; + } + MRI.setRegClass(DstReg, DstRC); + MachineInstr &FAbsInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpAbs), DstReg) + .addReg(SrcReg); + constrainSelectedInstRegOperands(FAbsInst, TII, TRI, RBI); + I.eraseFromParent(); + return true; +} bool X86InstructionSelector::selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index f21a7c81459f7..0250ec66c0b99 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -97,10 +97,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .widenScalarToNextPow2(0, /*Min=*/8) .clampScalar(0, s8, sMaxScalar); - getActionDefinitionsBuilder({G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, - G_FSIN, G_FSINH, G_FASIN, G_FTAN, G_FTANH, - G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, - G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI}) + getActionDefinitionsBuilder( + {G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, + G_FASIN, G_FTAN, G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, + G_FEXP2, G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS}) .libcall(); getActionDefinitionsBuilder(G_FSQRT) @@ -418,6 +418,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .legalFor(HasAVX512, {v16s32, v8s64}) .legalFor(UseX87, {s80}); + getActionDefinitionsBuilder(G_FABS) + .legalFor(UseX87 && !HasSSE2 && !HasSSE1, {s64, 
s80}) + .lower(); + // fp comparison getActionDefinitionsBuilder(G_FCMP) .legalFor(HasSSE1 || UseX87, {s8, s32}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index bcda188d4c2cb..772e48efb8607 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1574,7 +1574,7 @@ def ProcessorFeatures { FeatureVPCLMULQDQ]; list ZN3AdditionalTuning = [TuningMacroFusion]; list ZN3Tuning = - !listconcat(ZN2Tuning, ZN3AdditionalTuning); + !listremove(!listconcat(ZN2Tuning, ZN3AdditionalTuning), [TuningSlowSHLD]); list ZN3Features = !listconcat(ZN2Features, ZN3AdditionalFeatures); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 6a05a1700f0cb..e73bec2e22a57 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5441,7 +5441,8 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment, else if (LT.first * Ty.getVectorNumElements() > NumElem) { auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(), - Ty.getVectorNumElements()); + (unsigned)LT.first.getValue() * + Ty.getVectorNumElements()); // Expanding requires fill mask with zeroes Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, NewMaskTy, {}, CostKind, 0, MaskTy); diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 5718ae385bac1..42ed914f6dc73 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1018,10 +1018,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { /// /// This stores the string representation and parses the various pieces into /// enum members. 
-Triple::Triple(const Twine &Str) - : Data(Str.str()), Arch(UnknownArch), SubArch(NoSubArch), - Vendor(UnknownVendor), OS(UnknownOS), Environment(UnknownEnvironment), - ObjectFormat(UnknownObjectFormat) { +Triple::Triple(const Twine &Str) : Data(Str.str()) { // Do minimal parsing by hand here. SmallVector Components; StringRef(Data).split(Components, '-', /*MaxSplit*/ 3); @@ -1636,14 +1633,7 @@ void Triple::setObjectFormat(ObjectFormatType Kind) { } void Triple::setArchName(StringRef Str) { - // Work around a miscompilation bug for Twines in gcc 4.0.3. - SmallString<64> Triple; - Triple += Str; - Triple += "-"; - Triple += getVendorName(); - Triple += "-"; - Triple += getOSAndEnvironmentName(); - setTriple(Triple); + setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); } void Triple::setVendorName(StringRef Str) { diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 4a06e0fa619c0..7f5a2a982982d 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -250,10 +250,10 @@ CleanupPointerRootUsers(GlobalVariable *GV, } } - for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { - Dead[i].second->eraseFromParent(); - Instruction *I = Dead[i].first; + for (const auto &[Inst, Store] : Dead) { + if (IsSafeComputationToRemove(Inst, GetTLI)) { + Store->eraseFromParent(); + Instruction *I = Inst; do { if (isAllocationFn(I, GetTLI)) break; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 99acb02561d53..40578e5edc3ab 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3494,13 +3494,13 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { auto Removable = isAllocSiteRemovable(&MI, Users, TLI, KnowInitZero | KnowInitUndef); if (Removable) { - for 
(unsigned i = 0, e = Users.size(); i != e; ++i) { + for (WeakTrackingVH &User : Users) { // Lowering all @llvm.objectsize and MTI calls first because they may use // a bitcast/GEP of the alloca we are removing. - if (!Users[i]) - continue; + if (!User) + continue; - Instruction *I = cast(&*Users[i]); + Instruction *I = cast(&*User); if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::objectsize) { @@ -3511,7 +3511,7 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { Worklist.add(Inserted); replaceInstUsesWith(*I, Result); eraseInstFromFunction(*I); - Users[i] = nullptr; // Skip examining in the next loop. + User = nullptr; // Skip examining in the next loop. continue; } if (auto *MTI = dyn_cast(I)) { @@ -3527,11 +3527,11 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { } } } - for (unsigned i = 0, e = Users.size(); i != e; ++i) { - if (!Users[i]) + for (WeakTrackingVH &User : Users) { + if (!User) continue; - Instruction *I = cast(&*Users[i]); + Instruction *I = cast(&*User); if (ICmpInst *C = dyn_cast(I)) { replaceInstUsesWith(*C, diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index a6f9992383cd3..1a76898bd61c6 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3294,22 +3294,51 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getOrigin(Op)); } - void handleCountZeroes(IntrinsicInst &I) { + // Uninitialized bits are ok if they appear after the leading/trailing 0's + // and a 1. If the input is all zero, it is fully initialized iff + // !is_zero_poison. + // + // e.g., for ctlz, with little-endian, if 0/1 are initialized bits with + // concrete value 0/1, and ? is an uninitialized bit: + // - 0001 0??? is fully initialized + // - 000? ???? is fully uninitialized (*) + // - ???? ???? 
is fully uninitialized + // - 0000 0000 is fully uninitialized if is_zero_poison, + // fully initialized otherwise + // + // (*) TODO: arguably, since the number of zeros is in the range [3, 8], we + // only need to poison 4 bits. + // + // OutputShadow = + // ((ConcreteZerosCount >= ShadowZerosCount) && !AllZeroShadow) + // || (is_zero_poison && AllZeroSrc) + void handleCountLeadingTrailingZeros(IntrinsicInst &I) { IRBuilder<> IRB(&I); Value *Src = I.getArgOperand(0); + Value *SrcShadow = getShadow(Src); - // Set the Output shadow based on input Shadow - Value *BoolShadow = IRB.CreateIsNotNull(getShadow(Src), "_mscz_bs"); + Value *False = IRB.getInt1(false); + Value *ConcreteZerosCount = IRB.CreateIntrinsic( + I.getType(), I.getIntrinsicID(), {Src, /*is_zero_poison=*/False}); + Value *ShadowZerosCount = IRB.CreateIntrinsic( + I.getType(), I.getIntrinsicID(), {SrcShadow, /*is_zero_poison=*/False}); + + Value *CompareConcreteZeros = IRB.CreateICmpUGE( + ConcreteZerosCount, ShadowZerosCount, "_mscz_cmp_zeros"); + + Value *NotAllZeroShadow = + IRB.CreateIsNotNull(SrcShadow, "_mscz_shadow_not_null"); + Value *OutputShadow = + IRB.CreateAnd(CompareConcreteZeros, NotAllZeroShadow, "_mscz_main"); // If zero poison is requested, mix in with the shadow Constant *IsZeroPoison = cast(I.getOperand(1)); if (!IsZeroPoison->isZeroValue()) { Value *BoolZeroPoison = IRB.CreateIsNull(Src, "_mscz_bzp"); - BoolShadow = IRB.CreateOr(BoolShadow, BoolZeroPoison, "_mscz_bs"); + OutputShadow = IRB.CreateOr(OutputShadow, BoolZeroPoison, "_mscz_bs"); } - Value *OutputShadow = - IRB.CreateSExt(BoolShadow, getShadowTy(Src), "_mscz_os"); + OutputShadow = IRB.CreateSExt(OutputShadow, getShadowTy(Src), "_mscz_os"); setShadow(&I, OutputShadow); setOriginForNaryOp(I); @@ -4726,7 +4755,7 @@ struct MemorySanitizerVisitor : public InstVisitor { break; case Intrinsic::ctlz: case Intrinsic::cttz: - handleCountZeroes(I); + handleCountLeadingTrailingZeros(I); break; case Intrinsic::masked_compressstore: 
handleMaskedCompressStore(I); diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 1e16677e5a56f..9e3b4b82cc454 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -235,8 +235,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, // matrix by exchanging the two columns. static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx) { - for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I) - std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]); + for (auto &Row : DepMatrix) + std::swap(Row[ToIndx], Row[FromIndx]); } // Check if a direction vector is lexicographically positive. Return true if it diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 778a6a012556b..343da5b2e4704 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -83,10 +83,10 @@ static void PrintOps(Instruction *I, const SmallVectorImpl &Ops) { Module *M = I->getModule(); dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " << *Ops[0].Op->getType() << '\t'; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + for (const ValueEntry &Op : Ops) { dbgs() << "[ "; - Ops[i].Op->printAsOperand(dbgs(), false, M); - dbgs() << ", #" << Ops[i].Rank << "] "; + Op.Op->printAsOperand(dbgs(), false, M); + dbgs() << ", #" << Op.Rank << "] "; } } #endif @@ -1585,9 +1585,9 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, // where they are actually the same multiply. 
unsigned MaxOcc = 0; Value *MaxOccVal = nullptr; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + for (const ValueEntry &Op : Ops) { BinaryOperator *BOp = - isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); + isReassociableOp(Op.Op, Instruction::Mul, Instruction::FMul); if (!BOp) continue; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 9883974c55e3b..242cf6d811b66 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1528,12 +1528,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs, NewValues); - LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { - newFunction->dump(); - report_fatal_error("verification of newFunction failed!"); - }); - LLVM_DEBUG(if (verifyFunction(*oldFunction)) - report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n"); + LLVM_DEBUG(newFunction->dump()); + LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n"); + LLVM_DEBUG(oldFunction->dump()); LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) report_fatal_error("Stale Asumption cache for old Function!")); return newFunction; @@ -1833,6 +1831,9 @@ CallInst *CodeExtractor::emitReplacerCall( // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP); + if (AllocationBlock) + assert(AllocationBlock->getParent() == oldFunction && + "AllocationBlock is not in the same function"); BasicBlock *AllocaBlock = AllocationBlock ? 
AllocationBlock : &oldFunction->getEntryBlock(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a3f39f5ad7a29..fe20d48f780e2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -465,7 +465,7 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) { "Predecessor basic-block not found building successor."); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; auto *PredBBTerminator = PredBB->getTerminator(); - LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n'); auto *TermBr = dyn_cast(PredBBTerminator); if (isa(PredBBTerminator)) { @@ -579,8 +579,8 @@ VPBasicBlock *VPBasicBlock::clone() { } void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { - LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() - << " in BB:" << BB->getName() << '\n'); + LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName() + << " in BB: " << BB->getName() << '\n'); State->CFG.PrevVPBB = this; @@ -589,7 +589,7 @@ void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { Recipe.execute(*State); } - LLVM_DEBUG(dbgs() << "LV: filled BB:" << *BB); + LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB); } VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 3e459f5ea4ce5..a4bfdcabaa314 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3082,10 +3082,10 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { auto *SSV = cast(SVOp0); SVOp0 = SSV->getOperand(0); SVOp1 = SSV->getOperand(1); - for (unsigned I = 0, E = Mask.size(); I != E; I++) { - if (Mask[I] >= static_cast(SSV->getShuffleMask().size())) + for (int &Elem : Mask) { + if (Elem >= 
static_cast(SSV->getShuffleMask().size())) return false; - Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]); + Elem = Elem < 0 ? Elem : SSV->getMaskValue(Elem); } } if (SVOp0 == Op1 && SVOp1 == Op0) { diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 9f86650ec58d1..b20cdb8d68ec3 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -510,6 +510,13 @@ if(build_runtimes) endif() # TODO: We need to consider passing it as '-DRUNTIMES_x86_64_LLVM_ENABLE_RUNTIMES'. + if("libclc" IN_LIST LLVM_ENABLE_RUNTIMES) + foreach(dep clang llvm-as llvm-link opt) + if(TARGET ${dep}) + list(APPEND extra_deps ${dep}) + endif() + endforeach() + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) if (${LLVM_TOOL_FLANG_BUILD}) message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") diff --git a/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll b/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll new file mode 100644 index 0000000000000..790f49f1d3b82 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="print" -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s +; RUN: opt -passes="print" -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+sme2p1 -force-streaming < %s | FileCheck %s + +;; Broadcast indexed lane within 128b segments (dupq zd.t, zn.t[idx]) +define void @dup_within_each_segment_256b() #0 { +; CHECK-LABEL: 'dup_within_each_segment_256b' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_b11 = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %dupq_h2 = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_s3 = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_d0 = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %dupq_512b_d1 = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_s3_with_poison = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %dupq_b11 = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> + %dupq_h2 = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> + %dupq_s3 = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> + %dupq_d0 = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> + %dupq_512b_d1 = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> + %dupq_s3_with_poison = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> + ret void +} + +define void @dup_within_each_segment_512b() #1 { +; CHECK-LABEL: 'dup_within_each_segment_512b' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_b11 = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_h2 = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_s3 = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_d0 = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%dupq_512b_d1 = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %dupq_s3_with_poison = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %dupq_b11 = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> + %dupq_h2 = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> + %dupq_s3 = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> + %dupq_d0 = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> + %dupq_512b_d1 = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> + %dupq_s3_with_poison = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> + ret void +} + +attributes #0 = { noinline vscale_range(2,2) } +attributes #1 = { noinline vscale_range(4,4) } diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 4d0603722c3ae..0779c75c345e3 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -13337,6 +13337,57 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 ret <16 x i8> %tmp2 } +define <16 x i8> @test_v16i8_post_reg_ld1lane_zero(ptr %bar, ptr %ptr, i64 %inc) { +; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane_zero: +; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: movi.2d v0, #0000000000000000 +; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0], x2 +; CHECK-SD-NEXT: str x0, [x1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane_zero: +; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: mov w8, #0 ; =0x0 +; CHECK-GI-NEXT: mov.b v0[1], w8 +; CHECK-GI-NEXT: mov.b v0[2], w8 +; CHECK-GI-NEXT: mov.b v0[3], w8 +; CHECK-GI-NEXT: mov.b v0[4], w8 +; CHECK-GI-NEXT: mov.b v0[5], w8 +; CHECK-GI-NEXT: mov.b v0[6], w8 +; CHECK-GI-NEXT: mov.b v0[7], w8 +; 
CHECK-GI-NEXT: mov.b v0[8], w8 +; CHECK-GI-NEXT: mov.b v0[9], w8 +; CHECK-GI-NEXT: mov.b v0[10], w8 +; CHECK-GI-NEXT: mov.b v0[11], w8 +; CHECK-GI-NEXT: mov.b v0[12], w8 +; CHECK-GI-NEXT: mov.b v0[13], w8 +; CHECK-GI-NEXT: mov.b v0[14], w8 +; CHECK-GI-NEXT: mov.b v0[15], w8 +; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: str x8, [x1] +; CHECK-GI-NEXT: ret + %tmp1 = load i8, ptr %bar + %tmp2 = insertelement <16 x i8> zeroinitializer, i8 %tmp1, i32 0 + %tmp3 = getelementptr i8, ptr %bar, i64 %inc + store ptr %tmp3, ptr %ptr + ret <16 x i8> %tmp2 +} + +define <16 x i8> @test_v16i8_post_reg_ld1lane_undef(ptr %bar, ptr %ptr, i64 %inc) { +; CHECK-LABEL: test_v16i8_post_reg_ld1lane_undef: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: add x8, x0, x2 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: ret + %tmp1 = load i8, ptr %bar + %tmp2 = insertelement <16 x i8> poison, i8 %tmp1, i32 0 + %tmp3 = getelementptr i8, ptr %bar, i64 %inc + store ptr %tmp3, ptr %ptr + ret <16 x i8> %tmp2 +} + define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld1lane: ; CHECK-SD: ; %bb.0: @@ -14078,3 +14129,69 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) { %e = extractelement <4 x i32> %lv, i32 %idx.x ret i32 %e } + +define void @chained_insert_zero(ptr noundef %fenc, ptr noundef %pred, ptr noundef %residual, i32 noundef %stride) { +; CHECK-SD-LABEL: chained_insert_zero: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: movi.2d v0, #0000000000000000 +; CHECK-SD-NEXT: movi.2d v1, #0000000000000000 +; CHECK-SD-NEXT: ; kill: def $w3 killed $w3 def $x3 +; CHECK-SD-NEXT: sxtw x8, w3 +; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0], x8 +; CHECK-SD-NEXT: ld1.s { v1 }[0], [x1], x8 +; CHECK-SD-NEXT: sbfiz x8, x3, #1, #32 +; CHECK-SD-NEXT: usubl.8h v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x2] +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: usubl.8h v0, v0, v1 +; CHECK-SD-NEXT: 
str d0, [x2, x8] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: chained_insert_zero: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: ldr s1, [x1] +; CHECK-GI-NEXT: ; kill: def $w3 killed $w3 def $x3 +; CHECK-GI-NEXT: sxtw x8, w3 +; CHECK-GI-NEXT: mov.s v0[1], wzr +; CHECK-GI-NEXT: mov.s v1[1], wzr +; CHECK-GI-NEXT: usubl.8h v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x2] +; CHECK-GI-NEXT: ldr s0, [x0, x8] +; CHECK-GI-NEXT: ldr s1, [x1, x8] +; CHECK-GI-NEXT: lsl x8, x8, #1 +; CHECK-GI-NEXT: mov.s v0[1], wzr +; CHECK-GI-NEXT: mov.s v1[1], wzr +; CHECK-GI-NEXT: usubl.8h v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x2, x8] +; CHECK-GI-NEXT: ret +entry: + %idx.ext = sext i32 %stride to i64 + %0 = load i32, ptr %fenc, align 4 + %vld1_lane.i = insertelement <2 x i32> , i32 %0, i64 0 + %1 = bitcast <2 x i32> %vld1_lane.i to <8 x i8> + %2 = load i32, ptr %pred, align 4 + %vld1_lane.i16 = insertelement <2 x i32> , i32 %2, i64 0 + %3 = bitcast <2 x i32> %vld1_lane.i16 to <8 x i8> + %vmovl.i15 = zext <8 x i8> %1 to <8 x i16> + %vmovl.i = zext <8 x i8> %3 to <8 x i16> + %sub.i = sub nsw <8 x i16> %vmovl.i15, %vmovl.i + %shuffle.i = shufflevector <8 x i16> %sub.i, <8 x i16> poison, <4 x i32> + store <4 x i16> %shuffle.i, ptr %residual, align 2 + %add.ptr = getelementptr inbounds i8, ptr %fenc, i64 %idx.ext + %add.ptr6 = getelementptr inbounds i8, ptr %pred, i64 %idx.ext + %add.ptr8 = getelementptr inbounds i16, ptr %residual, i64 %idx.ext + %4 = load i32, ptr %add.ptr, align 4 + %vld1_lane.i.1 = insertelement <2 x i32> , i32 %4, i64 0 + %5 = bitcast <2 x i32> %vld1_lane.i.1 to <8 x i8> + %6 = load i32, ptr %add.ptr6, align 4 + %vld1_lane.i16.1 = insertelement <2 x i32> , i32 %6, i64 0 + %7 = bitcast <2 x i32> %vld1_lane.i16.1 to <8 x i8> + %vmovl.i15.1 = zext <8 x i8> %5 to <8 x i16> + %vmovl.i.1 = zext <8 x i8> %7 to <8 x i16> + %sub.i.1 = sub nsw <8 x i16> %vmovl.i15.1, %vmovl.i.1 + %shuffle.i.1 = shufflevector <8 x i16> %sub.i.1, <8 x i16> poison, <4 x i32> + 
store <4 x i16> %shuffle.i.1, ptr %add.ptr8, align 2 + ret void +} diff --git a/llvm/test/CodeGen/AArch64/exp10-libcall-names.ll b/llvm/test/CodeGen/AArch64/exp10-libcall-names.ll index 6e603b7064f8f..50358e5f15879 100644 --- a/llvm/test/CodeGen/AArch64/exp10-libcall-names.ll +++ b/llvm/test/CodeGen/AArch64/exp10-libcall-names.ll @@ -10,9 +10,9 @@ ; RUN: llc -mtriple=arm64-apple-driverkit < %s | FileCheck -check-prefix=APPLE %s ; RUN: llc -mtriple=arm64-apple-driverkit1.0 < %s | FileCheck -check-prefix=APPLE %s ; RUN: llc -mtriple=arm64-apple-driverkit24.0 < %s | FileCheck -check-prefix=APPLE %s -; RUN: llc -mtriple=arm64-apple-bridgeos < %s | FileCheck -check-prefix=BRIDGEOS %s -; RUN: llc -mtriple=arm64-apple-bridgeos1.0 < %s | FileCheck -check-prefix=BRIDGEOS %s -; RUN: llc -mtriple=arm64-apple-bridgeos9.0 < %s | FileCheck -check-prefix=BRIDGEOS %s +; RUN: llc -mtriple=arm64-apple-bridgeos < %s | FileCheck -check-prefix=APPLE %s +; RUN: llc -mtriple=arm64-apple-bridgeos1.0 < %s | FileCheck -check-prefix=APPLE %s +; RUN: llc -mtriple=arm64-apple-bridgeos9.0 < %s | FileCheck -check-prefix=APPLE %s ; RUN: not llc -mtriple=aarch64-apple-macos10.8 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s ; RUN: not llc -mtriple=aarch64-apple-ios6.0 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s @@ -29,11 +29,6 @@ define float @test_exp10_f32(float %x) { ; APPLE-LABEL: test_exp10_f32: ; APPLE: ; %bb.0: ; APPLE-NEXT: b ___exp10f -; -; BRIDGEOS-LABEL: test_exp10_f32: -; BRIDGEOS: // %bb.0: -; BRIDGEOS-NEXT: b __exp10f -; %ret = call float @llvm.exp10.f32(float %x) ret float %ret } @@ -46,11 +41,6 @@ define double @test_exp10_f64(double %x) { ; APPLE-LABEL: test_exp10_f64: ; APPLE: ; %bb.0: ; APPLE-NEXT: b ___exp10 -; -; BRIDGEOS-LABEL: test_exp10_f64: -; BRIDGEOS: // %bb.0: -; BRIDGEOS-NEXT: b __exp10 -; %ret = call double @llvm.exp10.f64(double %x) ret double %ret } diff --git a/llvm/test/CodeGen/AArch64/streaming-func-no-sme.ll 
b/llvm/test/CodeGen/AArch64/streaming-func-no-sme.ll index 968adcb7cc21b..9be776f817271 100644 --- a/llvm/test/CodeGen/AArch64/streaming-func-no-sme.ll +++ b/llvm/test/CodeGen/AArch64/streaming-func-no-sme.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple aarch64-none-linux-gnu %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple aarch64-none-linux-gnu -filetype=null %s 2>&1 | FileCheck %s ; CHECK: LLVM ERROR: streaming SVE functions require SME define void @streaming(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind { diff --git a/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll b/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll index 40d4d0ff60148..da83b27ce4d55 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,SVE +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1,+bf16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME define void @dupq_i8_256b(ptr %addr) #0 { ; CHECK-LABEL: dupq_i8_256b: @@ -71,13 +72,43 @@ define void @dupq_f16_256b(ptr %addr) #0 { } define void @dupq_bf16_256b(ptr %addr) #0 { -; CHECK-LABEL: dupq_bf16_256b: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: dup v0.8h, v0.h[2] -; CHECK-NEXT: dup v1.8h, v1.h[2] -; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: ret +; SVE-LABEL: dupq_bf16_256b: +; SVE: // %bb.0: +; SVE-NEXT: ldp q0, q1, [x0] +; SVE-NEXT: dup v0.8h, v0.h[2] +; SVE-NEXT: dup v1.8h, v1.h[2] +; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: ret +; +; SME-LABEL: dupq_bf16_256b: +; SME: // %bb.0: +; SME-NEXT: ldp q1, q0, [x0] +; SME-NEXT: str q0, [sp, #-64]! 
+; SME-NEXT: .cfi_def_cfa_offset 64 +; SME-NEXT: ldr h0, [sp, #4] +; SME-NEXT: str q1, [sp, #32] +; SME-NEXT: str h0, [sp, #30] +; SME-NEXT: str h0, [sp, #28] +; SME-NEXT: str h0, [sp, #26] +; SME-NEXT: str h0, [sp, #24] +; SME-NEXT: str h0, [sp, #22] +; SME-NEXT: str h0, [sp, #20] +; SME-NEXT: str h0, [sp, #18] +; SME-NEXT: str h0, [sp, #16] +; SME-NEXT: ldr h0, [sp, #36] +; SME-NEXT: ldr q1, [sp, #16] +; SME-NEXT: str h0, [sp, #62] +; SME-NEXT: str h0, [sp, #60] +; SME-NEXT: str h0, [sp, #58] +; SME-NEXT: str h0, [sp, #56] +; SME-NEXT: str h0, [sp, #54] +; SME-NEXT: str h0, [sp, #52] +; SME-NEXT: str h0, [sp, #50] +; SME-NEXT: str h0, [sp, #48] +; SME-NEXT: ldr q0, [sp, #48] +; SME-NEXT: stp q0, q1, [x0] +; SME-NEXT: add sp, sp, #64 +; SME-NEXT: ret %load = load <16 x bfloat>, ptr %addr %splat.lanes = shufflevector <16 x bfloat> %load, <16 x bfloat> poison, <16 x i32> @@ -112,4 +143,18 @@ define void @dupq_f64_256b(ptr %addr) #0 { ret void } -attributes #0 = { noinline vscale_range(2,2) "target-features"="+sve2p1,+bf16" } +define void @dupq_f32_256b_with_poison(ptr %addr) #0 { +; CHECK-LABEL: dupq_f32_256b_with_poison: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: dupq z0.s, z0.s[3] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <8 x float>, ptr %addr + %splat.lanes = shufflevector <8 x float> %load, <8 x float> poison, <8 x i32> + store <8 x float> %splat.lanes, ptr %addr + ret void +} + +attributes #0 = { noinline vscale_range(2,2) } diff --git a/llvm/test/CodeGen/AMDGPU/convergence-laneops.ll b/llvm/test/CodeGen/AMDGPU/convergence-laneops.ll index b7a1749be18bc..57ab371d5b6fc 100644 --- a/llvm/test/CodeGen/AMDGPU/convergence-laneops.ll +++ b/llvm/test/CodeGen/AMDGPU/convergence-laneops.ll @@ -1,5 +1,5 @@ ; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s -; RUN: not --crash llc -mtriple=amdgcn--amdhsa -mcpu=1100 -verify-machineinstrs < %s 
2>&1 | FileCheck --check-prefix=CHECK-ERROR %s +; RUN: not --crash llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s ; FIXME: Merge these tests with existing lane op tests (llvm.amdgcn.readlane.ll, llvm.amdgcn.writelane.ll ...) once the crash is fixed. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll new file mode 100644 index 0000000000000..243f6c4d23732 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-REAL16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG-FAKE16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-REAL16 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL-FAKE16 %s + +define amdgpu_ps float @test_cvt_pk_f16_bf8_v(i16 %a) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_bf8_v: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l +; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_bf8_v: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_bf8_v: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_bf8 v0, v0.l +; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_bf8_v: +; GFX1250-GISEL-FAKE16: ; 
%bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_bf8 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @test_cvt_pk_f16_bf8_s(i16 inreg %a) { +; GFX1250-LABEL: test_cvt_pk_f16_bf8_s: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_pk_f16_bf8 v0, s0 +; GFX1250-NEXT: ; return to shader part epilog + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16 %a) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @test_cvt_pk_f16_fp8_v(i16 %a) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l +; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l +; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @test_cvt_pk_f16_fp8_s(i16 inreg %a) { +; GFX1250-LABEL: test_cvt_pk_f16_fp8_s: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_pk_f16_fp8 v0, s0 +; GFX1250-NEXT: ; return to shader part epilog + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @test_cvt_pk_f16_fp8_v_hi(<2 x 
i16> %a) { +; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-SDAG-REAL16: ; %bb.0: +; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.h +; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-GISEL-REAL16: ; %bb.0: +; GFX1250-GISEL-REAL16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_f16_fp8 v0, v0.l +; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_f16_fp8_v_hi: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_cvt_pk_f16_fp8 v0, v0 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog + %a.1 = extractelement <2 x i16> %a, i32 1 + %cvt = tail call <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16 %a.1) + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll index 008e19b620520..5914253b5f58e 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll @@ -1228,51 +1228,49 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v1 ; GFX900-NEXT: 
;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v2 ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: v_alignbit_b32 v2, s4, v2, 16 -; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17] -; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v1 ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v2 ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_alignbit_b32 v2, s4, v2, 16 -; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17] -; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v1 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v2 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 ; GFX942-NEXT: global_store_dword v0, v2, s[0:1] -; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll 
b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll index 99c9480adc410..cd4dbe93e8a11 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll @@ -1928,48 +1928,45 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:1] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[1:2] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 ; GFX900-NEXT: global_store_dword v3, v2, s[16:17] -; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:1] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[2:3] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 ; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] -; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:1] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[2:3] 
; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 ; GFX942-NEXT: global_store_dword v4, v3, s[0:1] -; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll index e34becc1065ff..99cb8a38f57c3 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll @@ -1228,51 +1228,49 @@ define void @v_shuffle_v3f16_v2f16__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v3f16_v2f16__3_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v1 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v2 ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: v_alignbit_b32 v2, s4, v2, 16 -; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4 ; GFX900-NEXT: global_store_dword v0, v2, s[16:17] -; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v3f16_v2f16__3_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v1 ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v2 ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: v_alignbit_b32 v2, s4, v2, 16 -; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4 ; GFX90A-NEXT: global_store_dword v0, v2, s[16:17] -; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4 
; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v1 ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v2 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 ; GFX942-NEXT: global_store_dword v0, v2, s[0:1] -; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll index 84d42c882494c..0854ff2ebfc5d 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll @@ -1928,48 +1928,45 @@ define void @v_shuffle_v3f16_v3f16__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX900-LABEL: v_shuffle_v3f16_v3f16__5_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[0:1] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def v[1:2] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 ; GFX900-NEXT: global_store_dword v3, v2, s[16:17] -; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: v_shuffle_v3f16_v3f16__5_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: 
v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:1] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_mov_b32_e32 v4, 0 -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[2:3] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 ; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] -; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[0:1] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def v[2:3] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 ; GFX942-NEXT: global_store_dword v4, v3, s[0:1] -; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll index db5f0ad42a677..b3cf3790a59d3 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll @@ -1,17 +1,16 @@ ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s -; RUN: not 
--crash llc -O0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX942 %s +; RUN: not llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s +; RUN: not llc -O0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s ; GFX9-LABEL: image_sample_test: ; GFX9: image_sample_lz -; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU - -; GFX942: LLVM ERROR: requested image instruction is not supported on this GPU +; GFX90A: error: :0:0: in function image_sample_test void (ptr addrspace(1), float, float, <8 x i32>, <4 x i32>): requested image instruction is not supported on this GPU +; GFX942: error: :0:0: in function image_sample_test void (ptr addrspace(1), float, float, <8 x i32>, <4 x i32>): requested image instruction is not supported on this GPU ; GFX1030-LABEL: image_sample_test: ; GFX1030: image_sample_lz @@ -28,3 +27,13 @@ define amdgpu_kernel void @image_sample_test(ptr addrspace(1) %out, float %arg1, } declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) + +; GFX90A: error: :0:0: in function sample_1d_tfe <4 x float> (<8 x i32>, <4 x i32>, ptr addrspace(1), float): TFE is not supported on this GPU +; GFX942: error: :0:0: in function sample_1d_tfe <4 x float> (<8 x i32>, <4 x i32>, ptr addrspace(1), float): TFE is not supported on this GPU +define <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) { + %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, 
i32 0) + %v.vec = extractvalue {<4 x float>, i32} %v, 0 + %v.err = extractvalue {<4 x float>, i32} %v, 1 + store i32 %v.err, ptr addrspace(1) %out, align 4 + ret <4 x float> %v.vec +} diff --git a/llvm/test/CodeGen/ARM/ifcvt_unanalyzable_fallthrough.mir b/llvm/test/CodeGen/ARM/ifcvt_unanalyzable_fallthrough.mir new file mode 100644 index 0000000000000..d2673c36f0f4c --- /dev/null +++ b/llvm/test/CodeGen/ARM/ifcvt_unanalyzable_fallthrough.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv7-apple-ios -run-pass=if-converter %s -o - | FileCheck %s + +# Testcase with unanalyzable branches (that may fallthrough) in the BB +# following the diamond/triangle. + +# Goal here is to showcase a problem seen in the IfConverter when +# AnalyzeBranch is indicating that the branches couldn't be analyzed. Problem +# was originally seen for an out-of-tree target, and here we use ARM and a +# MBB with two conditional jumps to make AnalyzeBranch return false. +# +# The problem was that if-converter when analyzing branches was using a +# variable named HasFallThrough, to remember that an MBB could fallthrough to +# the textual successor. When HasFallThrough is set we know that there are +# fallthrough exits, but the opposite is not guaranteed. If +# HasFallThrough==false there could still be fallthrough exists in situations +# when analyzeBranch found unanalyzable branches. There were however a couple +# of places in the code that checked !HasFallThrough assuming that it would +# imply that there was no fallthrough exit. +# +# As a consequence we could end up merging blocks at the end of a converted +# diamond/triangle and while doing that we messed up when fixing up the CFG +# related to fallthrough edges. For the test cases below we incorrectly ended +# up with a fallthrough from the MBBs with two Bcc instructions to the MBB +# with the STRH after if conversion. 
+# +--- +name: avoidMergeBlockDiamond +body: | + ; CHECK-LABEL: name: avoidMergeBlockDiamond + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sp = tADDspi $sp, 2, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: $sp = tADDspi $sp, 1, 0 /* CC::eq */, $cpsr, implicit $sp + ; CHECK-NEXT: $sp = tADDspi $sp, 3, 14 /* CC::al */, $noreg + ; CHECK-NEXT: tBcc %bb.1, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: tBcc %bb.1, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRH $sp, $sp, $noreg, 0, 14 /* CC::al */, $noreg + ; CHECK-NEXT: tB %bb.2, 14 /* CC::al */, $noreg + bb.0: + tBcc %bb.2, 1, $cpsr + + bb.1: + $sp = tADDspi $sp, 1, 14, _ + tB %bb.4, 14, $noreg + + bb.2: + $sp = tADDspi $sp, 2, 14, _ + tB %bb.4, 14, $noreg + + bb.3: + STRH $sp, $sp, $noreg, 0, 14, $noreg + tB %bb.3, 14, $noreg + + bb.4: + $sp = tADDspi $sp, 3, 14, _ + tBcc %bb.5, 1, $cpsr + tBcc %bb.5, 1, $cpsr + + bb.5: + successors: + tBX_RET 14, _ +... + +# Similar to the above, but with a triangle. 
+--- +name: avoidMergeBlockTriangle +body: | + ; CHECK-LABEL: name: avoidMergeBlockTriangle + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sp = tADDspi $sp, 1, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: $sp = tADDspi $sp, 2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: tBcc %bb.1, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: tBcc %bb.1, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRH $sp, $sp, $noreg, 0, 14 /* CC::al */, $noreg + ; CHECK-NEXT: tB %bb.2, 14 /* CC::al */, $noreg + bb.0: + tBcc %bb.1, 1, $cpsr + tB %bb.3, 14, $noreg + + bb.1: + $sp = tADDspi $sp, 1, 14, _ + tB %bb.3, 14, $noreg + + bb.2: + STRH $sp, $sp, $noreg, 0, 14, $noreg + tB %bb.2, 14, $noreg + + bb.3: + $sp = tADDspi $sp, 2, 14, _ + tBcc %bb.4, 1, $cpsr + tBcc %bb.4, 1, $cpsr + + bb.4: + successors: + tBX_RET 14, _ +... 
diff --git a/llvm/test/CodeGen/ARM/special-reg.ll b/llvm/test/CodeGen/ARM/special-reg.ll index e966550e673d4..cc95f79d2c73b 100644 --- a/llvm/test/CodeGen/ARM/special-reg.ll +++ b/llvm/test/CodeGen/ARM/special-reg.ll @@ -25,14 +25,18 @@ entry: define i64 @read_volatile_i64_twice() { ; ACORE-LABEL: read_volatile_i64_twice: ; ACORE: @ %bb.0: @ %entry -; ACORE-NEXT: mov r0, #0 -; ACORE-NEXT: mov r1, #0 +; ACORE-NEXT: mrrc p15, #1, r0, r1, c14 +; ACORE-NEXT: mrrc p15, #1, r2, r3, c14 +; ACORE-NEXT: eor r0, r2, r0 +; ACORE-NEXT: eor r1, r3, r1 ; ACORE-NEXT: bx lr ; ; MCORE-LABEL: read_volatile_i64_twice: ; MCORE: @ %bb.0: @ %entry -; MCORE-NEXT: movs r0, #0 -; MCORE-NEXT: movs r1, #0 +; MCORE-NEXT: mrrc p15, #1, r0, r1, c14 +; MCORE-NEXT: mrrc p15, #1, r2, r3, c14 +; MCORE-NEXT: eors r0, r2 +; MCORE-NEXT: eors r1, r3 ; MCORE-NEXT: bx lr entry: %0 = tail call i64 @llvm.read_volatile_register.i64(metadata !5) diff --git a/llvm/test/CodeGen/DirectX/issue-145408-gep-struct-fix.ll b/llvm/test/CodeGen/DirectX/issue-145408-gep-struct-fix.ll new file mode 100644 index 0000000000000..40d222cdf2f8f --- /dev/null +++ b/llvm/test/CodeGen/DirectX/issue-145408-gep-struct-fix.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -dxil-data-scalarization -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +%struct.RawStruct8D = type { [8 x i32] } + +define void @test_no_transform_of_struct() { +; CHECK-LABEL: define void @test_no_transform_of_struct() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OUTPUTSIZESLOCAL_I:%.*]] = alloca [[STRUCT_RAWSTRUCT8D:%.*]], align 4 +; CHECK-NEXT: [[ARRAYINIT_ELEMENT13_I76:%.*]] = getelementptr inbounds nuw [1 x %struct.RawStruct8D], ptr [[OUTPUTSIZESLOCAL_I]], i32 0, i32 0 +; CHECK-NEXT: ret void +; +entry: + %outputSizesLocal.i = alloca %struct.RawStruct8D, align 4 + %arrayinit.element13.i76 = getelementptr inbounds nuw [1 x %struct.RawStruct8D], ptr 
%outputSizesLocal.i, i32 0, i32 0 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 2b29fd30a7a56..36fed88fc52d6 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -33,13 +33,13 @@ ; CHECK-NEXT: DXIL Translate Metadata ; CHECK-NEXT: DXIL Post Optimization Validation ; CHECK-NEXT: DXIL Op Lowering +; CHECK-NEXT: DXIL Root Signature Analysis ; CHECK-NEXT: DXIL Prepare Module ; CHECK-ASM-NEXT: DXIL Metadata Pretty Printer ; CHECK-ASM-NEXT: Print Module IR ; CHECK-OBJ-NEXT: DXIL Embedder -; CHECK-OBJ-NEXT: DXIL Root Signature Analysis ; CHECK-OBJ-NEXT: DXContainer Global Emitter ; CHECK-OBJ-NEXT: FunctionPass Manager ; CHECK-OBJ-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll b/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll new file mode 100644 index 0000000000000..3ac617ae871fc --- /dev/null +++ b/llvm/test/CodeGen/DirectX/strip-rootsignatures.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -dxil-prepare < %s | FileCheck %s + +; Ensures that dxil-prepare will remove the dx.rootsignatures metadata + +target triple = "dxil-unknown-shadermodel6.0-compute" + +define void @main() { +entry: + ret void +} + +; CHECK-NOT: !dx.rootsignatures +; CHECK-NOT: {{^!}} + +!dx.rootsignatures = !{!2} ; list of function/root signature pairs +!2 = !{ ptr @main, !3, i32 2 } ; function, root signature +!3 = !{ !4 } ; list of root signature elements +!4 = !{ !"RootFlags", i32 1 } ; 1 = allow_input_assembler_input_layout diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll new file mode 100644 index 0000000000000..62c2cc999456c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/calling-conv-ilp32d.ll @@ -0,0 +1,193 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d --target-abi=ilp32d < 
%s \ +; RUN: | FileCheck %s + +;; This file contains specific tests for the ilp32d ABI. + +;; Check pass floating-point arguments whith FPRs. + +define i32 @callee_float_in_fpr(i32 %a, float %b, double %c) nounwind { +; CHECK-LABEL: callee_float_in_fpr: +; CHECK: # %bb.0: +; CHECK-NEXT: ftintrz.w.s $fa0, $fa0 +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: ftintrz.w.d $fa0, $fa1 +; CHECK-NEXT: movfr2gr.s $a2, $fa0 +; CHECK-NEXT: add.w $a0, $a0, $a1 +; CHECK-NEXT: add.w $a0, $a0, $a2 +; CHECK-NEXT: ret + %b_fptosi = fptosi float %b to i32 + %c_fptosi = fptosi double %c to i32 + %1 = add i32 %a, %b_fptosi + %2 = add i32 %1, %c_fptosi + ret i32 %2 +} + +define i32 @caller_float_in_fpr() nounwind { +; CHECK-LABEL: caller_float_in_fpr: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -16 +; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: movgr2fr.w $fa1, $zero +; CHECK-NEXT: movgr2frh.w $fa1, $zero +; CHECK-NEXT: movgr2fr.w $fa0, $zero +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: bl callee_float_in_fpr +; CHECK-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i32 @callee_float_in_fpr(i32 1, float 0.0, double 0.0) + ret i32 %1 +} + +;; Check that the GPR is used once the FPRs are exhausted. + +;; Must keep define on a single line due to an update_llc_test_checks.py limitation. 
+define i32 @callee_double_in_gpr_exhausted_fprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) nounwind { +; CHECK-LABEL: callee_double_in_gpr_exhausted_fprs: +; CHECK: # %bb.0: +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: movgr2frh.w $fa0, $a1 +; CHECK-NEXT: ftintrz.w.d $fa1, $fa7 +; CHECK-NEXT: movfr2gr.s $a0, $fa1 +; CHECK-NEXT: ftintrz.w.d $fa0, $fa0 +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: add.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %h_fptosi = fptosi double %h to i32 + %i_fptosi = fptosi double %i to i32 + %1 = add i32 %h_fptosi, %i_fptosi + ret i32 %1 +} + +define i32 @caller_double_in_gpr_exhausted_fprs() nounwind { +; CHECK-LABEL: caller_double_in_gpr_exhausted_fprs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -16 +; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1) +; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_1) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2) +; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI3_2) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3) +; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_3) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4) +; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_4) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5) +; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_5) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6) +; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI3_6) +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: ffint.s.w $fa0, $fa0 +; CHECK-NEXT: fcvt.d.s $fa0, $fa0 +; CHECK-NEXT: lu12i.w $a1, 262688 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: bl callee_double_in_gpr_exhausted_fprs +; CHECK-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call i32 @callee_double_in_gpr_exhausted_fprs( + double 
1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, + double 7.0, double 8.0, double 9.0) + ret i32 %1 +} + +;; Check that the stack is used once the FPRs and GPRs are both exhausted. + +;; Must keep define on a single line due to an update_llc_test_checks.py limitation. +define i32 @callee_double_on_stack_exhausted_fprs_gprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m, double %n) nounwind { +; CHECK-LABEL: callee_double_on_stack_exhausted_fprs_gprs: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.d $fa0, $sp, 0 +; CHECK-NEXT: fld.d $fa1, $sp, 8 +; CHECK-NEXT: ftintrz.w.d $fa0, $fa0 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: ftintrz.w.d $fa0, $fa1 +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: add.w $a0, $a0, $a1 +; CHECK-NEXT: ret + %m_fptosi = fptosi double %m to i32 + %n_fptosi = fptosi double %n to i32 + %1 = add i32 %m_fptosi, %n_fptosi + ret i32 %1 +} + +define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind { +; CHECK-LABEL: caller_double_on_stack_exhausted_fprs_gprs: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -32 +; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; CHECK-NEXT: lu12i.w $a0, 262816 +; CHECK-NEXT: st.w $a0, $sp, 4 +; CHECK-NEXT: st.w $zero, $sp, 0 +; CHECK-NEXT: lu12i.w $a0, 262848 +; CHECK-NEXT: st.w $a0, $sp, 12 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1) +; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI5_1) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2) +; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI5_2) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3) +; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI5_3) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4) +; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_4) +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5) +; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_5) +; 
CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6) +; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_6) +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: ffint.s.w $fa0, $fa0 +; CHECK-NEXT: fcvt.d.s $fa0, $fa0 +; CHECK-NEXT: lu12i.w $a1, 262688 +; CHECK-NEXT: lu12i.w $a3, 262720 +; CHECK-NEXT: lu12i.w $a5, 262752 +; CHECK-NEXT: lu12i.w $a7, 262784 +; CHECK-NEXT: st.w $zero, $sp, 8 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: move $a2, $zero +; CHECK-NEXT: move $a4, $zero +; CHECK-NEXT: move $a6, $zero +; CHECK-NEXT: bl callee_double_on_stack_exhausted_fprs_gprs +; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 32 +; CHECK-NEXT: ret + %1 = call i32 @callee_double_on_stack_exhausted_fprs_gprs( + double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, + double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, + double 13.0, double 14.0) + ret i32 %1 +} + +;; Check returning doubles. + +define double @callee_double_ret() nounwind { +; CHECK-LABEL: callee_double_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: ffint.s.w $fa0, $fa0 +; CHECK-NEXT: fcvt.d.s $fa0, $fa0 +; CHECK-NEXT: ret + ret double 1.0 +} + +define i64 @caller_double_ret() nounwind { +; CHECK-LABEL: caller_double_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $sp, $sp, -16 +; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; CHECK-NEXT: bl callee_double_ret +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: movfrh2gr.s $a1, $fa0 +; CHECK-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; CHECK-NEXT: addi.w $sp, $sp, 16 +; CHECK-NEXT: ret + %1 = call double @callee_double_ret() + %2 = bitcast double %1 to i64 + ret i64 %2 +} diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll index be9ea29b54c33..c1d75ddd32803 100644 --- 
a/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll +++ b/llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll @@ -32,18 +32,14 @@ define double @constraint_f_double(double %a) nounwind { define double @constraint_gpr(double %a) { ; LA32-LABEL: constraint_gpr: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: .cfi_def_cfa_offset 16 -; LA32-NEXT: fst.d $fa0, $sp, 8 -; LA32-NEXT: ld.w $a7, $sp, 8 -; LA32-NEXT: ld.w $t0, $sp, 12 +; LA32-NEXT: .cfi_def_cfa_offset 0 +; LA32-NEXT: movfr2gr.s $a7, $fa0 +; LA32-NEXT: movfrh2gr.s $t0, $fa0 ; LA32-NEXT: #APP ; LA32-NEXT: move $a6, $a7 ; LA32-NEXT: #NO_APP -; LA32-NEXT: st.w $a7, $sp, 4 -; LA32-NEXT: st.w $a6, $sp, 0 -; LA32-NEXT: fld.d $fa0, $sp, 0 -; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: movgr2fr.w $fa0, $a6 +; LA32-NEXT: movgr2frh.w $fa0, $a7 ; LA32-NEXT: ret ; ; LA64-LABEL: constraint_gpr: diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll index 2a51fd97feb62..0b82ea220d7fb 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll @@ -279,11 +279,8 @@ define double @convert_u64_to_double(i64 %a) nounwind { define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind { ; LA32-LABEL: bitcast_i64_to_double: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $a1, $sp, 12 -; LA32-NEXT: st.w $a0, $sp, 8 -; LA32-NEXT: fld.d $fa0, $sp, 8 -; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: movgr2frh.w $fa0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: bitcast_i64_to_double: @@ -297,11 +294,8 @@ define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind { define i64 @bitcast_double_to_i64(double %a) nounwind { ; LA32-LABEL: bitcast_double_to_i64: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: fst.d $fa0, $sp, 8 -; LA32-NEXT: ld.w $a0, $sp, 8 -; LA32-NEXT: ld.w $a1, $sp, 
12 -; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: movfrh2gr.s $a1, $fa0 ; LA32-NEXT: ret ; ; LA64-LABEL: bitcast_double_to_i64: diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index 78cabd37c0ad9..b6507e87f0886 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -115,9 +115,8 @@ define double @load_acquire_double(ptr %ptr) { ; LA32-NEXT: .cfi_offset 1, -4 ; LA32-NEXT: ori $a1, $zero, 2 ; LA32-NEXT: bl __atomic_load_8 -; LA32-NEXT: st.w $a1, $sp, 4 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: movgr2frh.w $fa0, $a1 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret @@ -234,9 +233,8 @@ define double @load_unordered_double(ptr %ptr) { ; LA32-NEXT: .cfi_offset 1, -4 ; LA32-NEXT: move $a1, $zero ; LA32-NEXT: bl __atomic_load_8 -; LA32-NEXT: st.w $a1, $sp, 4 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: movgr2frh.w $fa0, $a1 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret @@ -352,9 +350,8 @@ define double @load_monotonic_double(ptr %ptr) { ; LA32-NEXT: .cfi_offset 1, -4 ; LA32-NEXT: move $a1, $zero ; LA32-NEXT: bl __atomic_load_8 -; LA32-NEXT: st.w $a1, $sp, 4 -; LA32-NEXT: st.w $a0, $sp, 0 -; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: movgr2frh.w $fa0, $a1 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret @@ -481,9 +478,8 @@ define double @load_seq_cst_double(ptr %ptr) { ; LA32-NEXT: .cfi_offset 1, -4 ; LA32-NEXT: ori $a1, $zero, 5 ; LA32-NEXT: bl __atomic_load_8 -; LA32-NEXT: st.w $a1, $sp, 4 -; LA32-NEXT: st.w $a0, 
$sp, 0 -; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: movgr2frh.w $fa0, $a1 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret @@ -605,9 +601,8 @@ define void @store_release_double(ptr %ptr, double %v) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: fst.d $fa0, $sp, 0 -; LA32-NEXT: ld.w $a1, $sp, 0 -; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: movfrh2gr.s $a2, $fa0 ; LA32-NEXT: ori $a3, $zero, 3 ; LA32-NEXT: bl __atomic_store_8 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload @@ -723,9 +718,8 @@ define void @store_unordered_double(ptr %ptr, double %v) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: fst.d $fa0, $sp, 0 -; LA32-NEXT: ld.w $a1, $sp, 0 -; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: movfrh2gr.s $a2, $fa0 ; LA32-NEXT: move $a3, $zero ; LA32-NEXT: bl __atomic_store_8 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload @@ -841,9 +835,8 @@ define void @store_monotonic_double(ptr %ptr, double %v) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: fst.d $fa0, $sp, 0 -; LA32-NEXT: ld.w $a1, $sp, 0 -; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: movfrh2gr.s $a2, $fa0 ; LA32-NEXT: move $a3, $zero ; LA32-NEXT: bl __atomic_store_8 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload @@ -973,9 +966,8 @@ define void @store_seq_cst_double(ptr %ptr, double %v) { ; LA32-NEXT: .cfi_def_cfa_offset 16 ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill ; LA32-NEXT: .cfi_offset 1, -4 -; LA32-NEXT: fst.d $fa0, $sp, 0 -; LA32-NEXT: ld.w $a1, $sp, 0 -; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: 
movfrh2gr.s $a2, $fa0 ; LA32-NEXT: ori $a3, $zero, 5 ; LA32-NEXT: bl __atomic_store_8 ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/msa/compare_float.ll b/llvm/test/CodeGen/Mips/msa/compare_float.ll index 2656cb839768c..178264581ea19 100644 --- a/llvm/test/CodeGen/Mips/msa/compare_float.ll +++ b/llvm/test/CodeGen/Mips/msa/compare_float.ll @@ -1,661 +1,645 @@ -; RUN: llc -mtriple=mips-elf -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s -; RUN: llc -mtriple=mipsel-elf -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=mips-elf -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s --check-prefixes=CHECK,MIPS +; RUN: llc -mtriple=mipsel-elf -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s --check-prefixes=CHECK,MIPSEL declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind +; (setcc $a, $b, SETFALSE) is always folded, so we won't get fcaf define void @false_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: false_v4f32: - +; CHECK-LABEL: false_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ldi.b $w0, 0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a %2 = load <4 x float>, ptr %b %3 = fcmp false <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> store <4 x i32> %4, ptr %c ret void - - ; (setcc $a, $b, SETFALSE) is always folded, so we won't get fcaf: - ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0 - ; CHECK-DAG: st.w [[R1]], 0($4) - ; CHECK: .size false_v4f32 } +; (setcc $a, $b, SETFALSE) is always folded define void @false_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: false_v2f64: - +; CHECK-LABEL: false_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: 
ldi.b $w0, 0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <2 x double>, ptr %a %2 = load <2 x double>, ptr %b %3 = fcmp false <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> store <2 x i64> %4, ptr %c ret void - - ; (setcc $a, $b, SETFALSE) is always folded - ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0 - ; CHECK-DAG: st.w [[R1]], 0($4) - ; CHECK: .size false_v2f64 } define void @oeq_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: oeq_v4f32: - +; CHECK-LABEL: oeq_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fceq.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp oeq <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size oeq_v4f32 } define void @oeq_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: oeq_v2f64: - +; CHECK-LABEL: oeq_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fceq.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp oeq <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size oeq_v2f64 } define void @oge_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: oge_v4f32: - +; CHECK-LABEL: oge_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($5) +; CHECK-NEXT: ld.w $w1, 0($6) +; CHECK-NEXT: fcle.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr 
%a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp oge <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size oge_v4f32 } define void @oge_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: oge_v2f64: - +; CHECK-LABEL: oge_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($5) +; CHECK-NEXT: ld.d $w1, 0($6) +; CHECK-NEXT: fcle.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp oge <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size oge_v2f64 } define void @ogt_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ogt_v4f32: - +; CHECK-LABEL: ogt_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($5) +; CHECK-NEXT: ld.w $w1, 0($6) +; CHECK-NEXT: fclt.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ogt <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ogt_v4f32 } define void @ogt_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ogt_v2f64: - +; CHECK-LABEL: ogt_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($5) +; CHECK-NEXT: ld.d $w1, 0($6) +; CHECK-NEXT: fclt.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 
0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ogt <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ogt_v2f64 } define void @ole_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ole_v4f32: - +; CHECK-LABEL: ole_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcle.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ole <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcle.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ole_v4f32 } define void @ole_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ole_v2f64: - +; CHECK-LABEL: ole_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcle.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ole <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcle.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ole_v2f64 } define void @olt_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: olt_v4f32: - +; CHECK-LABEL: olt_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fclt.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; 
CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp olt <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fclt.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size olt_v4f32 } define void @olt_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: olt_v2f64: - +; CHECK-LABEL: olt_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fclt.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp olt <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fclt.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size olt_v2f64 } define void @one_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: one_v4f32: - +; CHECK-LABEL: one_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcne.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp one <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcne.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size one_v4f32 } define void @one_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: one_v2f64: - +; CHECK-LABEL: one_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcne.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 
%3 = fcmp one <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcne.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size one_v2f64 } define void @ord_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ord_v4f32: - +; CHECK-LABEL: ord_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcor.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ord <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcor.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ord_v4f32 } define void @ord_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ord_v2f64: - +; CHECK-LABEL: ord_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcor.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ord <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcor.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ord_v2f64 } define void @ueq_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ueq_v4f32: - +; CHECK-LABEL: ueq_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcueq.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ueq <4 x float> %1, %2 %4 = 
sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcueq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ueq_v4f32 } define void @ueq_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ueq_v2f64: - +; CHECK-LABEL: ueq_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcueq.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ueq <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcueq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ueq_v2f64 } define void @uge_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: uge_v4f32: - +; CHECK-LABEL: uge_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($5) +; CHECK-NEXT: ld.w $w1, 0($6) +; CHECK-NEXT: fcule.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp uge <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size uge_v4f32 } define void @uge_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: uge_v2f64: - +; CHECK-LABEL: uge_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($5) +; CHECK-NEXT: ld.d $w1, 0($6) +; CHECK-NEXT: fcule.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp uge <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; 
CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size uge_v2f64 } define void @ugt_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ugt_v4f32: - +; CHECK-LABEL: ugt_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($5) +; CHECK-NEXT: ld.w $w1, 0($6) +; CHECK-NEXT: fcult.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ugt <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ugt_v4f32 } define void @ugt_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ugt_v2f64: - +; CHECK-LABEL: ugt_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($5) +; CHECK-NEXT: ld.d $w1, 0($6) +; CHECK-NEXT: fcult.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ugt <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ugt_v2f64 } define void @ule_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ule_v4f32: - +; CHECK-LABEL: ule_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcule.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ule <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcule.w [[R3:\$w[0-9]+]], 
[[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ule_v4f32 } define void @ule_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ule_v2f64: - +; CHECK-LABEL: ule_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcule.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ule <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcule.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ule_v2f64 } define void @ult_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ult_v4f32: - +; CHECK-LABEL: ult_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcult.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ult <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcult.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size ult_v4f32 } define void @ult_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: ult_v2f64: - +; CHECK-LABEL: ult_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcult.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ult <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcult.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, 
ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size ult_v2f64 } define void @uno_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: uno_v4f32: - +; CHECK-LABEL: uno_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fcun.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp uno <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> - ; CHECK-DAG: fcun.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x i32> %4, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size uno_v4f32 } define void @uno_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: uno_v2f64: - +; CHECK-LABEL: uno_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fcun.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp uno <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> - ; CHECK-DAG: fcun.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x i64> %4, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size uno_v2f64 } +; (setcc $a, $b, SETTRUE) is always folded, so we won't get fcaf define void @true_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: true_v4f32: - +; CHECK-LABEL: true_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ldi.b $w0, -1 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a %2 = load <4 x float>, ptr %b %3 = fcmp true <4 x float> %1, %2 %4 = sext <4 x i1> %3 to <4 x i32> store <4 x i32> %4, ptr %c ret void - - ; (setcc $a, $b, SETTRUE) is always folded, so we won't get fcaf: - ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1 - ; CHECK-DAG: st.w [[R1]], 0($4) - ; CHECK: .size 
true_v4f32 } +; (setcc $a, $b, SETTRUE) is always folded. define void @true_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: true_v2f64: - +; CHECK-LABEL: true_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ldi.b $w0, -1 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <2 x double>, ptr %a %2 = load <2 x double>, ptr %b %3 = fcmp true <2 x double> %1, %2 %4 = sext <2 x i1> %3 to <2 x i64> store <2 x i64> %4, ptr %c ret void - - ; (setcc $a, $b, SETTRUE) is always folded. - ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1 - ; CHECK-DAG: st.w [[R1]], 0($4) - ; CHECK: .size true_v2f64 } -define void @bsel_v4f32(ptr %d, ptr %a, ptr %b, - ptr %c) nounwind { - ; CHECK: bsel_v4f32: - +; Note that IfSet and IfClr are swapped since the condition is inverted +define void @bsel_v4f32(ptr %d, ptr %a, ptr %b, ptr %c) nounwind { +; CHECK-LABEL: bsel_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($5) +; CHECK-NEXT: ld.w $w1, 0($6) +; CHECK-NEXT: fclt.w $w1, $w1, $w0 +; CHECK-NEXT: ld.w $w2, 0($7) +; CHECK-NEXT: bsel.v $w1, $w2, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w1, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = load <4 x float>, ptr %c - ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) %4 = fcmp ogt <4 x float> %1, %2 - ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] %5 = select <4 x i1> %4, <4 x float> %1, <4 x float> %3 - ; Note that IfSet and IfClr are swapped since the condition is inverted - ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]] store <4 x float> %5, ptr %d - ; CHECK-DAG: st.w [[R4]], 0($4) - ret void - ; CHECK: .size bsel_v4f32 } -define void @bsel_v2f64(ptr %d, ptr %a, ptr %b, - ptr %c) nounwind { - ; CHECK: bsel_v2f64: - +; Note that IfSet and IfClr are swapped since the condition is inverted +define void @bsel_v2f64(ptr %d, ptr %a, ptr %b, ptr %c) nounwind { +; CHECK-LABEL: bsel_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($5) +; 
CHECK-NEXT: ld.d $w1, 0($6) +; CHECK-NEXT: fclt.d $w1, $w1, $w0 +; CHECK-NEXT: ld.d $w2, 0($7) +; CHECK-NEXT: bsel.v $w1, $w2, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w1, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = load <2 x double>, ptr %c - ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) %4 = fcmp ogt <2 x double> %1, %2 - ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] %5 = select <2 x i1> %4, <2 x double> %1, <2 x double> %3 - ; Note that IfSet and IfClr are swapped since the condition is inverted - ; CHECK-DAG: bsel.v [[R4]], [[R3]], [[R1]] store <2 x double> %5, ptr %d - ; CHECK-DAG: st.d [[R4]], 0($4) - ret void - ; CHECK: .size bsel_v2f64 } -define void @bseli_v4f32(ptr %d, ptr %a, ptr %b, - ptr %c) nounwind { - ; CHECK: bseli_v4f32: - +; Note that IfSet and IfClr are swapped since the condition is inverted +define void @bseli_v4f32(ptr %d, ptr %a, ptr %b, ptr %c) nounwind { +; MIPS-LABEL: bseli_v4f32: +; MIPS: # %bb.0: +; MIPS-NEXT: ld.w $w0, 0($5) +; MIPS-NEXT: ld.w $w1, 0($6) +; MIPS-NEXT: fclt.w $w1, $w1, $w0 +; MIPS-NEXT: ldi.b $w2, 0 +; MIPS-NEXT: shf.b $w2, $w2, 27 +; MIPS-NEXT: bsel.v $w1, $w2, $w0 +; MIPS-NEXT: jr $ra +; MIPS-NEXT: st.w $w1, 0($4) +; +; MIPSEL-LABEL: bseli_v4f32: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: ld.w $w0, 0($5) +; MIPSEL-NEXT: ld.w $w1, 0($6) +; MIPSEL-NEXT: fclt.w $w1, $w1, $w0 +; MIPSEL-NEXT: ldi.b $w2, 0 +; MIPSEL-NEXT: bsel.v $w1, $w2, $w0 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: st.w $w1, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ogt <4 x float> %1, %2 - ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] %4 = select <4 x i1> %3, <4 x float> %1, <4 x float> zeroinitializer - ; Note that IfSet and IfClr are swapped since the condition is inverted - ; CHECK-DAG: bsel.v [[R4]], 
[[R3:\$w[0-9]+]], [[R1]] store <4 x float> %4, ptr %d - ; CHECK-DAG: st.w [[R4]], 0($4) - ret void - ; CHECK: .size bseli_v4f32 } -define void @bseli_v2f64(ptr %d, ptr %a, ptr %b, - ptr %c) nounwind { - ; CHECK: bseli_v2f64: - +; Note that IfSet and IfClr are swapped since the condition is inverted +define void @bseli_v2f64(ptr %d, ptr %a, ptr %b, ptr %c) nounwind { +; MIPS-LABEL: bseli_v2f64: +; MIPS: # %bb.0: +; MIPS-NEXT: ld.d $w0, 0($5) +; MIPS-NEXT: ld.d $w1, 0($6) +; MIPS-NEXT: fclt.d $w1, $w1, $w0 +; MIPS-NEXT: ldi.b $w2, 0 +; MIPS-NEXT: shf.b $w2, $w2, 27 +; MIPS-NEXT: shf.w $w2, $w2, 177 +; MIPS-NEXT: bsel.v $w1, $w2, $w0 +; MIPS-NEXT: jr $ra +; MIPS-NEXT: st.d $w1, 0($4) +; +; MIPSEL-LABEL: bseli_v2f64: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: ld.d $w0, 0($5) +; MIPSEL-NEXT: ld.d $w1, 0($6) +; MIPSEL-NEXT: fclt.d $w1, $w1, $w0 +; MIPSEL-NEXT: ldi.b $w2, 0 +; MIPSEL-NEXT: bsel.v $w1, $w2, $w0 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: st.d $w1, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = fcmp ogt <2 x double> %1, %2 - ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] %4 = select <2 x i1> %3, <2 x double> %1, <2 x double> zeroinitializer - ; Note that IfSet and IfClr are swapped since the condition is inverted - ; CHECK-DAG: bsel.v [[R4]], [[R3:\$w[0-9]+]], [[R1]] store <2 x double> %4, ptr %d - ; CHECK-DAG: st.d [[R4]], 0($4) - ret void - ; CHECK: .size bseli_v2f64 } define void @max_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: max_v4f32: - +; CHECK-LABEL: max_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fmax.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, 
<4 x float> %2) - ; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x float> %3, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size max_v4f32 } define void @max_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: max_v2f64: - +; CHECK-LABEL: max_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fmax.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2) - ; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x double> %3, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size max_v2f64 } define void @min_v4f32(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: min_v4f32: - +; CHECK-LABEL: min_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $w0, 0($6) +; CHECK-NEXT: ld.w $w1, 0($5) +; CHECK-NEXT: fmin.w $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.w $w0, 0($4) %1 = load <4 x float>, ptr %a - ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = load <4 x float>, ptr %b - ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2) - ; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <4 x float> %3, ptr %c - ; CHECK-DAG: st.w [[R3]], 0($4) - ret void - ; CHECK: .size min_v4f32 } define void @min_v2f64(ptr %c, ptr %a, ptr %b) nounwind { - ; CHECK: min_v2f64: - +; CHECK-LABEL: min_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $w0, 0($6) +; CHECK-NEXT: ld.d $w1, 0($5) +; CHECK-NEXT: fmin.d $w0, $w1, $w0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: st.d $w0, 0($4) %1 = load <2 x double>, ptr %a - ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) %2 = load <2 x double>, ptr %b - ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) %3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 
x double> %2) - ; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] store <2 x double> %3, ptr %c - ; CHECK-DAG: st.d [[R3]], 0($4) - ret void - ; CHECK: .size min_v2f64 } diff --git a/llvm/test/CodeGen/NVPTX/alias.ll b/llvm/test/CodeGen/NVPTX/alias.ll index 8ae29b51290ef..01761c21ab103 100644 --- a/llvm/test/CodeGen/NVPTX/alias.ll +++ b/llvm/test/CodeGen/NVPTX/alias.ll @@ -56,8 +56,7 @@ attributes #0 = { noreturn } ; CHECK-NEXT: .noreturn ; CHECK: .visible .func (.param .b32 func_retval0) z() -; CHECK: call.uni (retval0), -; CHECK-NEXT: b, +; CHECK: call.uni (retval0), b, ; CHECK: .alias b, a; diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index 6f115756a8ae7..01e4065a7baa7 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -216,12 +216,7 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll index 2af1e6d7e185b..ad9e4b089e8d8 100644 --- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll +++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll @@ -19,11 +19,7 @@ define void @foo() { ; CHECK-NEXT: .param .align 8 .b8 param0[16]; ; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: st.param.b64 [param0+8], %rd2; -; CHECK-NEXT: call.uni -; CHECK-NEXT: bar, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni bar, (param0); ; CHECK-NEXT: } // 
callseq 0 ; CHECK-NEXT: ret; call void @bar(ptr byval(%struct) @G) diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index a2175dd009f5f..0cd7058174d67 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -48,8 +48,7 @@ entry: ; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]] ; CHECK-NEXT: .param .b64 param1; ; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]] -; CHECK-NEXT: call.uni -; CHECK-NEXT: callee, +; CHECK-NEXT: call.uni callee, call void @callee(ptr %a, ptr %buf) #2 ret void diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll index dc6d504c2c66c..2232810d02128 100644 --- a/llvm/test/CodeGen/NVPTX/combine-mad.ll +++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll @@ -203,12 +203,7 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: .param .b32 param1; ; CHECK-NEXT: st.param.b32 [param1], %r5; ; CHECK-NEXT: .param .b32 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: use, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), use, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r6; diff --git a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll index 5e85bf4554546..39a2d7f9e1504 100644 --- a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll +++ b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll @@ -9,18 +9,16 @@ declare void @conv() convergent declare void @not_conv() define void @test(ptr %f) { - ; CHECK: ConvergentCallUniPrintCall - ; CHECK-NEXT: @conv + ; CHECK: CALL_UNI_conv @conv call void @conv() - ; CHECK: CallUniPrintCall - ; CHECK-NEXT: @not_conv + ; CHECK: CALL_UNI @not_conv call void @not_conv() - ; CHECK: ConvergentCallPrintCall + ; CHECK: CALL_conv %{{[0-9]+}} 
call void %f() convergent - ; CHECK: CallPrintCall + ; CHECK: CALL %{{[0-9]+}} call void %f() ret void diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll index 71a46fa6d4820..d1b478d341915 100644 --- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll +++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll @@ -9,12 +9,7 @@ define %struct.64 @test_return_type_mismatch(ptr %p) { ; CHECK-LABEL: test_return_type_mismatch( ; CHECK: .param .align 1 .b8 retval0[8]; ; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), -; CHECK-NEXT: %rd -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ) -; CHECK-NEXT: , prototype_0; +; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0; %ret = call %struct.64 @callee(ptr %p) ret %struct.64 %ret } @@ -23,12 +18,7 @@ define i64 @test_param_type_mismatch(ptr %p) { ; CHECK-LABEL: test_param_type_mismatch( ; CHECK: .param .b64 retval0; ; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), -; CHECK-NEXT: %rd -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ) -; CHECK-NEXT: , prototype_1; +; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1; %ret = call i64 @callee(i64 7) ret i64 %ret } @@ -37,13 +27,7 @@ define i64 @test_param_count_mismatch(ptr %p) { ; CHECK-LABEL: test_param_count_mismatch( ; CHECK: .param .b64 retval0; ; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _); -; CHECK-NEXT: call (retval0), -; CHECK-NEXT: %rd -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ) -; CHECK-NEXT: , prototype_2; +; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2; %ret = call i64 @callee(ptr %p, i64 7) ret i64 %ret } @@ -52,12 +36,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) { ; CHECK-LABEL: 
test_return_type_mismatch_variadic( ; CHECK: .param .align 1 .b8 retval0[8]; ; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), -; CHECK-NEXT: %rd -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ) -; CHECK-NEXT: , prototype_3; +; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3; %ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p) ret %struct.64 %ret } @@ -65,12 +44,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) { define i64 @test_param_type_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_param_type_mismatch_variadic( ; CHECK: .param .b64 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: callee_variadic -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ) +; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1); %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7) ret i64 %ret } @@ -78,12 +52,7 @@ define i64 @test_param_type_mismatch_variadic(ptr %p) { define i64 @test_param_count_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_param_count_mismatch_variadic( ; CHECK: .param .b64 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: callee_variadic -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ) +; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1); %ret = call i64 (ptr, ...) 
@callee_variadic(ptr %p, i64 7) ret i64 %ret } diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll index b73aea76a4528..4d2ba7d00f872 100644 --- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll @@ -24,11 +24,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) { ; CHECK-32-NEXT: .param .b32 param0; ; CHECK-32-NEXT: st.param.b32 [param0], %r5; ; CHECK-32-NEXT: .param .b32 retval0; -; CHECK-32-NEXT: call.uni (retval0), -; CHECK-32-NEXT: bar, -; CHECK-32-NEXT: ( -; CHECK-32-NEXT: param0 -; CHECK-32-NEXT: ); +; CHECK-32-NEXT: call.uni (retval0), bar, (param0); ; CHECK-32-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-32-NEXT: } // callseq 0 ; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6; @@ -49,11 +45,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) { ; CHECK-64-NEXT: .param .b64 param0; ; CHECK-64-NEXT: st.param.b64 [param0], %rd5; ; CHECK-64-NEXT: .param .b32 retval0; -; CHECK-64-NEXT: call.uni (retval0), -; CHECK-64-NEXT: bar, -; CHECK-64-NEXT: ( -; CHECK-64-NEXT: param0 -; CHECK-64-NEXT: ); +; CHECK-64-NEXT: call.uni (retval0), bar, (param0); ; CHECK-64-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-64-NEXT: } // callseq 0 ; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1; diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll index c905fc04ce780..252edf4b02c76 100644 --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -263,12 +263,7 @@ declare half @test_callee(half %a, half %b) #0 ; CHECK-DAG: st.param.b16 [param0], [[A]]; ; CHECK-DAG: st.param.b16 [param1], [[B]]; ; CHECK-DAG: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; 
CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; @@ -287,12 +282,7 @@ define half @test_call(half %a, half %b) #0 { ; CHECK-DAG: st.param.b16 [param0], [[B]]; ; CHECK-DAG: st.param.b16 [param1], [[A]]; ; CHECK-DAG: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; @@ -311,12 +301,7 @@ define half @test_call_flipped(half %a, half %b) #0 { ; CHECK-DAG: st.param.b16 [param0], [[B]]; ; CHECK-DAG: st.param.b16 [param1], [[A]]; ; CHECK-DAG: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; @@ -650,8 +635,7 @@ else: ; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]]; ; CHECK: { ; CHECK: st.param.b64 [param0], %[[P1]]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_dummy +; CHECK: call.uni (retval0), test_dummy ; CHECK: } ; CHECK: setp.ne.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0; ; CHECK: @[[PRED]] bra [[LOOP]]; diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index fc7f53c5fdca3..8da2c1d1ebac2 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -467,12 +467,7 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: 
param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; @@ -495,12 +490,7 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; @@ -523,12 +513,7 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 2 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll index 327851725991e..b74e531adba3f 100644 --- a/llvm/test/CodeGen/NVPTX/fma.ll +++ b/llvm/test/CodeGen/NVPTX/fma.ll @@ -40,12 +40,7 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { ; CHECK-NEXT: .param .b32 param1; ; CHECK-NEXT: st.param.b32 [param1], %r6; ; CHECK-NEXT: .param .b32 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: dummy_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r7, [retval0]; ; 
CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; @@ -92,12 +87,7 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { ; CHECK-NEXT: .param .b64 param1; ; CHECK-NEXT: st.param.b64 [param1], %rd6; ; CHECK-NEXT: .param .b64 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: dummy_f64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1); ; CHECK-NEXT: ld.param.b64 %rd7, [retval0]; ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll index d253df5ed1b9c..ed8f6b4511079 100644 --- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll +++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll @@ -50,11 +50,7 @@ define void @test_ld_param_escaping(ptr byval(i32) %a) { ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], %rd2; -; CHECK-NEXT: call.uni -; CHECK-NEXT: escape, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni escape, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; call void @escape(ptr %a) @@ -72,11 +68,7 @@ define void @test_ld_param_byval(ptr byval(i32) %a) { ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: call.uni -; CHECK-NEXT: byval_user, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni byval_user, (param0); ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: ret; call void @byval_user(ptr %a) diff --git a/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll b/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll index d40f514acd408..de69d02ded5e4 100644 --- a/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll +++ b/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll @@ -42,11 +42,7 @@ define void @call(fp128 %x) { ; 
CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, %rd2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; call void @call(fp128 %x) diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index bf1fb06c44688..d5ddadf2b21c5 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -647,12 +647,7 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: .param .align 4 .b8 param1[4]; ; COMMON-NEXT: st.param.b32 [param1], %r2; ; COMMON-NEXT: .param .align 4 .b8 retval0[4]; -; COMMON-NEXT: call.uni (retval0), -; COMMON-NEXT: test_callee, -; COMMON-NEXT: ( -; COMMON-NEXT: param0, -; COMMON-NEXT: param1 -; COMMON-NEXT: ); +; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1); ; COMMON-NEXT: ld.param.b32 %r3, [retval0]; ; COMMON-NEXT: } // callseq 0 ; COMMON-NEXT: st.param.b32 [func_retval0], %r3; @@ -675,12 +670,7 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: .param .align 4 .b8 param1[4]; ; COMMON-NEXT: st.param.b32 [param1], %r1; ; COMMON-NEXT: .param .align 4 .b8 retval0[4]; -; COMMON-NEXT: call.uni (retval0), -; COMMON-NEXT: test_callee, -; COMMON-NEXT: ( -; COMMON-NEXT: param0, -; COMMON-NEXT: param1 -; COMMON-NEXT: ); +; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1); ; COMMON-NEXT: ld.param.b32 %r3, [retval0]; ; COMMON-NEXT: } // callseq 1 ; COMMON-NEXT: st.param.b32 [func_retval0], %r3; @@ -703,12 +693,7 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: .param .align 4 .b8 param1[4]; ; COMMON-NEXT: st.param.b32 [param1], %r1; ; COMMON-NEXT: .param .align 4 .b8 retval0[4]; -; COMMON-NEXT: call.uni (retval0), -; 
COMMON-NEXT: test_callee, -; COMMON-NEXT: ( -; COMMON-NEXT: param0, -; COMMON-NEXT: param1 -; COMMON-NEXT: ); +; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1); ; COMMON-NEXT: ld.param.b32 %r3, [retval0]; ; COMMON-NEXT: } // callseq 2 ; COMMON-NEXT: st.param.b32 [func_retval0], %r3; diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 7cc7468bc7de7..72c279bee4268 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -833,12 +833,7 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; @@ -861,12 +856,7 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; @@ -889,12 +879,7 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 { ; CHECK-NEXT: .param .align 4 .b8 param1[4]; ; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; 
CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 2 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll index 1341a04c939c6..eae0321433946 100644 --- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll +++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll @@ -33,13 +33,7 @@ define internal i32 @foo() { ; CHECK-NEXT: st.param.b64 [param1], %rd4; ; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _); -; CHECK-NEXT: call (retval0), -; CHECK-NEXT: %rd1, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ) -; CHECK-NEXT: , prototype_0; +; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0; ; CHECK-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; @@ -76,13 +70,7 @@ define internal i32 @bar() { ; CHECK-NEXT: st.param.b64 [param1], %rd5; ; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _); -; CHECK-NEXT: call (retval0), -; CHECK-NEXT: %rd1, -; CHECK-NEXT: ( -; CHECK-NEXT: param0, -; CHECK-NEXT: param1 -; CHECK-NEXT: ) -; CHECK-NEXT: , prototype_1; +; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1; ; CHECK-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; diff --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll index 419c780f7d82a..9e9705709f2bd 100644 --- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll +++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll @@ -14,10 +14,7 @@ define void @foo(ptr %ptr) { ; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 16 .b8 retval0[16]; -; 
CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: bar, -; CHECK-NEXT: ( -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), bar, (); ; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.v4.b32 [%rd1], {%r1, %r2, %r3, %r4}; diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll index 2bfd891a04a17..a9004d00e7807 100644 --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -58,11 +58,7 @@ define ptx_kernel void @foo2(i32 %a) { ; PTX32-NEXT: { // callseq 0, 0 ; PTX32-NEXT: .param .b32 param0; ; PTX32-NEXT: st.param.b32 [param0], %r2; -; PTX32-NEXT: call.uni -; PTX32-NEXT: bar, -; PTX32-NEXT: ( -; PTX32-NEXT: param0 -; PTX32-NEXT: ); +; PTX32-NEXT: call.uni bar, (param0); ; PTX32-NEXT: } // callseq 0 ; PTX32-NEXT: ret; ; @@ -84,11 +80,7 @@ define ptx_kernel void @foo2(i32 %a) { ; PTX64-NEXT: { // callseq 0, 0 ; PTX64-NEXT: .param .b64 param0; ; PTX64-NEXT: st.param.b64 [param0], %rd1; -; PTX64-NEXT: call.uni -; PTX64-NEXT: bar, -; PTX64-NEXT: ( -; PTX64-NEXT: param0 -; PTX64-NEXT: ); +; PTX64-NEXT: call.uni bar, (param0); ; PTX64-NEXT: } // callseq 0 ; PTX64-NEXT: ret; %local = alloca i32, align 4 @@ -159,20 +151,12 @@ define void @foo4() { ; PTX32-NEXT: { // callseq 1, 0 ; PTX32-NEXT: .param .b32 param0; ; PTX32-NEXT: st.param.b32 [param0], %r1; -; PTX32-NEXT: call.uni -; PTX32-NEXT: bar, -; PTX32-NEXT: ( -; PTX32-NEXT: param0 -; PTX32-NEXT: ); +; PTX32-NEXT: call.uni bar, (param0); ; PTX32-NEXT: } // callseq 1 ; PTX32-NEXT: { // callseq 2, 0 ; PTX32-NEXT: .param .b32 param0; ; PTX32-NEXT: st.param.b32 [param0], %r3; -; PTX32-NEXT: call.uni -; PTX32-NEXT: bar, -; PTX32-NEXT: ( -; PTX32-NEXT: param0 -; PTX32-NEXT: ); +; PTX32-NEXT: call.uni bar, (param0); ; PTX32-NEXT: } // callseq 2 ; PTX32-NEXT: ret; ; @@ -197,20 +181,12 @@ define void @foo4() { ; PTX64-NEXT: { // callseq 1, 0 ; PTX64-NEXT: .param .b64 param0; ; 
PTX64-NEXT: st.param.b64 [param0], %rd1; -; PTX64-NEXT: call.uni -; PTX64-NEXT: bar, -; PTX64-NEXT: ( -; PTX64-NEXT: param0 -; PTX64-NEXT: ); +; PTX64-NEXT: call.uni bar, (param0); ; PTX64-NEXT: } // callseq 1 ; PTX64-NEXT: { // callseq 2, 0 ; PTX64-NEXT: .param .b64 param0; ; PTX64-NEXT: st.param.b64 [param0], %rd3; -; PTX64-NEXT: call.uni -; PTX64-NEXT: bar, -; PTX64-NEXT: ( -; PTX64-NEXT: param0 -; PTX64-NEXT: ); +; PTX64-NEXT: call.uni bar, (param0); ; PTX64-NEXT: } // callseq 2 ; PTX64-NEXT: ret; %A = alloca i32 diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index c3f94455b3038..0a2cd81ac904c 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -133,12 +133,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-NEXT: st.param.b64 [param0], %rd3; ; PTX-NEXT: .param .b32 retval0; ; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _); -; PTX-NEXT: call (retval0), -; PTX-NEXT: %rd1, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ) -; PTX-NEXT: , prototype_0; +; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0; ; PTX-NEXT: ld.param.b32 %r1, [retval0]; ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; @@ -182,14 +177,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: st.param.b64 [param2], %rd4; ; PTX-NEXT: .param .b32 retval0; ; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _); -; PTX-NEXT: call (retval0), -; PTX-NEXT: %rd1, -; PTX-NEXT: ( -; PTX-NEXT: param0, -; PTX-NEXT: param1, -; PTX-NEXT: param2 -; PTX-NEXT: ) -; PTX-NEXT: , prototype_1; +; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1; ; PTX-NEXT: ld.param.b32 %r2, [retval0]; ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; @@ -284,12 +272,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr 
byval(i32) %input, ptr %ou ; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b32 retval0; ; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _); -; PTX-NEXT: call (retval0), -; PTX-NEXT: %rd1, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ) -; PTX-NEXT: , prototype_2; +; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2; ; PTX-NEXT: ld.param.b32 %r3, [retval0]; ; PTX-NEXT: } // callseq 2 ; PTX-NEXT: ret; @@ -330,12 +313,7 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b32 retval0; ; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _); -; PTX-NEXT: call (retval0), -; PTX-NEXT: %rd1, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ) -; PTX-NEXT: , prototype_3; +; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3; ; PTX-NEXT: ld.param.b32 %r4, [retval0]; ; PTX-NEXT: } // callseq 3 ; PTX-NEXT: st.param.b32 [func_retval0], %r3; @@ -561,11 +539,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { ; PTX-NEXT: { // callseq 4, 0 ; PTX-NEXT: .param .align 4 .b8 param0[4]; ; PTX-NEXT: st.param.b32 [param0], %r1; -; PTX-NEXT: call.uni -; PTX-NEXT: device_func, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni device_func, (param0); ; PTX-NEXT: } // callseq 4 ; PTX-NEXT: ret; call void @device_func(ptr byval(i32) align 4 %input) diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index 246408ecf6a3a..6f334b075241b 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes IR,IRC ; RUN: 
opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes IR,IRO ; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX,PTXC @@ -47,11 +47,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 % ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b64 retval0; -; PTX-NEXT: call.uni (retval0), -; PTX-NEXT: escape, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni (retval0), escape, (param0); ; PTX-NEXT: ld.param.b64 %rd6, [retval0]; ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; @@ -89,11 +85,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) { ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd2; ; PTX-NEXT: .param .b64 retval0; -; PTX-NEXT: call.uni (retval0), -; PTX-NEXT: escape, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni (retval0), escape, (param0); ; PTX-NEXT: ld.param.b64 %rd3, [retval0]; ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index 54495cf0d61f3..d268562914755 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -153,11 +153,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out ; PTX-NEXT: { // callseq 0, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd1; -; PTX-NEXT: call.uni -; PTX-NEXT: _Z6escapePv, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni _Z6escapePv, (param0); ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; entry: @@ -198,11 +194,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone ; PTX-NEXT: { // callseq 1, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd3; -; PTX-NEXT: call.uni -; PTX-NEXT: _Z6escapePv, -; 
PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni _Z6escapePv, (param0); ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; entry: @@ -902,11 +894,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { ; PTX-NEXT: { // callseq 2, 0 ; PTX-NEXT: .param .align 4 .b8 param0[4]; ; PTX-NEXT: st.param.b32 [param0], %r1; -; PTX-NEXT: call.uni -; PTX-NEXT: device_func, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni device_func, (param0); ; PTX-NEXT: } // callseq 2 ; PTX-NEXT: ret; call void @device_func(ptr byval(i32) align 4 %input) @@ -929,11 +917,7 @@ define void @device_func(ptr byval(i32) align 4 %input) { ; PTX-NEXT: { // callseq 3, 0 ; PTX-NEXT: .param .align 4 .b8 param0[4]; ; PTX-NEXT: st.param.b32 [param0], %r1; -; PTX-NEXT: call.uni -; PTX-NEXT: device_func, -; PTX-NEXT: ( -; PTX-NEXT: param0 -; PTX-NEXT: ); +; PTX-NEXT: call.uni device_func, (param0); ; PTX-NEXT: } // callseq 3 ; PTX-NEXT: ret; call void @device_func(ptr byval(i32) align 4 %input) diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll index 7e907990147a5..2e9eb6913ac0e 100644 --- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll +++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll @@ -21,11 +21,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) { ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000; ; CHECK-NEXT: .param .b64 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: quux, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), quux, (param0); ; CHECK-NEXT: ld.param.b64 %rd1, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3; diff --git a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll index a1f0577c2218b..448960181ae42 100644 --- a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll 
+++ b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll @@ -11,10 +11,7 @@ define dso_local void @naked() naked "frame-pointer"="all" { ; CHECK-32-EMPTY: ; CHECK-32-NEXT: // %bb.0: ; CHECK-32-NEXT: { // callseq 0, 0 -; CHECK-32-NEXT: call.uni -; CHECK-32-NEXT: main, -; CHECK-32-NEXT: ( -; CHECK-32-NEXT: ); +; CHECK-32-NEXT: call.uni main, (); ; CHECK-32-NEXT: } // callseq 0 ; CHECK-32-NEXT: // begin inline asm ; CHECK-32-NEXT: exit; @@ -26,10 +23,7 @@ define dso_local void @naked() naked "frame-pointer"="all" { ; CHECK-64-EMPTY: ; CHECK-64-NEXT: // %bb.0: ; CHECK-64-NEXT: { // callseq 0, 0 -; CHECK-64-NEXT: call.uni -; CHECK-64-NEXT: main, -; CHECK-64-NEXT: ( -; CHECK-64-NEXT: ); +; CHECK-64-NEXT: call.uni main, (); ; CHECK-64-NEXT: } // callseq 0 ; CHECK-64-NEXT: // begin inline asm ; CHECK-64-NEXT: exit; @@ -45,10 +39,7 @@ define dso_local void @normal() "frame-pointer"="all" { ; CHECK-32-EMPTY: ; CHECK-32-NEXT: // %bb.0: ; CHECK-32-NEXT: { // callseq 1, 0 -; CHECK-32-NEXT: call.uni -; CHECK-32-NEXT: main, -; CHECK-32-NEXT: ( -; CHECK-32-NEXT: ); +; CHECK-32-NEXT: call.uni main, (); ; CHECK-32-NEXT: } // callseq 1 ; CHECK-32-NEXT: // begin inline asm ; CHECK-32-NEXT: exit; @@ -60,10 +51,7 @@ define dso_local void @normal() "frame-pointer"="all" { ; CHECK-64-EMPTY: ; CHECK-64-NEXT: // %bb.0: ; CHECK-64-NEXT: { // callseq 1, 0 -; CHECK-64-NEXT: call.uni -; CHECK-64-NEXT: main, -; CHECK-64-NEXT: ( -; CHECK-64-NEXT: ); +; CHECK-64-NEXT: call.uni main, (); ; CHECK-64-NEXT: } // callseq 1 ; CHECK-64-NEXT: // begin inline asm ; CHECK-64-NEXT: exit; diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll index 4fc8786c1e2fe..cd2664e913824 100644 --- a/llvm/test/CodeGen/NVPTX/param-add.ll +++ b/llvm/test/CodeGen/NVPTX/param-add.ll @@ -37,11 +37,7 @@ define i32 @test(%struct.1float alignstack(32) %data) { ; CHECK-NEXT: st.param.b8 [param0+2], %r12; ; CHECK-NEXT: st.param.b8 [param0+3], %r13; ; CHECK-NEXT: .param .b32 retval0; -; 
CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), callee, (param0); ; CHECK-NEXT: ld.param.b32 %r14, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r14; diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 4bea710e6dd93..263477df1dbfe 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -32,8 +32,7 @@ ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[C]] ; CHECK: .param .b32 retval0; -; CHECK: call.uni -; CHECK-NEXT: test_i1, +; CHECK: call.uni (retval0), test_i1, ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -76,8 +75,7 @@ define signext i1 @test_i1s(i1 signext %a) { ; CHECK-DAG: st.param.b8 [param0], [[E0]]; ; CHECK-DAG: st.param.b8 [param0+2], [[E2]]; ; CHECK: .param .align 1 .b8 retval0[1]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v3i1, +; CHECK: call.uni (retval0), test_v3i1, ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; ; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]] @@ -95,8 +93,7 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) { ; CHECK: .param .align 1 .b8 param0[1]; ; CHECK: st.param.b8 [param0], [[E0]]; ; CHECK: .param .align 1 .b8 retval0[1]; -; CHECK: call.uni (retval0), -; CHECK: test_v4i1, +; CHECK: call.uni (retval0), test_v4i1, ; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1]; ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; @@ -120,8 +117,7 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) { ; CHECK-DAG: st.param.b8 [param0], [[E0]]; ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 1 .b8 retval0[1]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v5i1, +; 
CHECK: call.uni (retval0), test_v5i1, ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]] @@ -139,8 +135,7 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK: test_i2, +; CHECK: call.uni (retval0), test_i2, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -156,8 +151,7 @@ define i2 @test_i2(i2 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK: test_i3, +; CHECK: call.uni (retval0), test_i3, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -176,8 +170,7 @@ define i3 @test_i3(i3 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK: test_i8, +; CHECK: call.uni (retval0), test_i8, ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -196,8 +189,7 @@ define i8 @test_i8(i8 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK: test_i8s, +; CHECK: call.uni (retval0), test_i8s, ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ? 
; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]]; @@ -216,8 +208,7 @@ define signext i8 @test_i8s(i8 signext %a) { ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.b32 [param0], [[R]] ; CHECK: .param .align 4 .b8 retval0[4]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v3i8, +; CHECK: call.uni (retval0), test_v3i8, ; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0]; ; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very ; interesting here, so it's skipped. @@ -235,8 +226,7 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) { ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.b32 [param0], [[R]]; ; CHECK: .param .align 4 .b8 retval0[4]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v4i8, +; CHECK: call.uni (retval0), test_v4i8, ; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[RET]]; ; CHECK-NEXT: ret; @@ -254,8 +244,7 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) { ; CHECK-DAG: st.param.v4.b8 [param0], ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v5i8, +; CHECK: call.uni (retval0), test_v5i8, ; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} @@ -272,8 +261,7 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) { ; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0]; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i11, +; CHECK: call.uni (retval0), test_i11, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -290,8 +278,7 @@ define i11 @test_i11(i11 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: .param .b32 retval0; -; CHECK: call.uni 
(retval0), -; CHECK-NEXT: test_i16, +; CHECK: call.uni (retval0), test_i16, ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -309,8 +296,7 @@ define i16 @test_i16(i16 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i16s, +; CHECK: call.uni (retval0), test_i16s, ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]]; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -329,8 +315,7 @@ define signext i16 @test_i16s(i16 signext %a) { ; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v3i16, +; CHECK: call.uni (retval0), test_v3i16, ; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0]; ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]}; @@ -348,8 +333,7 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) { ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v4i16, +; CHECK: call.uni (retval0), test_v4i16, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]} ; CHECK-NEXT: ret; @@ -367,8 +351,7 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) { ; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v5i16, +; CHECK: call.uni (retval0), test_v5i16, ; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, 
[retval0]; ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8]; ; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} @@ -386,8 +369,7 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) { ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_f16, +; CHECK: call.uni (retval0), test_f16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]] ; CHECK-NEXT: ret; @@ -403,8 +385,7 @@ define half @test_f16(half %a) { ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v2f16, +; CHECK: call.uni (retval0), test_v2f16, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]] ; CHECK-NEXT: ret; @@ -420,8 +401,7 @@ define <2 x half> @test_v2f16(<2 x half> %a) { ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_bf16, +; CHECK: call.uni (retval0), test_bf16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]] ; CHECK-NEXT: ret; @@ -437,8 +417,7 @@ define bfloat @test_bf16(bfloat %a) { ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v2bf16, +; CHECK: call.uni (retval0), test_v2bf16, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]] ; CHECK-NEXT: ret; @@ -457,8 +436,7 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) { ; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]}; ; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: call.uni (retval0), -; CHECK: test_v3f16, +; CHECK: 
call.uni (retval0), test_v3f16, ; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]}; @@ -476,8 +454,7 @@ define <3 x half> @test_v3f16(<3 x half> %a) { ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]}; ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: call.uni (retval0), -; CHECK: test_v4f16, +; CHECK: call.uni (retval0), test_v4f16, ; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]}; ; CHECK: ret; @@ -495,8 +472,7 @@ define <4 x half> @test_v4f16(<4 x half> %a) { ; CHECK-DAG: st.param.v4.b16 [param0], ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK: call.uni (retval0), -; CHECK: test_v5f16, +; CHECK: call.uni (retval0), test_v5f16, ; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8]; ; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]}; @@ -514,8 +490,7 @@ define <5 x half> @test_v5f16(<5 x half> %a) { ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]}; ; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK: call.uni (retval0), -; CHECK: test_v8f16, +; CHECK: call.uni (retval0), test_v8f16, ; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; ; CHECK: ret; @@ -535,8 +510,7 @@ define <8 x half> @test_v8f16(<8 x half> %a) { ; CHECK-DAG: st.param.v4.b16 [param0+8], ; CHECK-DAG: st.param.b16 [param0+16], [[E8]]; ; CHECK: .param .align 32 .b8 retval0[32]; -; CHECK: call.uni (retval0), -; CHECK: test_v9f16, 
+; CHECK: call.uni (retval0), test_v9f16, ; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8]; ; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16]; @@ -557,8 +531,7 @@ define <9 x half> @test_v9f16(<9 x half> %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i19, +; CHECK: call.uni (retval0), test_i19, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -575,8 +548,7 @@ define i19 @test_i19(i19 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i23, +; CHECK: call.uni (retval0), test_i23, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -593,8 +565,7 @@ define i23 @test_i23(i23 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i24, +; CHECK: call.uni (retval0), test_i24, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -610,8 +581,7 @@ define i24 @test_i24(i24 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i29, +; CHECK: call.uni (retval0), test_i29, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; ; CHECK-NEXT: ret; @@ -627,8 +597,7 @@ define i29 @test_i29(i29 %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .b32 retval0; -; CHECK: call.uni 
(retval0), -; CHECK-NEXT: test_i32, +; CHECK: call.uni (retval0), test_i32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -646,8 +615,7 @@ define i32 @test_i32(i32 %a) { ; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b32 [param0+8], [[E2]]; ; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v3i32, +; CHECK: call.uni (retval0), test_v3i32, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; ; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}; @@ -665,8 +633,7 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) { ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: .param .align 16 .b8 retval0[16]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v4i32, +; CHECK: call.uni (retval0), test_v4i32, ; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} ; CHECK-NEXT: ret; @@ -684,8 +651,7 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) { ; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b32 [param0+16], [[E4]]; ; CHECK: .param .align 32 .b8 retval0[32]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v5i32, +; CHECK: call.uni (retval0), test_v5i32, ; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} @@ -703,8 +669,7 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) { ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .b32 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_f32, +; 
CHECK: call.uni (retval0), test_f32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -721,8 +686,7 @@ define float @test_f32(float %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i40, +; CHECK: call.uni (retval0), test_i40, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; @@ -739,8 +703,7 @@ define i40 @test_i40(i40 %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i47, +; CHECK: call.uni (retval0), test_i47, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; @@ -757,8 +720,7 @@ define i47 @test_i47(i47 %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i48, +; CHECK: call.uni (retval0), test_i48, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; @@ -776,8 +738,7 @@ define i48 @test_i48(i48 %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i51, +; CHECK: call.uni (retval0), test_i51, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; @@ -795,8 +756,7 @@ define i51 @test_i51(i51 %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i56, +; CHECK: call.uni (retval0), test_i56, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; 
CHECK-NEXT: ret; @@ -812,8 +772,7 @@ define i56 @test_i56(i56 %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i57, +; CHECK: call.uni (retval0), test_i57, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; ; CHECK-NEXT: ret; @@ -829,8 +788,7 @@ define i57 @test_i57(i57 %a) { ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: .param .b64 retval0; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_i64, +; CHECK: call.uni (retval0), test_i64, ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; ; CHECK: st.param.b64 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -848,8 +806,7 @@ define i64 @test_i64(i64 %a) { ; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.b64 [param0+16], [[E2]]; ; CHECK: .param .align 32 .b8 retval0[32]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v3i64, +; CHECK: call.uni (retval0), test_v3i64, ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; ; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16]; ; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]}; @@ -872,8 +829,7 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) { ; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; ; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]}; ; CHECK: .param .align 32 .b8 retval0[32]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_v4i64, +; CHECK: call.uni (retval0), test_v4i64, ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16]; ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]}; @@ -893,8 +849,7 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) { ; CHECK: .param .align 1 .b8 param0[1]; ; CHECK: st.param.b8 [param0], [[A]] ; CHECK: .param .align 1 .b8 retval0[1]; -; CHECK: call.uni -; CHECK-NEXT: test_s_i1, 
+; CHECK: call.uni (retval0), test_s_i1, ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b8 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -910,8 +865,7 @@ define %s_i1 @test_s_i1(%s_i1 %a) { ; CHECK: .param .align 1 .b8 param0[1]; ; CHECK: st.param.b8 [param0], [[A]] ; CHECK: .param .align 1 .b8 retval0[1]; -; CHECK: call.uni -; CHECK-NEXT: test_s_i8, +; CHECK: call.uni (retval0), test_s_i8, ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b8 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -927,8 +881,7 @@ define %s_i8 @test_s_i8(%s_i8 %a) { ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni -; CHECK-NEXT: test_s_i16, +; CHECK: call.uni (retval0), test_s_i16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -944,8 +897,7 @@ define %s_i16 @test_s_i16(%s_i16 %a) { ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; -; CHECK: call.uni -; CHECK-NEXT: test_s_f16, +; CHECK: call.uni (retval0), test_s_f16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -961,8 +913,7 @@ define %s_f16 @test_s_f16(%s_f16 %a) { ; CHECK: .param .align 4 .b8 param0[4] ; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_s_i32, +; CHECK: call.uni (retval0), test_s_i32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -978,8 +929,7 @@ define %s_i32 @test_s_i32(%s_i32 %a) { ; CHECK: .param .align 4 .b8 param0[4] ; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_s_f32, +; CHECK: call.uni (retval0), test_s_f32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; 
CHECK: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -995,8 +945,7 @@ define %s_f32 @test_s_f32(%s_f32 %a) { ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: .param .align 8 .b8 retval0[8]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_s_i64, +; CHECK: call.uni (retval0), test_s_i64, ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; ; CHECK: st.param.b64 [func_retval0], [[R]]; ; CHECK-NEXT: ret; @@ -1021,8 +970,7 @@ define %s_i64 @test_s_i64(%s_i64 %a) { ; CHECK-DAG: st.param.b32 [param0+12], [[E3]]; ; CHECK-DAG: st.param.b64 [param0+16], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_s_i32f32, +; CHECK: call.uni (retval0), test_s_i32f32, ; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4]; ; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; @@ -1051,8 +999,7 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) { ; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]}; ; CHECK: st.param.b64 [param0+16], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK: call.uni (retval0), -; CHECK-NEXT: test_s_i32x4, +; CHECK: call.uni (retval0), test_s_i32x4, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8]; ; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16]; @@ -1081,8 +1028,7 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) { ; CHECK: st.param.b32 [param0+16], [[E4]]; ; CHECK: st.param.b64 [param0+24], [[E5]]; ; CHECK: .param .align 8 .b8 retval0[32]; -; CHECK: call.uni (retval0), -; CHECK: test_s_i1i32x4, +; CHECK: call.uni (retval0), test_s_i1i32x4, ; CHECK: ( ; CHECK: param0 ; CHECK: ); @@ -1160,8 +1106,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { ; CHECK-DAG: st.param.b8 [param0+23], ; CHECK-DAG: st.param.b8 [param0+24], ; CHECK: .param .align 1 .b8 retval0[25]; -; CHECK: call.uni (retval0), 
-; CHECK-NEXT: test_s_i1i32x4p, +; CHECK: call.uni (retval0), test_s_i1i32x4p, ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0]; ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1]; ; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2]; @@ -1237,8 +1182,7 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) { ; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]}; ; CHECK: st.param.b32 [param0+64], [[E15]]; ; CHECK: .param .align 16 .b8 retval0[80]; -; CHECK: call.uni (retval0), -; CHECK: test_s_crossfield, +; CHECK: call.uni (retval0), test_s_crossfield, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; ; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16]; diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll index 22a648c7a9786..f490c5f73d425 100644 --- a/llvm/test/CodeGen/NVPTX/param-overalign.ll +++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx | FileCheck %s ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %} @@ -18,27 +19,23 @@ target triple = "nvptx64-nvidia-cuda" ; CHECK-NEXT: ; define float @caller_md(float %a, float %b) { -; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller_md( -; CHECK-NEXT: .param .b32 caller_md_param_0, -; CHECK-NEXT: .param .b32 caller_md_param_1 -; CHECK-NEXT: ) -; CHECK-NEXT: { - -; CHECK: ld.param.b32 %r1, [caller_md_param_0]; +; CHECK-LABEL: caller_md( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [caller_md_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1]; -; CHECK-NEXT: { +; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 
param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: .param .b32 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: callee_md, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), callee_md, (param0); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; -; CHECK-NEXT: } +; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; + %s1 = insertvalue %struct.float2 poison, float %a, 0 %s2 = insertvalue %struct.float2 %s1, float %b, 1 %r = call float @callee_md(%struct.float2 %s2) @@ -46,15 +43,16 @@ define float @caller_md(float %a, float %b) { } define float @callee_md(%struct.float2 alignstack(8) %a) { -; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md( -; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8] -; CHECK-NEXT: ) -; CHECK-NEXT: { - -; CHECK: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0]; +; CHECK-LABEL: callee_md( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0]; ; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2; ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; + %v0 = extractvalue %struct.float2 %a, 0 %v1 = extractvalue %struct.float2 %a, 1 %2 = fadd float %v0, %v1 @@ -62,27 +60,23 @@ define float @callee_md(%struct.float2 alignstack(8) %a) { } define float @caller(float %a, float %b) { -; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller( -; CHECK-NEXT: .param .b32 caller_param_0, -; CHECK-NEXT: .param .b32 caller_param_1 -; CHECK-NEXT: ) -; CHECK-NEXT: { - -; CHECK: ld.param.b32 %r1, [caller_param_0]; +; CHECK-LABEL: caller( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [caller_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1]; -; CHECK-NEXT: { +; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: 
st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: .param .b32 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: callee, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), callee, (param0); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; -; CHECK-NEXT: } +; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; + %s1 = insertvalue %struct.float2 poison, float %a, 0 %s2 = insertvalue %struct.float2 %s1, float %b, 1 %r = call float @callee(%struct.float2 %s2) @@ -90,15 +84,16 @@ define float @caller(float %a, float %b) { } define float @callee(%struct.float2 alignstack(8) %a ) { -; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee( -; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8] -; CHECK-NEXT: ) -; CHECK-NEXT: { - -; CHECK: ld.param.v2.b32 {%r1, %r2}, [callee_param_0]; +; CHECK-LABEL: callee( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_param_0]; ; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2; ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; + %v0 = extractvalue %struct.float2 %a, 0 %v1 = extractvalue %struct.float2 %a, 1 %2 = fadd float %v0, %v1 @@ -106,9 +101,15 @@ define float @callee(%struct.float2 alignstack(8) %a ) { } define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) { -; CHECK-LABEL: .visible .func (.param .align 8 .b8 func_retval0[8]) aligned_return( -; CHECK-NEXT: .param .align 4 .b8 aligned_return_param_0[8] -; CHECK-NEXT: ) -; CHECK-NEXT: { +; CHECK-LABEL: aligned_return( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0+4]; +; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0+4], %r1; +; CHECK-NEXT: ret; ret %struct.float2 %a } diff --git 
a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll index abb1aff867754..892e49a5fe82a 100644 --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -86,11 +86,7 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[4]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x1, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x1, (param0); ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; %1 = load i32, ptr %in, align 4 %call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2 @@ -118,11 +114,7 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .align 16 .b8 param0[8]; ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[8]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x2, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x2, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %agg.tmp = alloca %struct.St4x2, align 8 %1 = load i64, ptr %in, align 4 @@ -160,11 +152,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[12]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x3, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x3, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8]; %call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 
4 %in) #2 @@ -207,11 +195,7 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x4, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x4, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2 %.fca.0.extract = extractvalue [4 x i32] %call, 0 @@ -258,11 +242,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[20]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x5, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x5, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16]; %call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2 @@ -318,11 +298,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[24]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x6, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x6, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, 
[retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; %call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2 @@ -385,11 +361,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[28]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x7, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x7, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+24]; @@ -460,11 +432,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St4x8, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St4x8, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; %call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2 @@ -537,11 +505,7 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 param0; ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[8]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St8x1, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: 
call.uni (retval0), callee_St8x1, (param0); ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; %1 = load i64, ptr %in, align 8 %call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2 @@ -569,11 +533,7 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St8x2, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St8x2, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2 %.fca.0.extract = extractvalue [2 x i64] %call, 0 @@ -608,11 +568,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[24]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St8x3, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St8x3, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16]; %call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2 @@ -656,11 +612,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; - ; CHECK: call.uni (retval0), - ; CHECK-NEXT: callee_St8x4, - ; CHECK-NEXT: ( - ; CHECK-NEXT: param0 - ; CHECK-NEXT: ); + ; CHECK: call.uni (retval0), callee_St8x4, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, 
{{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16]; %call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2 diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll index b165b4cb4b262..f0813609268e9 100644 --- a/llvm/test/CodeGen/NVPTX/shift-opt.ll +++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll @@ -131,11 +131,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) { ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], %rd3; -; CHECK-NEXT: call.uni -; CHECK-NEXT: use, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni use, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; @@ -164,11 +160,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) { ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], %rd2; -; CHECK-NEXT: call.uni -; CHECK-NEXT: use, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni use, (param0); ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll index bdab9958fe2b2..50d3e8049a947 100644 --- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll +++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll @@ -28,11 +28,7 @@ define void @st_param_i8_i16() { ; CHECK-NEXT: .param .align 2 .b8 param0[4]; ; CHECK-NEXT: st.param.b8 [param0], 1; ; CHECK-NEXT: st.param.b16 [param0+2], 2; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_i8_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_i8_i16, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; call void @call_i8_i16(%struct.A { i8 1, i16 2 }) @@ -48,11 +44,7 @@ define void @st_param_i32() { ; CHECK-NEXT: { // callseq 1, 
0 ; CHECK-NEXT: .param .b32 param0; ; CHECK-NEXT: st.param.b32 [param0], 3; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_i32, (param0); ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: ret; call void @call_i32(i32 3) @@ -68,11 +60,7 @@ define void @st_param_i64() { ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], 4; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_i64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_i64, (param0); ; CHECK-NEXT: } // callseq 2 ; CHECK-NEXT: ret; call void @call_i64(i64 4) @@ -88,11 +76,7 @@ define void @st_param_f32() { ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .b32 param0; ; CHECK-NEXT: st.param.b32 [param0], 0f40A00000; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_f32, (param0); ; CHECK-NEXT: } // callseq 3 ; CHECK-NEXT: ret; call void @call_f32(float 5.0) @@ -108,11 +92,7 @@ define void @st_param_f64() { ; CHECK-NEXT: { // callseq 4, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_f64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_f64, (param0); ; CHECK-NEXT: } // callseq 4 ; CHECK-NEXT: ret; call void @call_f64(double 6.0) @@ -134,11 +114,7 @@ define void @st_param_v2_i8_ii() { ; CHECK-NEXT: { // callseq 5, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[2]; ; CHECK-NEXT: st.param.v2.b8 [param0], {1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i8, (param0); ; CHECK-NEXT: } // callseq 5 ; CHECK-NEXT: ret; call void @call_v2_i8(%struct.char2 { i8 1, i8 2 }) @@ -154,11 +130,7 @@ define void @st_param_v2_i8_ir(i8 %val) { ; CHECK-NEXT: { // 
callseq 6, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[2]; ; CHECK-NEXT: st.param.v2.b8 [param0], {1, %rs1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i8, (param0); ; CHECK-NEXT: } // callseq 6 ; CHECK-NEXT: ret; %struct.ir0 = insertvalue %struct.char2 poison, i8 1, 0 @@ -176,11 +148,7 @@ define void @st_param_v2_i8_ri(i8 %val) { ; CHECK-NEXT: { // callseq 7, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[2]; ; CHECK-NEXT: st.param.v2.b8 [param0], {%rs1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i8, (param0); ; CHECK-NEXT: } // callseq 7 ; CHECK-NEXT: ret; %struct.ri0 = insertvalue %struct.char2 poison, i8 %val, 0 @@ -198,11 +166,7 @@ define void @st_param_v2_i16_ii() { ; CHECK-NEXT: { // callseq 8, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i16, (param0); ; CHECK-NEXT: } // callseq 8 ; CHECK-NEXT: ret; call void @call_v2_i16(%struct.short2 { i16 1, i16 2 }) @@ -218,11 +182,7 @@ define void @st_param_v2_i16_ir(i16 %val) { ; CHECK-NEXT: { // callseq 9, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v2.b16 [param0], {1, %rs1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i16, (param0); ; CHECK-NEXT: } // callseq 9 ; CHECK-NEXT: ret; %struct.ir0 = insertvalue %struct.short2 poison, i16 1, 0 @@ -240,11 +200,7 @@ define void @st_param_v2_i16_ri(i16 %val) { ; CHECK-NEXT: { // callseq 10, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v2.b16 [param0], {%rs1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 
-; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i16, (param0); ; CHECK-NEXT: } // callseq 10 ; CHECK-NEXT: ret; %struct.ri0 = insertvalue %struct.short2 poison, i16 %val, 0 @@ -262,11 +218,7 @@ define void @st_param_v2_i32_ii() { ; CHECK-NEXT: { // callseq 11, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i32, (param0); ; CHECK-NEXT: } // callseq 11 ; CHECK-NEXT: ret; call void @call_v2_i32(%struct.int2 { i32 1, i32 2 }) @@ -282,11 +234,7 @@ define void @st_param_v2_i32_ir(i32 %val) { ; CHECK-NEXT: { // callseq 12, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {1, %r1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i32, (param0); ; CHECK-NEXT: } // callseq 12 ; CHECK-NEXT: ret; %struct.ir0 = insertvalue %struct.int2 poison, i32 1, 0 @@ -304,11 +252,7 @@ define void @st_param_v2_i32_ri(i32 %val) { ; CHECK-NEXT: { // callseq 13, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i32, (param0); ; CHECK-NEXT: } // callseq 13 ; CHECK-NEXT: ret; %struct.ri0 = insertvalue %struct.int2 poison, i32 %val, 0 @@ -326,11 +270,7 @@ define void @st_param_v2_i64_ii() { ; CHECK-NEXT: { // callseq 14, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i64, (param0); ; CHECK-NEXT: } // callseq 14 ; CHECK-NEXT: ret; call void @call_v2_i64(%struct.longlong2 { i64 1, i64 2 }) @@ -346,11 +286,7 @@ 
define void @st_param_v2_i64_ir(i64 %val) { ; CHECK-NEXT: { // callseq 15, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {1, %rd1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i64, (param0); ; CHECK-NEXT: } // callseq 15 ; CHECK-NEXT: ret; %struct.ir0 = insertvalue %struct.longlong2 poison, i64 1, 0 @@ -368,11 +304,7 @@ define void @st_param_v2_i64_ri(i64 %val) { ; CHECK-NEXT: { // callseq 16, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, 2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_i64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_i64, (param0); ; CHECK-NEXT: } // callseq 16 ; CHECK-NEXT: ret; %struct.ri0 = insertvalue %struct.longlong2 poison, i64 %val, 0 @@ -390,11 +322,7 @@ define void @st_param_v2_f32_ii(float %val) { ; CHECK-NEXT: { // callseq 17, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {0f3F800000, 0f40000000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_f32, (param0); ; CHECK-NEXT: } // callseq 17 ; CHECK-NEXT: ret; call void @call_v2_f32(%struct.float2 { float 1.0, float 2.0 }) @@ -410,11 +338,7 @@ define void @st_param_v2_f32_ir(float %val) { ; CHECK-NEXT: { // callseq 18, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {0f3F800000, %r1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_f32, (param0); ; CHECK-NEXT: } // callseq 18 ; CHECK-NEXT: ret; %struct.ir0 = insertvalue %struct.float2 poison, float 1.0, 0 @@ -432,11 +356,7 @@ define void @st_param_v2_f32_ri(float %val) { ; CHECK-NEXT: { // callseq 19, 0 ; CHECK-NEXT: .param .align 8 .b8 
param0[8]; ; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 0f40000000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_f32, (param0); ; CHECK-NEXT: } // callseq 19 ; CHECK-NEXT: ret; %struct.ri0 = insertvalue %struct.float2 poison, float %val, 0 @@ -454,11 +374,7 @@ define void @st_param_v2_f64_ii(double %val) { ; CHECK-NEXT: { // callseq 20, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {0d3FF0000000000000, 0d4000000000000000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_f64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_f64, (param0); ; CHECK-NEXT: } // callseq 20 ; CHECK-NEXT: ret; call void @call_v2_f64(%struct.double2 { double 1.0, double 2.0 }) @@ -474,11 +390,7 @@ define void @st_param_v2_f64_ir(double %val) { ; CHECK-NEXT: { // callseq 21, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {0d3FF0000000000000, %rd1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_f64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_f64, (param0); ; CHECK-NEXT: } // callseq 21 ; CHECK-NEXT: ret; %struct.ir0 = insertvalue %struct.double2 poison, double 1.0, 0 @@ -496,11 +408,7 @@ define void @st_param_v2_f64_ri(double %val) { ; CHECK-NEXT: { // callseq 22, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, 0d4000000000000000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v2_f64, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v2_f64, (param0); ; CHECK-NEXT: } // callseq 22 ; CHECK-NEXT: ret; %struct.ri0 = insertvalue %struct.double2 poison, double %val, 0 @@ -525,11 +433,7 @@ define void @st_param_v4_i8_iiii() { ; CHECK-NEXT: { // callseq 23, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4}; 
-; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 23 ; CHECK-NEXT: ret; call void @call_v4_i8(%struct.char4 { i8 1, i8 2, i8 3, i8 4 }) @@ -547,11 +451,7 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) { ; CHECK-NEXT: { // callseq 24, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, %rs3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 24 ; CHECK-NEXT: ret; %struct.irrr0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -573,11 +473,7 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) { ; CHECK-NEXT: { // callseq 25, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, %rs3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 25 ; CHECK-NEXT: ret; %struct.rirr0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -599,11 +495,7 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) { ; CHECK-NEXT: { // callseq 26, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, %rs3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 26 ; CHECK-NEXT: ret; %struct.rrir0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -625,11 +517,7 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) { ; CHECK-NEXT: { // callseq 27, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, %rs3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; 
CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 27 ; CHECK-NEXT: ret; %struct.rrri0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -650,11 +538,7 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) { ; CHECK-NEXT: { // callseq 28, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, %rs2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 28 ; CHECK-NEXT: ret; %struct.iirr0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -675,11 +559,7 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) { ; CHECK-NEXT: { // callseq 29, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, %rs2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 29 ; CHECK-NEXT: ret; %struct.irir0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -700,11 +580,7 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) { ; CHECK-NEXT: { // callseq 30, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 30 ; CHECK-NEXT: ret; %struct.irri0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -725,11 +601,7 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) { ; CHECK-NEXT: { // callseq 31, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, %rs2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 31 ; CHECK-NEXT: 
ret; %struct.riir0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -750,11 +622,7 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) { ; CHECK-NEXT: { // callseq 32, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 32 ; CHECK-NEXT: ret; %struct.riri0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -775,11 +643,7 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) { ; CHECK-NEXT: { // callseq 33, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 33 ; CHECK-NEXT: ret; %struct.rrii0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -799,11 +663,7 @@ define void @st_param_v4_i8_iiir(i8 %d) { ; CHECK-NEXT: { // callseq 34, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 34 ; CHECK-NEXT: ret; %struct.iiir0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -823,11 +683,7 @@ define void @st_param_v4_i8_iiri(i8 %c) { ; CHECK-NEXT: { // callseq 35, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 35 ; CHECK-NEXT: ret; %struct.iiri0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -847,11 +703,7 @@ define void @st_param_v4_i8_irii(i8 %b) { ; CHECK-NEXT: { // 
callseq 36, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 36 ; CHECK-NEXT: ret; %struct.irii0 = insertvalue %struct.char4 poison, i8 1, 0 @@ -871,11 +723,7 @@ define void @st_param_v4_i8_riii(i8 %a) { ; CHECK-NEXT: { // callseq 37, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; ; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i8, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 37 ; CHECK-NEXT: ret; %struct.riii0 = insertvalue %struct.char4 poison, i8 %a, 0 @@ -895,11 +743,7 @@ define void @st_param_v4_i16_iiii() { ; CHECK-NEXT: { // callseq 38, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 38 ; CHECK-NEXT: ret; call void @call_v4_i16(%struct.short4 { i16 1, i16 2, i16 3, i16 4 }) @@ -917,11 +761,7 @@ define void @st_param_v4_i16_irrr(i16 %b, i16 %c, i16 %d) { ; CHECK-NEXT: { // callseq 39, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, %rs2, %rs3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 39 ; CHECK-NEXT: ret; %struct.irrr0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -943,11 +783,7 @@ define void @st_param_v4_i16_rirr(i16 %a, i16 %c, i16 %d) { ; CHECK-NEXT: { // callseq 40, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 
%rs2, %rs3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 40 ; CHECK-NEXT: ret; %struct.rirr0 = insertvalue %struct.short4 poison, i16 %a, 0 @@ -969,11 +805,7 @@ define void @st_param_v4_i16_rrir(i16 %a, i16 %b, i16 %d) { ; CHECK-NEXT: { // callseq 41, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, %rs3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 41 ; CHECK-NEXT: ret; %struct.rrir0 = insertvalue %struct.short4 poison, i16 %a, 0 @@ -995,11 +827,7 @@ define void @st_param_v4_i16_rrri(i16 %a, i16 %b, i16 %c) { ; CHECK-NEXT: { // callseq 42, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, %rs3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 42 ; CHECK-NEXT: ret; %struct.rrri0 = insertvalue %struct.short4 poison, i16 %a, 0 @@ -1020,11 +848,7 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) { ; CHECK-NEXT: { // callseq 43, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 43 ; CHECK-NEXT: ret; %struct.iirr0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -1045,11 +869,7 @@ define void @st_param_v4_i16_irir(i16 %b, i16 %d) { ; CHECK-NEXT: { // callseq 44, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, %rs2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; 
CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 44 ; CHECK-NEXT: ret; %struct.irir0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -1070,11 +890,7 @@ define void @st_param_v4_i16_irri(i16 %b, i16 %c) { ; CHECK-NEXT: { // callseq 45, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, %rs2, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 45 ; CHECK-NEXT: ret; %struct.irri0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -1095,11 +911,7 @@ define void @st_param_v4_i16_riir(i16 %a, i16 %d) { ; CHECK-NEXT: { // callseq 46, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, %rs2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 46 ; CHECK-NEXT: ret; %struct.riir0 = insertvalue %struct.short4 poison, i16 %a, 0 @@ -1120,11 +932,7 @@ define void @st_param_v4_i16_riri(i16 %a, i16 %c) { ; CHECK-NEXT: { // callseq 47, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, %rs2, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 47 ; CHECK-NEXT: ret; %struct.riri0 = insertvalue %struct.short4 poison, i16 %a, 0 @@ -1145,11 +953,7 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) { ; CHECK-NEXT: { // callseq 48, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni 
call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 48 ; CHECK-NEXT: ret; %struct.rrii0 = insertvalue %struct.short4 poison, i16 %a, 0 @@ -1169,11 +973,7 @@ define void @st_param_v4_i16_iiir(i16 %d) { ; CHECK-NEXT: { // callseq 49, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 49 ; CHECK-NEXT: ret; %struct.iiir0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -1193,11 +993,7 @@ define void @st_param_v4_i16_iiri(i16 %c) { ; CHECK-NEXT: { // callseq 50, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 50 ; CHECK-NEXT: ret; %struct.iiri0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -1217,11 +1013,7 @@ define void @st_param_v4_i16_irii(i16 %b) { ; CHECK-NEXT: { // callseq 51, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 51 ; CHECK-NEXT: ret; %struct.irii0 = insertvalue %struct.short4 poison, i16 1, 0 @@ -1241,11 +1033,7 @@ define void @st_param_v4_i16_riii(i16 %a) { ; CHECK-NEXT: { // callseq 52, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; ; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i16, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 52 ; CHECK-NEXT: ret; %struct.riii0 = insertvalue %struct.short4 poison, i16 %a, 0 
@@ -1265,11 +1053,7 @@ define void @st_param_v4_i32_iiii() { ; CHECK-NEXT: { // callseq 53, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 53 ; CHECK-NEXT: ret; call void @call_v4_i32(%struct.int4 { i32 1, i32 2, i32 3, i32 4 }) @@ -1287,11 +1071,7 @@ define void @st_param_v4_i32_irrr(i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: { // callseq 54, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, %r2, %r3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 54 ; CHECK-NEXT: ret; %struct.irrr0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1313,11 +1093,7 @@ define void @st_param_v4_i32_rirr(i32 %a, i32 %c, i32 %d) { ; CHECK-NEXT: { // callseq 55, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, %r2, %r3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 55 ; CHECK-NEXT: ret; %struct.rirr0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1339,11 +1115,7 @@ define void @st_param_v4_i32_rrir(i32 %a, i32 %b, i32 %d) { ; CHECK-NEXT: { // callseq 56, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 3, %r3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 56 ; CHECK-NEXT: ret; %struct.rrir0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1365,11 +1137,7 @@ define void @st_param_v4_i32_rrri(i32 %a, i32 
%b, i32 %c) { ; CHECK-NEXT: { // callseq 57, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, %r3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 57 ; CHECK-NEXT: ret; %struct.rrri0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1390,11 +1158,7 @@ define void @st_param_v4_i32_iirr(i32 %c, i32 %d) { ; CHECK-NEXT: { // callseq 58, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, %r1, %r2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 58 ; CHECK-NEXT: ret; %struct.iirr0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1415,11 +1179,7 @@ define void @st_param_v4_i32_irir(i32 %b, i32 %d) { ; CHECK-NEXT: { // callseq 59, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, 3, %r2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 59 ; CHECK-NEXT: ret; %struct.irir0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1440,11 +1200,7 @@ define void @st_param_v4_i32_irri(i32 %b, i32 %c) { ; CHECK-NEXT: { // callseq 60, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, %r2, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 60 ; CHECK-NEXT: ret; %struct.irri0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1465,11 +1221,7 @@ define void @st_param_v4_i32_riir(i32 %a, i32 %d) { ; CHECK-NEXT: { // callseq 61, 0 ; CHECK-NEXT: .param .align 16 .b8 
param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, 3, %r2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 61 ; CHECK-NEXT: ret; %struct.riir0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1490,11 +1242,7 @@ define void @st_param_v4_i32_riri(i32 %a, i32 %c) { ; CHECK-NEXT: { // callseq 62, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, %r2, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 62 ; CHECK-NEXT: ret; %struct.riri0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1515,11 +1263,7 @@ define void @st_param_v4_i32_rrii(i32 %a, i32 %b) { ; CHECK-NEXT: { // callseq 63, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 63 ; CHECK-NEXT: ret; %struct.rrii0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1539,11 +1283,7 @@ define void @st_param_v4_i32_iiir(i32 %d) { ; CHECK-NEXT: { // callseq 64, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, 3, %r1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 64 ; CHECK-NEXT: ret; %struct.iiir0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1563,11 +1303,7 @@ define void @st_param_v4_i32_iiri(i32 %c) { ; CHECK-NEXT: { // callseq 65, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, %r1, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: 
call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 65 ; CHECK-NEXT: ret; %struct.iiri0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1587,11 +1323,7 @@ define void @st_param_v4_i32_irii(i32 %b) { ; CHECK-NEXT: { // callseq 66, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 66 ; CHECK-NEXT: ret; %struct.irii0 = insertvalue %struct.int4 poison, i32 1, 0 @@ -1611,11 +1343,7 @@ define void @st_param_v4_i32_riii(i32 %a) { ; CHECK-NEXT: { // callseq 67, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, 3, 4}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_i32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_i32, (param0); ; CHECK-NEXT: } // callseq 67 ; CHECK-NEXT: ret; %struct.riii0 = insertvalue %struct.int4 poison, i32 %a, 0 @@ -1635,11 +1363,7 @@ define void @st_param_v4_f32_iiii() { ; CHECK-NEXT: { // callseq 68, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, 0f40400000, 0f40800000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 68 ; CHECK-NEXT: ret; call void @call_v4_f32(%struct.float4 { float 1.0, float 2.0, float 3.0, float 4.0 }) @@ -1657,11 +1381,7 @@ define void @st_param_v4_f32_irrr(float %b, float %c, float %d) { ; CHECK-NEXT: { // callseq 69, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, %r2, %r3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; 
CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 69 ; CHECK-NEXT: ret; %struct.irrr0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1683,11 +1403,7 @@ define void @st_param_v4_f32_rirr(float %a, float %c, float %d) { ; CHECK-NEXT: { // callseq 70, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, %r2, %r3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 70 ; CHECK-NEXT: ret; %struct.rirr0 = insertvalue %struct.float4 poison, float %a, 0 @@ -1709,11 +1425,7 @@ define void @st_param_v4_f32_rrir(float %a, float %b, float %d) { ; CHECK-NEXT: { // callseq 71, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 0f40400000, %r3}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 71 ; CHECK-NEXT: ret; %struct.rrir0 = insertvalue %struct.float4 poison, float %a, 0 @@ -1735,11 +1447,7 @@ define void @st_param_v4_f32_rrri(float %a, float %b, float %c) { ; CHECK-NEXT: { // callseq 72, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, %r3, 0f40800000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 72 ; CHECK-NEXT: ret; %struct.rrri0 = insertvalue %struct.float4 poison, float %a, 0 @@ -1760,11 +1468,7 @@ define void @st_param_v4_f32_iirr(float %c, float %d) { ; CHECK-NEXT: { // callseq 73, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, %r1, %r2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: 
call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 73 ; CHECK-NEXT: ret; %struct.iirr0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1785,11 +1489,7 @@ define void @st_param_v4_f32_irir(float %b, float %d) { ; CHECK-NEXT: { // callseq 74, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, 0f40400000, %r2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 74 ; CHECK-NEXT: ret; %struct.irir0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1810,11 +1510,7 @@ define void @st_param_v4_f32_irri(float %b, float %c) { ; CHECK-NEXT: { // callseq 75, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, %r2, 0f40800000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 75 ; CHECK-NEXT: ret; %struct.irri0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1835,11 +1531,7 @@ define void @st_param_v4_f32_riir(float %a, float %d) { ; CHECK-NEXT: { // callseq 76, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, 0f40400000, %r2}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 76 ; CHECK-NEXT: ret; %struct.riir0 = insertvalue %struct.float4 poison, float %a, 0 @@ -1860,11 +1552,7 @@ define void @st_param_v4_f32_riri(float %a, float %c) { ; CHECK-NEXT: { // callseq 77, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, %r2, 0f40800000}; -; CHECK-NEXT: call.uni 
-; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 77 ; CHECK-NEXT: ret; %struct.riri0 = insertvalue %struct.float4 poison, float %a, 0 @@ -1885,11 +1573,7 @@ define void @st_param_v4_f32_rrii(float %a, float %b) { ; CHECK-NEXT: { // callseq 78, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 0f40400000, 0f40800000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 78 ; CHECK-NEXT: ret; %struct.rrii0 = insertvalue %struct.float4 poison, float %a, 0 @@ -1909,11 +1593,7 @@ define void @st_param_v4_f32_iiir(float %d) { ; CHECK-NEXT: { // callseq 79, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, 0f40400000, %r1}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 79 ; CHECK-NEXT: ret; %struct.iiir0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1933,11 +1613,7 @@ define void @st_param_v4_f32_iiri(float %c) { ; CHECK-NEXT: { // callseq 80, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, %r1, 0f40800000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 80 ; CHECK-NEXT: ret; %struct.iiri0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1957,11 +1633,7 @@ define void @st_param_v4_f32_irii(float %b) { ; CHECK-NEXT: { // callseq 81, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, 0f40400000, 0f40800000}; -; CHECK-NEXT: 
call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 81 ; CHECK-NEXT: ret; %struct.irii0 = insertvalue %struct.float4 poison, float 1.0, 0 @@ -1981,11 +1653,7 @@ define void @st_param_v4_f32_riii(float %a) { ; CHECK-NEXT: { // callseq 82, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; ; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, 0f40400000, 0f40800000}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_v4_f32, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_v4_f32, (param0); ; CHECK-NEXT: } // callseq 82 ; CHECK-NEXT: ret; %struct.riii0 = insertvalue %struct.float4 poison, float %a, 0 @@ -2011,11 +1679,7 @@ define void @st_param_bfloat() { ; CHECK-NEXT: { // callseq 83, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[2]; ; CHECK-NEXT: st.param.b16 [param0], %rs1; -; CHECK-NEXT: call.uni -; CHECK-NEXT: call_bfloat, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni call_bfloat, (param0); ; CHECK-NEXT: } // callseq 83 ; CHECK-NEXT: ret; %five = bitcast i16 16640 to bfloat diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll index 52415b05e03d0..5b31b5e24bc68 100644 --- a/llvm/test/CodeGen/NVPTX/store-undef.ll +++ b/llvm/test/CodeGen/NVPTX/store-undef.ll @@ -16,11 +16,7 @@ define void @test_store_param_undef() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[32]; -; CHECK-NEXT: call.uni -; CHECK-NEXT: test_call, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni test_call, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; call void @test_call(%struct.T undef) @@ -41,11 +37,7 @@ define void @test_store_param_def(i64 %param0, i32 %param1) { ; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1}; ; CHECK-NEXT: 
st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5}; -; CHECK-NEXT: call.uni -; CHECK-NEXT: test_call, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni test_call, (param0); ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: ret; %V2 = insertelement <2 x i32> undef, i32 %param1, i32 1 diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll index a97a8b5822f99..d6961a9541776 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -71,11 +71,7 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) { ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: st.param.b64 [param0], %rd3; ; CHECK-NEXT: .param .b32 retval0; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: texfunc, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), texfunc, (param0); ; CHECK-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: add.rn.f32 %r8, %r2, %r6; diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll index efbac868dba38..178ee7ff6db18 100644 --- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll @@ -33,11 +33,7 @@ ; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]]; ; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]]; ; CHECK: .param .align 8 .b8 retval0[16]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8i16p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0); ; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3]; ; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4]; @@ -80,11 +76,7 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { ; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]]; ; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]]; ; CHECK: .param .align 8 .b8 
retval0[24]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8i32p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0); ; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5]; ; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6]; @@ -147,11 +139,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { ; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]]; ; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]]; ; CHECK: .param .align 8 .b8 retval0[32]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8i64p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0); ; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9]; ; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10]; @@ -192,11 +180,7 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) { ; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]]; ; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]]; ; CHECK: .param .align 8 .b8 retval0[16]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8f16p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0); ; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3]; ; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4]; @@ -239,11 +223,7 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { ; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]]; ; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]]; ; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8f16x2p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0); ; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], 
[retval0+5]; ; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6]; @@ -286,11 +266,7 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) { ; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]]; ; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]]; ; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8f32p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0); ; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5]; ; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6]; @@ -353,11 +329,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) { ; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]]; ; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]]; ; CHECK: .param .align 8 .b8 retval0[32]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: test_s_i8f64p, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 -; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0); ; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9]; ; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10]; diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll index 80cf938d48b53..618c7ed0c4997 100644 --- a/llvm/test/CodeGen/NVPTX/unreachable.ll +++ b/llvm/test/CodeGen/NVPTX/unreachable.ll @@ -28,10 +28,7 @@ define ptx_kernel void @kernel_func() { ; NO-TRAP-UNREACHABLE-EMPTY: ; NO-TRAP-UNREACHABLE-NEXT: // %bb.0: ; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0 -; NO-TRAP-UNREACHABLE-NEXT: call.uni -; NO-TRAP-UNREACHABLE-NEXT: throw, -; NO-TRAP-UNREACHABLE-NEXT: ( -; NO-TRAP-UNREACHABLE-NEXT: ); +; NO-TRAP-UNREACHABLE-NEXT: call.uni throw, (); ; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0 ; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm ; NO-TRAP-UNREACHABLE-NEXT: exit; @@ -43,10 +40,7 @@ define ptx_kernel void @kernel_func() { ; 
NO-TRAP-AFTER-NORETURN-EMPTY: ; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0: ; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0 -; NO-TRAP-AFTER-NORETURN-NEXT: call.uni -; NO-TRAP-AFTER-NORETURN-NEXT: throw, -; NO-TRAP-AFTER-NORETURN-NEXT: ( -; NO-TRAP-AFTER-NORETURN-NEXT: ); +; NO-TRAP-AFTER-NORETURN-NEXT: call.uni throw, (); ; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0 ; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm ; NO-TRAP-AFTER-NORETURN-NEXT: exit; @@ -59,10 +53,7 @@ define ptx_kernel void @kernel_func() { ; TRAP-EMPTY: ; TRAP-NEXT: // %bb.0: ; TRAP-NEXT: { // callseq 0, 0 -; TRAP-NEXT: call.uni -; TRAP-NEXT: throw, -; TRAP-NEXT: ( -; TRAP-NEXT: ); +; TRAP-NEXT: call.uni throw, (); ; TRAP-NEXT: } // callseq 0 ; TRAP-NEXT: trap; exit; ; @@ -72,10 +63,7 @@ define ptx_kernel void @kernel_func() { ; BUG-FIXED-EMPTY: ; BUG-FIXED-NEXT: // %bb.0: ; BUG-FIXED-NEXT: { // callseq 0, 0 -; BUG-FIXED-NEXT: call.uni -; BUG-FIXED-NEXT: throw, -; BUG-FIXED-NEXT: ( -; BUG-FIXED-NEXT: ); +; BUG-FIXED-NEXT: call.uni throw, (); ; BUG-FIXED-NEXT: } // callseq 0 ; BUG-FIXED-NEXT: trap; call void @throw() diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll index ddaa9fd831af7..ca1b722527a89 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll @@ -126,12 +126,7 @@ define dso_local i32 @foo() { ; CHECK-PTX-NEXT: .param .b64 param1; ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4; ; CHECK-PTX-NEXT: .param .b32 retval0; -; CHECK-PTX-NEXT: call.uni (retval0), -; CHECK-PTX-NEXT: variadics1, -; CHECK-PTX-NEXT: ( -; CHECK-PTX-NEXT: param0, -; CHECK-PTX-NEXT: param1 -; CHECK-PTX-NEXT: ); +; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0]; ; CHECK-PTX-NEXT: } // callseq 0 ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2; @@ -238,12 +233,7 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: .param .b64 param1; ; CHECK-PTX-NEXT: 
st.param.b64 [param1], %rd4; ; CHECK-PTX-NEXT: .param .b32 retval0; -; CHECK-PTX-NEXT: call.uni (retval0), -; CHECK-PTX-NEXT: variadics2, -; CHECK-PTX-NEXT: ( -; CHECK-PTX-NEXT: param0, -; CHECK-PTX-NEXT: param1 -; CHECK-PTX-NEXT: ); +; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0]; ; CHECK-PTX-NEXT: } // callseq 1 ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2; @@ -315,12 +305,7 @@ define dso_local i32 @baz() { ; CHECK-PTX-NEXT: .param .b64 param1; ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; ; CHECK-PTX-NEXT: .param .b32 retval0; -; CHECK-PTX-NEXT: call.uni (retval0), -; CHECK-PTX-NEXT: variadics3, -; CHECK-PTX-NEXT: ( -; CHECK-PTX-NEXT: param0, -; CHECK-PTX-NEXT: param1 -; CHECK-PTX-NEXT: ); +; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0]; ; CHECK-PTX-NEXT: } // callseq 2 ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2; @@ -397,12 +382,7 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: .param .b64 param1; ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd8; ; CHECK-PTX-NEXT: .param .b32 retval0; -; CHECK-PTX-NEXT: call.uni (retval0), -; CHECK-PTX-NEXT: variadics4, -; CHECK-PTX-NEXT: ( -; CHECK-PTX-NEXT: param0, -; CHECK-PTX-NEXT: param1 -; CHECK-PTX-NEXT: ); +; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-PTX-NEXT: } // callseq 3 ; CHECK-PTX-NEXT: ret; diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll index dac17dc3225ee..b7852c3c3e6e0 100644 --- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll +++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll @@ -18,7 +18,6 @@ define signext i32 @main() nounwind { ; CHECK-NEXT: sth 3, 46(1) ; CHECK-NEXT: addi 3, 1, 46 ; CHECK-NEXT: lharx 4, 0, 3 -; CHECK-NEXT: clrlwi 4, 4, 16 ; CHECK-NEXT: cmplwi 4, 33059 ; CHECK-NEXT: bne 0, .LBB0_4 ; CHECK-NEXT: 
# %bb.1: # %cmpxchg.fencedstore @@ -32,7 +31,6 @@ define signext i32 @main() nounwind { ; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload ; CHECK-NEXT: # ; CHECK-NEXT: lharx 5, 0, 3 -; CHECK-NEXT: clrlwi 5, 5, 16 ; CHECK-NEXT: cmplwi 5, 33059 ; CHECK-NEXT: beq 0, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll index 5e14fbbb6ad61..07afea75aec67 100644 --- a/llvm/test/CodeGen/PowerPC/all-atomics.ll +++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll @@ -4346,8 +4346,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 7, uc@toc@l(3) ; CHECK-NEXT: lbz 8, sc@toc@l(4) ; CHECK-NEXT: lbarx 5, 0, 6 -; CHECK-NEXT: clrlwi 9, 5, 24 -; CHECK-NEXT: cmplw 9, 7 +; CHECK-NEXT: cmplw 5, 7 ; CHECK-NEXT: bne 0, .LBB3_4 ; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276 ; CHECK-NEXT: sync @@ -4359,8 +4358,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 5, 0, 6 -; CHECK-NEXT: clrlwi 9, 5, 24 -; CHECK-NEXT: cmplw 9, 7 +; CHECK-NEXT: cmplw 5, 7 ; CHECK-NEXT: beq 0, .LBB3_2 ; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272 ; CHECK-NEXT: addi 7, 3, uc@toc@l @@ -4368,8 +4366,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: stb 5, sc@toc@l(4) ; CHECK-NEXT: lbz 9, uc@toc@l(3) ; CHECK-NEXT: lbarx 8, 0, 7 -; CHECK-NEXT: clrlwi 10, 8, 24 -; CHECK-NEXT: cmplw 10, 9 +; CHECK-NEXT: cmplw 8, 9 ; CHECK-NEXT: bne 0, .LBB3_8 ; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257 ; CHECK-NEXT: sync @@ -4382,8 +4379,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 8, 0, 7 -; CHECK-NEXT: clrlwi 10, 8, 24 -; CHECK-NEXT: cmplw 10, 9 +; CHECK-NEXT: cmplw 8, 9 ; CHECK-NEXT: beq 0, .LBB3_6 ; CHECK-NEXT: .LBB3_8: # 
%cmpxchg.nostore253 ; CHECK-NEXT: addis 5, 2, ss@toc@ha @@ -4393,8 +4389,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 11, sc@toc@l(4) ; CHECK-NEXT: addi 8, 5, ss@toc@l ; CHECK-NEXT: lharx 9, 0, 8 -; CHECK-NEXT: clrlwi 12, 9, 16 -; CHECK-NEXT: cmplw 12, 10 +; CHECK-NEXT: cmplw 9, 10 ; CHECK-NEXT: bne 0, .LBB3_12 ; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238 ; CHECK-NEXT: extsb 11, 11 @@ -4408,8 +4403,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 9, 0, 8 -; CHECK-NEXT: clrlwi 12, 9, 16 -; CHECK-NEXT: cmplw 12, 10 +; CHECK-NEXT: cmplw 9, 10 ; CHECK-NEXT: beq 0, .LBB3_10 ; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234 ; CHECK-NEXT: lwsync @@ -4419,8 +4413,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 12, sc@toc@l(4) ; CHECK-NEXT: addi 9, 5, us@toc@l ; CHECK-NEXT: lharx 10, 0, 9 -; CHECK-NEXT: clrlwi 0, 10, 16 -; CHECK-NEXT: cmplw 0, 11 +; CHECK-NEXT: cmplw 10, 11 ; CHECK-NEXT: bne 0, .LBB3_16 ; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219 ; CHECK-NEXT: extsb 12, 12 @@ -4434,8 +4427,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 10, 0, 9 -; CHECK-NEXT: clrlwi 0, 10, 16 -; CHECK-NEXT: cmplw 0, 11 +; CHECK-NEXT: cmplw 10, 11 ; CHECK-NEXT: beq 0, .LBB3_14 ; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215 ; CHECK-NEXT: lwsync @@ -4535,7 +4527,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: lbz 30, uc@toc@l(3) ; CHECK-NEXT: lbz 29, sc@toc@l(4) ; CHECK-NEXT: lbarx 28, 0, 6 -; CHECK-NEXT: clrlwi 28, 28, 24 ; CHECK-NEXT: cmplw 28, 30 ; CHECK-NEXT: bne 0, .LBB3_36 ; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124 @@ -4548,7 +4539,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; 
CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 28, 0, 6 -; CHECK-NEXT: clrlwi 28, 28, 24 ; CHECK-NEXT: cmplw 28, 30 ; CHECK-NEXT: beq 0, .LBB3_34 ; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120 @@ -4566,7 +4556,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: stw 6, ui@toc@l(5) ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lbarx 29, 0, 7 -; CHECK-NEXT: clrlwi 29, 29, 24 ; CHECK-NEXT: cmplw 29, 6 ; CHECK-NEXT: bne 0, .LBB3_42 ; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105 @@ -4579,7 +4568,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103 ; CHECK-NEXT: # ; CHECK-NEXT: lbarx 29, 0, 7 -; CHECK-NEXT: clrlwi 29, 29, 24 ; CHECK-NEXT: cmplw 29, 6 ; CHECK-NEXT: beq 0, .LBB3_40 ; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101 @@ -4597,7 +4585,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: stw 6, ui@toc@l(5) ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lharx 30, 0, 8 -; CHECK-NEXT: clrlwi 30, 30, 16 ; CHECK-NEXT: cmplw 30, 6 ; CHECK-NEXT: bne 0, .LBB3_48 ; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86 @@ -4612,7 +4599,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84 ; CHECK-NEXT: # ; CHECK-NEXT: lharx 30, 0, 8 -; CHECK-NEXT: clrlwi 30, 30, 16 ; CHECK-NEXT: cmplw 30, 6 ; CHECK-NEXT: beq 0, .LBB3_46 ; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82 @@ -4630,7 +4616,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: stw 6, ui@toc@l(5) ; CHECK-NEXT: lbz 6, uc@toc@l(3) ; CHECK-NEXT: lharx 8, 0, 9 -; CHECK-NEXT: clrlwi 8, 8, 16 ; CHECK-NEXT: cmplw 8, 6 ; CHECK-NEXT: bne 0, .LBB3_54 ; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67 @@ -4645,7 +4630,6 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-NEXT: # %bb.53: # %cmpxchg.releasedload65 ; 
CHECK-NEXT: # ; CHECK-NEXT: lharx 8, 0, 9 -; CHECK-NEXT: clrlwi 8, 8, 16 ; CHECK-NEXT: cmplw 8, 6 ; CHECK-NEXT: beq 0, .LBB3_52 ; CHECK-NEXT: .LBB3_54: # %cmpxchg.nostore63 diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll index 280c4299c30b7..0474a479a1fef 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll @@ -406,7 +406,6 @@ define void @test40(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB40_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -428,7 +427,6 @@ define void @test41(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB41_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -451,7 +449,6 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB42_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB42_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -470,7 +467,6 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -484,7 +480,6 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB43_2 ; PPC64LE-NEXT: blr @@ -497,7 +492,6 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # 
%cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB44_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -511,7 +505,6 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB44_2 ; PPC64LE-NEXT: .LBB44_4: # %cmpxchg.nostore @@ -526,7 +519,6 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -540,7 +532,6 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB45_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -557,7 +548,6 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB46_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -571,7 +561,6 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB46_2 ; PPC64LE-NEXT: .LBB46_4: # %cmpxchg.nostore @@ -586,7 +575,6 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; 
PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -600,7 +588,6 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB47_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -617,7 +604,6 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB48_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -631,7 +617,6 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB48_2 ; PPC64LE-NEXT: .LBB48_4: # %cmpxchg.nostore @@ -646,7 +631,6 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB49_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -660,7 +644,6 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB49_2 ; PPC64LE-NEXT: .LBB49_4: # %cmpxchg.nostore @@ -679,7 +662,6 @@ define void @test50(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB50_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -701,7 +683,6 @@ define void @test51(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB51_1: # %cmpxchg.start ; PPC64LE-NEXT: # 
; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -724,7 +705,6 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB52_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB52_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -743,7 +723,6 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -757,7 +736,6 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB53_2 ; PPC64LE-NEXT: blr @@ -770,7 +748,6 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB54_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -784,7 +761,6 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB54_2 ; PPC64LE-NEXT: .LBB54_4: # %cmpxchg.nostore @@ -799,7 +775,6 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -813,7 +788,6 @@ define 
void @test55(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB55_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -830,7 +804,6 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB56_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -844,7 +817,6 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB56_2 ; PPC64LE-NEXT: .LBB56_4: # %cmpxchg.nostore @@ -859,7 +831,6 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -873,7 +844,6 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB57_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -890,7 +860,6 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB58_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -904,7 +873,6 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 
6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB58_2 ; PPC64LE-NEXT: .LBB58_4: # %cmpxchg.nostore @@ -919,7 +887,6 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB59_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -933,7 +900,6 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB59_2 ; PPC64LE-NEXT: .LBB59_4: # %cmpxchg.nostore @@ -1424,7 +1390,6 @@ define void @test80(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB80_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -1446,7 +1411,6 @@ define void @test81(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB81_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -1469,7 +1433,6 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB82_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB82_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -1488,7 +1451,6 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1502,7 +1464,6 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) { ; 
PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB83_2 ; PPC64LE-NEXT: blr @@ -1515,7 +1476,6 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB84_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1529,7 +1489,6 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB84_2 ; PPC64LE-NEXT: .LBB84_4: # %cmpxchg.nostore @@ -1544,7 +1503,6 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1558,7 +1516,6 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB85_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -1575,7 +1532,6 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB86_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1589,7 +1545,6 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB86_2 
; PPC64LE-NEXT: .LBB86_4: # %cmpxchg.nostore @@ -1604,7 +1559,6 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1618,7 +1572,6 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB87_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -1635,7 +1588,6 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB88_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1649,7 +1601,6 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB88_2 ; PPC64LE-NEXT: .LBB88_4: # %cmpxchg.nostore @@ -1664,7 +1615,6 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB89_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1678,7 +1628,6 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB89_2 ; PPC64LE-NEXT: .LBB89_4: # %cmpxchg.nostore @@ -1697,7 +1646,6 @@ define void @test90(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB90_1: # %cmpxchg.start 
; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -1719,7 +1667,6 @@ define void @test91(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB91_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -1742,7 +1689,6 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: .LBB92_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB92_3 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore @@ -1761,7 +1707,6 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1775,7 +1720,6 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB93_2 ; PPC64LE-NEXT: blr @@ -1788,7 +1732,6 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB94_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1802,7 +1745,6 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB94_2 ; PPC64LE-NEXT: .LBB94_4: # %cmpxchg.nostore @@ 
-1817,7 +1759,6 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1831,7 +1772,6 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB95_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -1848,7 +1788,6 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB96_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1862,7 +1801,6 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB96_2 ; PPC64LE-NEXT: .LBB96_4: # %cmpxchg.nostore @@ -1877,7 +1815,6 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1891,7 +1828,6 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB97_2 ; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end @@ -1908,7 +1844,6 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 
4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB98_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1922,7 +1857,6 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB98_2 ; PPC64LE-NEXT: .LBB98_4: # %cmpxchg.nostore @@ -1937,7 +1871,6 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: # %cmpxchg.start ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: clrlwi 4, 4, 16 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bne 0, .LBB99_4 ; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore @@ -1951,7 +1884,6 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) { ; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lharx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 16 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: beq 0, .LBB99_2 ; PPC64LE-NEXT: .LBB99_4: # %cmpxchg.nostore diff --git a/llvm/test/CodeGen/PowerPC/builtins-bcd-transform.ll b/llvm/test/CodeGen/PowerPC/builtins-bcd-transform.ll new file mode 100644 index 0000000000000..449beeb18c2de --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-bcd-transform.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Testfile that verifies positive case (0 or 1 only) for BCD builtins national2packed, packed2zoned and zoned2packed. 
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s + +declare <16 x i8> @llvm.ppc.national2packed(<16 x i8>, i32 immarg) + +define <16 x i8> @tBcd_National2packed_imm0(<16 x i8> %a) { +; CHECK-LABEL: tBcd_National2packed_imm0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdcfn. v2, v2, 0 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.national2packed(<16 x i8> %a, i32 0) + ret <16 x i8> %0 +} + +define <16 x i8> @tBcd_National2packed_imm1(<16 x i8> %a) { +; CHECK-LABEL: tBcd_National2packed_imm1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdcfn. v2, v2, 1 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.national2packed(<16 x i8> %a, i32 1) + ret <16 x i8> %0 +} + +declare <16 x i8> @llvm.ppc.packed2national(<16 x i8>) + +define <16 x i8> @tBcd_Packed2national(<16 x i8> %a) { +; CHECK-LABEL: tBcd_Packed2national: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdctn. v2, v2 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.packed2national(<16 x i8> %a) + ret <16 x i8> %0 +} + +declare <16 x i8> @llvm.ppc.packed2zoned(<16 x i8>, i32 immarg) + +define <16 x i8> @tBcd_Packed2zoned_imm0(<16 x i8> %a) { +; CHECK-LABEL: tBcd_Packed2zoned_imm0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdctz. v2, v2, 0 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.packed2zoned(<16 x i8> %a, i32 0) + ret <16 x i8> %0 +} + +define <16 x i8> @tBcd_Packed2zoned_imm1(<16 x i8> %a) { +; CHECK-LABEL: tBcd_Packed2zoned_imm1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdctz. 
v2, v2, 1 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.packed2zoned(<16 x i8> %a, i32 1) + ret <16 x i8> %0 +} + +declare <16 x i8> @llvm.ppc.zoned2packed(<16 x i8>, i32 immarg) + +define <16 x i8> @tBcd_Zoned2packed_imm0(<16 x i8> %a) { +; CHECK-LABEL: tBcd_Zoned2packed_imm0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdcfz. v2, v2, 0 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.zoned2packed(<16 x i8> %a, i32 0) + ret <16 x i8> %0 +} + +define <16 x i8> @tBcd_Zoned2packed_imm1(<16 x i8> %a) { +; CHECK-LABEL: tBcd_Zoned2packed_imm1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdcfz. v2, v2, 1 +; CHECK-NEXT: blr +entry: + %0 = call <16 x i8> @llvm.ppc.zoned2packed(<16 x i8> %a, i32 1) + ret <16 x i8> %0 +} diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll index 34b29cbe901e9..530e67b4804fb 100644 --- a/llvm/test/CodeGen/PowerPC/loop-comment.ll +++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll @@ -10,7 +10,6 @@ define void @test(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start ; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 ; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 8b931f70aa5cc..999ecba7f1b9c 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -61,7 +61,6 @@ ; CHECK-NEXT: m - 'M' (Integer Multiplication and Division). ; CHECK-NEXT: mips-p8700 - MIPS p8700 processor. ; CHECK-NEXT: no-default-unroll - Disable default unroll preference.. -; CHECK-NEXT: no-rvc-hints - Disable RVC Hint Instructions.. ; CHECK-NEXT: no-sink-splat-operands - Disable sink splat operands to enable .vx, .vf,.wx, and .wf instructions. ; CHECK-NEXT: no-trailing-seq-cst-fence - Disable trailing fence for seq-cst store.. 
; CHECK-NEXT: optimized-nf2-segment-load-store - vlseg2eN.v and vsseg2eN.v are implemented as a wide memory op and shuffle. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index c2ae1ce491389..3e822d357b667 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -293,31 +293,6 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3 } -; TODO: Remove once recursive deinterleaving support is removed -define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4_recursive(ptr %p) { -; CHECK-LABEL: vector_deinterleave_load_factor4_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vlseg4e8.v v8, (a0) -; CHECK-NEXT: ret - %vec = load <32 x i8>, ptr %p - %d0 = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec) - %d0.0 = extractvalue { <16 x i8>, <16 x i8> } %d0, 0 - %d0.1 = extractvalue { <16 x i8>, <16 x i8> } %d0, 1 - %d1 = call {<8 x i8>, <8 x i8>} @llvm.vector.deinterleave2.v16i8(<16 x i8> %d0.0) - %t0 = extractvalue { <8 x i8>, <8 x i8> } %d1, 0 - %t2 = extractvalue { <8 x i8>, <8 x i8> } %d1, 1 - %d2 = call {<8 x i8>, <8 x i8>} @llvm.vector.deinterleave2.v16i8(<16 x i8> %d0.1) - %t1 = extractvalue { <8 x i8>, <8 x i8> } %d2, 0 - %t3 = extractvalue { <8 x i8>, <8 x i8> } %d2, 1 - - %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } undef, <8 x i8> %t0, 0 - %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1 - %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2 - %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3 - ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3 -} - define { <8 x i8>, <8 x i8>, <8 
x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor5(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor5: ; CHECK: # %bb.0: @@ -414,45 +389,3 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, < %res7 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6, <8 x i8> %t6, 7 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res7 } - -; TODO: Remove once recursive deinterleaving support is removed -define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8_recursive(ptr %ptr) { -; CHECK-LABEL: vector_deinterleave_load_factor8_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v8, (a0) -; CHECK-NEXT: ret - %vec = load <16 x i32>, ptr %ptr - %d0 = call { <8 x i32>, <8 x i32> } @llvm.vector.deinterleave2.v16i32(<16 x i32> %vec) - %d0.0 = extractvalue { <8 x i32>, <8 x i32> } %d0, 0 - %d0.1 = extractvalue { <8 x i32>, <8 x i32> } %d0, 1 - %d1 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %d0.0) - %d1.0 = extractvalue { <4 x i32>, <4 x i32> } %d1, 0 - %d1.1 = extractvalue { <4 x i32>, <4 x i32> } %d1, 1 - %d2 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %d0.1) - %d2.0 = extractvalue { <4 x i32>, <4 x i32> } %d2, 0 - %d2.1 = extractvalue { <4 x i32>, <4 x i32> } %d2, 1 - - %d3 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d1.0) - %t0 = extractvalue { <2 x i32>, <2 x i32> } %d3, 0 - %t4 = extractvalue { <2 x i32>, <2 x i32> } %d3, 1 - %d4 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d1.1) - %t2 = extractvalue { <2 x i32>, <2 x i32> } %d4, 0 - %t6 = extractvalue { <2 x i32>, <2 x i32> } %d4, 1 - %d5 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d2.0) - %t1 = extractvalue { <2 
x i32>, <2 x i32> } %d5, 0 - %t5 = extractvalue { <2 x i32>, <2 x i32> } %d5, 1 - %d6 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d2.1) - %t3 = extractvalue { <2 x i32>, <2 x i32> } %d6, 0 - %t7 = extractvalue { <2 x i32>, <2 x i32> } %d6, 1 - - %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } undef, <2 x i32> %t0, 0 - %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1 - %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2 - %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3 - %res4 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3, <2 x i32> %t4, 4 - %res5 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res4, <2 x i32> %t5, 5 - %res6 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res5, <2 x i32> %t6, 6 - %res7 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res6, <2 x i32> %t7, 7 - ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res7 -} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll index c394e7aa2e3e8..a49eeed3605c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -203,20 +203,6 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3 ret void } -; TODO: Remove once recursive interleaving support is removed -define void 
@vector_interleave_store_factor4_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) { -; CHECK-LABEL: vector_interleave_store_factor4_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vsseg4e32.v v8, (a0) -; CHECK-NEXT: ret - %v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c) - %v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d) - %v2 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v0, <8 x i32> %v1) - store <16 x i32> %v2, ptr %p - ret void -} - define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, ptr %p) { ; CHECK-LABEL: vector_interleave_store_factor5: ; CHECK: # %bb.0: @@ -260,23 +246,3 @@ define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i3 store <32 x i32> %v, ptr %p ret void } - -; TODO: Remove once recursive interleaving support is removed -define void @vector_interleave_store_factor8_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) { -; CHECK-LABEL: vector_interleave_store_factor8_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vsseg8e32.v v8, (a0) -; CHECK-NEXT: ret - %v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %e) - %v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %c, <4 x i32> %g) - %v2 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v0, <8 x i32> %v1) - - %v3 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %f) - %v4 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %d, <4 x i32> %h) - %v5 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v3, <8 x i32> %v4) - - %v6 = call <32 x i32> @llvm.vector.interleave2.v32i32(<16 x i32> %v2, <16 x i32> %v5) - store <32 x i32> %v6, ptr %p - ret void -} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 8ac4c7447c7d4..5e3ae2faf1a53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -302,15 +302,11 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vpload_factor4_intrinsics(p ; CHECK-NEXT: vlseg4e32.v v8, (a0) ; CHECK-NEXT: ret %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8) - %d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load) - %d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0 - %d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1 - %d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0) - %t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0 - %t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1 - %d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1) - %t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0 - %t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1 + %d = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.vector.deinterleave4.v8i32(<8 x i32> %wide.masked.load) + %t0 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 0 + %t1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 1 + %t2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 2 + %t3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %d, 3 %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0 %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-float.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-float.ll similarity index 71% rename from 
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-float.ll rename to llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-float.ll index 1d21cb5586984..ad84aaccc2171 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-float.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-float.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \ ; RUN: < %s | FileCheck %s define <2 x double> @test_vp_reverse_v2f64_masked(<2 x double> %src, <2 x i1> %mask, i32 zeroext %evl) { @@ -88,3 +90,32 @@ define <4 x half> @test_vp_reverse_v4f16(<4 x half> %src, i32 zeroext %evl) { %dst = call <4 x half> @llvm.experimental.vp.reverse.v4f16(<4 x half> %src, <4 x i1> splat (i1 1), i32 %evl) ret <4 x half> %dst } + +define <4 x bfloat> @test_vp_reverse_v4bf16_masked(<4 x bfloat> %src, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4bf16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %dst = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> %src, <4 x i1> %mask, i32 %evl) + ret <4 x bfloat> %dst +} + +define <4 x bfloat> @test_vp_reverse_v4bf16(<4 x bfloat> %src, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_reverse_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1 +; CHECK-NEXT: 
vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + + %dst = call <4 x bfloat> @llvm.experimental.vp.reverse.v4bf16(<4 x bfloat> %src, <4 x i1> splat (i1 1), i32 %evl) + ret <4 x bfloat> %dst +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-int.ll similarity index 100% rename from llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverser-int.ll rename to llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-reverse-int.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll index 7bf22247093f7..8160e62a43106 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh -verify-machineinstrs -riscv-v-vector-bits-min=128 \ -; RUN: < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i64: @@ -299,3 +301,103 @@ define <8 x half> @test_vp_splice_v8f16_masked(<8 x half> %va, <8 x half> %vb, < %v = call <8 x half> @llvm.experimental.vp.splice.v8f16(<8 x half> %va, <8 x half> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x half> %v } + +define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_v4i32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli 
zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <4 x i32> poison, i32 %first, i32 0 + %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_v4i32_with_splat_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ins = insertelement <4 x i32> poison, i32 %first, i32 0 + %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %splat, <4 x i32> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x i32> %v +} + +define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <4 x float> poison, float %first, i32 0 + %v = call <4 x float> @llvm.experimental.vp.splice.nxv2f32(<4 x float> %va, <4 x float> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x float> %v +} + +define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; ZVFH-LABEL: test_vp_splice_nxv2f16_with_firstelt: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; ZVFH-NEXT: vmv1r.v v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; 
ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.s.x v9, a1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; ZVFHMIN-NEXT: vslideup.vi v9, v8, 1, v0.t +; ZVFHMIN-NEXT: vmv1r.v v8, v9 +; ZVFHMIN-NEXT: ret + %va = insertelement <4 x half> poison, half %first, i32 0 + %v = call <4 x half> @llvm.experimental.vp.splice.nxv2f16(<4 x half> %va, <4 x half> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x half> %v +} + +define <8 x bfloat> @test_vp_splice_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) { +; CHECK-LABEL: test_vp_splice_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -5 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 5 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + + %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @test_vp_splice_v8bf16_negative_offset(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) { +; CHECK-LABEL: test_vp_splice_v8bf16_negative_offset: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -5 +; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 5 +; CHECK-NEXT: ret + + %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @test_vp_splice_v8bf16_masked(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { +; CHECK-LABEL: test_vp_splice_v8bf16_masked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, -5 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t +; CHECK-NEXT: vsetvli 
zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t +; CHECK-NEXT: ret + %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) + ret <8 x bfloat> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 9344c52098684..b11db3d61f693 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -380,31 +380,6 @@ define { , , , , , , } %res3 } -; TODO: Remove once recursive deinterleaving support is removed -define { , , , } @vector_deinterleave_load_factor4_recursive(ptr %p) { -; CHECK-LABEL: vector_deinterleave_load_factor4_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vlseg4e8.v v8, (a0) -; CHECK-NEXT: ret - %vec = load , ptr %p - %d0 = call {, } @llvm.vector.deinterleave2.nxv32i8( %vec) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call {, } @llvm.vector.deinterleave2.nxv16i8( %d0.0) - %t0 = extractvalue { , } %d1, 0 - %t2 = extractvalue { , } %d1, 1 - %d2 = call {, } @llvm.vector.deinterleave2.nxv16i8( %d0.1) - %t1 = extractvalue { , } %d2, 0 - %t3 = extractvalue { , } %d2, 1 - - %res0 = insertvalue { , , , } undef, %t0, 0 - %res1 = insertvalue { , , , } %res0, %t1, 1 - %res2 = insertvalue { , , , } %res1, %t2, 2 - %res3 = insertvalue { , , , } %res2, %t3, 3 - ret { , , , } %res3 -} - define { , , , , } @vector_deinterleave_load_factor5(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor5: ; CHECK: # %bb.0: @@ -500,45 +475,3 @@ define { , , , , , , , , , , } %res6, %t7, 7 ret { , , , , , , , } %res7 } - -; TODO: Remove once recursive deinterleaving support is removed -define {, , , , , , , } @vector_deinterleave_load_factor8_recursive(ptr %ptr) { -; CHECK-LABEL: vector_deinterleave_load_factor8_recursive: -; CHECK: # %bb.0: 
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vlseg8e32.v v8, (a0) -; CHECK-NEXT: ret - %vec = load , ptr %ptr - %d0 = call { , } @llvm.vector.deinterleave2.nxv16i32( %vec) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.0) - %d1.0 = extractvalue { , } %d1, 0 - %d1.1 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.1) - %d2.0 = extractvalue { , } %d2, 0 - %d2.1 = extractvalue { , } %d2, 1 - - %d3 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.0) - %t0 = extractvalue { , } %d3, 0 - %t4 = extractvalue { , } %d3, 1 - %d4 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.1) - %t2 = extractvalue { , } %d4, 0 - %t6 = extractvalue { , } %d4, 1 - %d5 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d2.0) - %t1 = extractvalue { , } %d5, 0 - %t5 = extractvalue { , } %d5, 1 - %d6 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d2.1) - %t3 = extractvalue { , } %d6, 0 - %t7 = extractvalue { , } %d6, 1 - - %res0 = insertvalue { , , , , , , , } undef, %t0, 0 - %res1 = insertvalue { , , , , , , , } %res0, %t1, 1 - %res2 = insertvalue { , , , , , , , } %res1, %t2, 2 - %res3 = insertvalue { , , , , , , , } %res2, %t3, 3 - %res4 = insertvalue { , , , , , , , } %res3, %t4, 4 - %res5 = insertvalue { , , , , , , , } %res4, %t5, 5 - %res6 = insertvalue { , , , , , , , } %res5, %t6, 6 - %res7 = insertvalue { , , , , , , , } %res6, %t7, 7 - ret { , , , , , , , } %res7 -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index a5811e697634a..af55aaa8fce86 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -260,20 +260,6 @@ define void @vector_interleave_store_factor4( %a, %a, %b, %c, %d, ptr %p) { -; CHECK-LABEL: vector_interleave_store_factor4_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: 
vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vsseg4e32.v v8, (a0) -; CHECK-NEXT: ret - %v0 = call @llvm.vector.interleave2.nxv8i32( %a, %c) - %v1 = call @llvm.vector.interleave2.nxv8i32( %b, %d) - %v2 = call @llvm.vector.interleave2.nxv16i32( %v0, %v1) - store %v2, ptr %p - ret void -} - define void @vector_interleave_store_factor5( %a, %b, %c, %d, %e, ptr %p) { ; CHECK-LABEL: vector_interleave_store_factor5: ; CHECK: # %bb.0: @@ -317,23 +303,3 @@ define void @vector_interleave_store_factor8( %a, %v, ptr %p ret void } - -; TODO: Remove once recursive interleaving support is removed -define void @vector_interleave_store_factor8_recursive( %a, %b, %c, %d, %e, %f, %g, %h, ptr %p) { -; CHECK-LABEL: vector_interleave_store_factor8_recursive: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vsseg8e32.v v8, (a0) -; CHECK-NEXT: ret - %v0 = call @llvm.vector.interleave2.nxv4i32( %a, %e) - %v1 = call @llvm.vector.interleave2.nxv4i32( %c, %g) - %v2 = call @llvm.vector.interleave2.nxv8i32( %v0, %v1) - - %v3 = call @llvm.vector.interleave2.nxv4i32( %b, %f) - %v4 = call @llvm.vector.interleave2.nxv4i32( %d, %h) - %v5 = call @llvm.vector.interleave2.nxv8i32( %v3, %v4) - - %v6 = call @llvm.vector.interleave2.nxv16i32( %v2, %v5) - store %v6, ptr %p - ret void -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll index 6008ea43e9158..9c8c5da75ff7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \ +; RUN: < %s | FileCheck %s 
--check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define @test_vp_splice_nxv2i64( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i64: @@ -505,3 +505,73 @@ define @test_vp_splice_nxv2bf16_masked( @llvm.experimental.vp.splice.nxv2bf16( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } + +define @test_vp_splice_nxv2i32_with_firstelt(i32 %first, %vb, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2i32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement poison, i32 %first, i32 0 + %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 0, %mask, i32 1, i32 %evl) + ret %v +} + +define @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first, %vb, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2i32_with_splat_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ins = insertelement poison, i32 %first, i32 0 + %splat = shufflevector %ins, poison, zeroinitializer + %v = call @llvm.experimental.vp.splice.nxv2i32( %splat, %vb, i32 0, %mask, i32 1, i32 %evl) + ret %v +} + +define @test_vp_splice_nxv2f32_with_firstelt(float %first, %vb, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement poison, float %first, i32 0 + %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 0, %mask, i32 1, i32 %evl) + ret %v +} + +define @test_vp_splice_nxv2f16_with_firstelt(half %first, %vb, %mask, i32 
zeroext %evl) { +; ZVFH-LABEL: test_vp_splice_nxv2f16_with_firstelt: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; ZVFH-NEXT: vmv1r.v v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a1, v0.t +; ZVFHMIN-NEXT: vmv1r.v v8, v9 +; ZVFHMIN-NEXT: ret + %va = insertelement poison, half %first, i32 0 + %v = call @llvm.experimental.vp.splice.nxv2f16( %va, %vb, i32 0, %mask, i32 1, i32 %evl) + ret %v +} + +define @test_vp_splice_nxv2bf16_with_firstelt(bfloat %first, %vb, %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2bf16_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a1, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement poison, bfloat %first, i32 0 + %v = call @llvm.experimental.vp.splice.nxv2bf16( %va, %vb, i32 0, %mask, i32 1, i32 %evl) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index 1868154052272..35f01f608b56e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -84,15 +84,11 @@ define {, , , @llvm.vp.load.nxv8i32.p0(ptr %ptr, splat (i1 true), i32 %rvl) - %d0 = call { , } @llvm.vector.deinterleave2.nxv8i32( %wide.masked.load) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.0) - %t0 = extractvalue { , } %d1, 0 - %t2 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.1) - %t1 = extractvalue { , } %d2, 0 - %t3 = extractvalue { , } %d2, 1 + %d = call { , , , } 
@llvm.vector.deinterleave4.nxv8i32( %wide.masked.load) + %t0 = extractvalue { , , , } %d, 0 + %t1 = extractvalue { , , , } %d, 1 + %t2 = extractvalue { , , , } %d, 2 + %t3 = extractvalue { , , , } %d, 3 %res0 = insertvalue { , , , } poison, %t0, 0 %res1 = insertvalue { , , , } %res0, %t1, 1 @@ -214,28 +210,15 @@ define {, , , @llvm.vp.load.nxv16i32.p0(ptr %ptr, splat (i1 true), i32 %rvl) - %d0 = call { , } @llvm.vector.deinterleave2.nxv16i32( %wide.masked.load) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.0) - %d1.0 = extractvalue { , } %d1, 0 - %d1.1 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.1) - %d2.0 = extractvalue { , } %d2, 0 - %d2.1 = extractvalue { , } %d2, 1 - - %d3 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.0) - %t0 = extractvalue { , } %d3, 0 - %t4 = extractvalue { , } %d3, 1 - %d4 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.1) - %t2 = extractvalue { , } %d4, 0 - %t6 = extractvalue { , } %d4, 1 - %d5 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d2.0) - %t1 = extractvalue { , } %d5, 0 - %t5 = extractvalue { , } %d5, 1 - %d6 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d2.1) - %t3 = extractvalue { , } %d6, 0 - %t7 = extractvalue { , } %d6, 1 + %d = call { , , , , , , , } @llvm.vector.deinterleave8.nxv16i32( %wide.masked.load) + %t0 = extractvalue { , , , , , , , } %d, 0 + %t1 = extractvalue { , , , , , , , } %d, 1 + %t2 = extractvalue { , , , , , , , } %d, 2 + %t3 = extractvalue { , , , , , , , } %d, 3 + %t4 = extractvalue { , , , , , , , } %d, 4 + %t5 = extractvalue { , , , , , , , } %d, 5 + %t6 = extractvalue { , , , , , , , } %d, 6 + %t7 = extractvalue { , , , , , , , } %d, 7 %res0 = insertvalue { , , , , , , , } poison, %t0, 0 %res1 = insertvalue { , , , , , , , } %res0, %t1, 1 @@ -323,10 +306,8 @@ define void @store_factor4_v2( %v0, %v1, pt ; RV64-NEXT: vsseg4e32.v v8, (a0) ; RV64-NEXT: ret %rvl 
= mul i32 %evl, 8 - %interleaved.vec0 = call @llvm.vector.interleave2.nxv2i32( %v0, %v0) - %interleaved.vec1 = call @llvm.vector.interleave2.nxv2i32( %v1, %v1) - %interleaved.vec2 = call @llvm.vector.interleave2.nxv4i32( %interleaved.vec0, %interleaved.vec1) - call void @llvm.vp.store.nxv4i32.p0( %interleaved.vec2, ptr %ptr, splat (i1 true), i32 %rvl) + %interleaved.vec = call @llvm.vector.interleave4.nxv4i32( %v0, %v1, %v0, %v1) + call void @llvm.vp.store.nxv4i32.p0( %interleaved.vec, ptr %ptr, splat (i1 true), i32 %rvl) ret void } @@ -430,14 +411,8 @@ define void @store_factor8_v2( %v0, %v1, pt ; RV64-NEXT: vsseg8e32.v v8, (a0) ; RV64-NEXT: ret %rvl = mul i32 %evl, 8 - %interleaved.vec0 = call @llvm.vector.interleave2.nxv2i32( %v0, %v0) - %interleaved.vec1 = call @llvm.vector.interleave2.nxv2i32( %v0, %v0) - %interleaved.vec2 = call @llvm.vector.interleave2.nxv4i32( %interleaved.vec0, %interleaved.vec1) - %interleaved.vec3 = call @llvm.vector.interleave2.nxv2i32( %v1, %v1) - %interleaved.vec4 = call @llvm.vector.interleave2.nxv2i32( %v1, %v1) - %interleaved.vec5 = call @llvm.vector.interleave2.nxv4i32( %interleaved.vec3, %interleaved.vec4) - %interleaved.vec6 = call @llvm.vector.interleave2.nxv8i32( %interleaved.vec2, %interleaved.vec5) - call void @llvm.vp.store.nxv8i32.p0( %interleaved.vec6, ptr %ptr, splat (i1 true), i32 %rvl) + %interleaved.vec = call @llvm.vector.interleave8.nxv8i32( %v0, %v1, %v0, %v1, %v0, %v1, %v0, %v1) + call void @llvm.vp.store.nxv8i32.p0( %interleaved.vec, ptr %ptr, splat (i1 true), i32 %rvl) ret void } @@ -485,19 +460,13 @@ define {, , , @llvm.vector.interleave2.nxv4i1( %mask, %mask) - %interleaved.mask1 = call @llvm.vector.interleave2.nxv4i1( %mask, %mask) - %interleaved.mask2 = call @llvm.vector.interleave2.nxv8i1( %interleaved.mask0, %interleaved.mask1) - %wide.masked.load = call @llvm.vp.load.nxv8i32.p0(ptr %ptr, %interleaved.mask2, i32 %rvl) - %d0 = call { , } @llvm.vector.deinterleave2.nxv8i32( %wide.masked.load) - %d0.0 = 
extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.0) - %t0 = extractvalue { , } %d1, 0 - %t2 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.1) - %t1 = extractvalue { , } %d2, 0 - %t3 = extractvalue { , } %d2, 1 + %interleaved.mask = call @llvm.vector.interleave4.nxv8i1( %mask, %mask, %mask, %mask) + %wide.masked.load = call @llvm.vp.load.nxv8i32.p0(ptr %ptr, %interleaved.mask, i32 %rvl) + %d = call { , , , } @llvm.vector.deinterleave4.nxv8i32( %wide.masked.load) + %t0 = extractvalue { , , , } %d, 0 + %t1 = extractvalue { , , , } %d, 1 + %t2 = extractvalue { , , , } %d, 2 + %t3 = extractvalue { , , , } %d, 3 %res0 = insertvalue { , , , } poison, %t0, 0 %res1 = insertvalue { , , , } %res0, %t1, 1 @@ -674,175 +643,14 @@ define void @masked_store_factor4_v2( %mask, ; RV64-NEXT: vsseg4e32.v v8, (a0), v0.t ; RV64-NEXT: ret %rvl = mul i32 %evl, 4 - %interleaved.mask0 = call @llvm.vector.interleave2.nxv2i1( %mask, %mask) - %interleaved.mask1 = call @llvm.vector.interleave2.nxv2i1( %mask, %mask) - %interleaved.mask2 = call @llvm.vector.interleave2.nxv4i1( %interleaved.mask0, %interleaved.mask1) - %interleaved.vec0 = call @llvm.vector.interleave2.nxv2i32( %v0, %v0) - %interleaved.vec1 = call @llvm.vector.interleave2.nxv2i32( %v1, %v1) - %interleaved.vec2 = call @llvm.vector.interleave2.nxv4i32( %interleaved.vec0, %interleaved.vec1) - call void @llvm.vp.store.nxv4i32.p0( %interleaved.vec2, ptr %ptr, %interleaved.mask2, i32 %rvl) + %interleaved.mask = call @llvm.vector.interleave4.nxv4i1( %mask, %mask, %mask, %mask) + %interleaved.vec = call @llvm.vector.interleave4.nxv2i32( %v0, %v1, %v0, %v1) + call void @llvm.vp.store.nxv4i32.p0( %interleaved.vec, ptr %ptr, %interleaved.mask, i32 %rvl) ret void } ; Negative tests -; We should not transform this function because the deinterleave tree is not in a desired form. 
-define {, , , } @incorrect_extract_value_index(ptr %ptr, i32 %evl) { -; RV32-LABEL: incorrect_extract_value_index: -; RV32: # %bb.0: -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vnsrl.wi v12, v8, 0 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vnsrl.wx v9, v12, a0 -; RV32-NEXT: vnsrl.wi v8, v12, 0 -; RV32-NEXT: vmv.v.v v10, v9 -; RV32-NEXT: vmv.v.v v11, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: incorrect_extract_value_index: -; RV64: # %bb.0: -; RV64-NEXT: slli a1, a1, 34 -; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vnsrl.wi v12, v8, 0 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vnsrl.wx v9, v12, a0 -; RV64-NEXT: vnsrl.wi v8, v12, 0 -; RV64-NEXT: vmv.v.v v10, v9 -; RV64-NEXT: vmv.v.v v11, v9 -; RV64-NEXT: ret - %rvl = mul i32 %evl, 4 - %wide.masked.load = call @llvm.vp.load.nxv8i32.p0(ptr %ptr, splat (i1 true), i32 %rvl) - %d0 = call { , } @llvm.vector.deinterleave2.nxv8i32( %wide.masked.load) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 0 - %d1 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.0) - %t0 = extractvalue { , } %d1, 0 - %t2 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.1) - %t1 = extractvalue { , } %d2, 1 - %t3 = extractvalue { , } %d2, 1 - - %res0 = insertvalue { , , , } poison, %t0, 0 - %res1 = insertvalue { , , , } %res0, %t1, 1 - %res2 = insertvalue { , , , } %res1, %t2, 2 - %res3 = insertvalue { , , , } %res2, %t3, 3 - ret { , , , } %res3 -} - -; We should not transform this function because the expression is not a balanced tree. 
-define {, , , } @not_balanced_load_tree(ptr %ptr, i32 %evl) { -; RV32-LABEL: not_balanced_load_tree: -; RV32: # %bb.0: -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vle32.v v12, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vnsrl.wx v8, v12, a0 -; RV32-NEXT: vnsrl.wi v16, v12, 0 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vnsrl.wi v10, v16, 0 -; RV32-NEXT: vnsrl.wx v11, v16, a0 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vnsrl.wx v12, v11, a0 -; RV32-NEXT: vnsrl.wi v11, v11, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: not_balanced_load_tree: -; RV64: # %bb.0: -; RV64-NEXT: slli a1, a1, 34 -; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vle32.v v12, (a0) -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vnsrl.wx v8, v12, a0 -; RV64-NEXT: vnsrl.wi v16, v12, 0 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vnsrl.wi v10, v16, 0 -; RV64-NEXT: vnsrl.wx v11, v16, a0 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vnsrl.wx v12, v11, a0 -; RV64-NEXT: vnsrl.wi v11, v11, 0 -; RV64-NEXT: ret - %rvl = mul i32 %evl, 4 - %wide.masked.load = call @llvm.vp.load.nxv8i32.p0(ptr %ptr, splat (i1 true), i32 %rvl) - %d0 = call { , } @llvm.vector.deinterleave2.nxv8i32( %wide.masked.load) - %d0.0 = extractvalue { , } %d0, 0 - %t0 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.0) - %t1 = extractvalue { , } %d1, 0 - %d1.1 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.1) - %t2 = extractvalue { , } %d2, 0 - %t3 = extractvalue { , } %d2, 1 - - %res0 = insertvalue { , , , } poison, %t0, 0 - %res1 = insertvalue { , , , } %res0, %t1, 1 - %res2 = insertvalue { , , , } %res1, %t2, 2 - %res3 = insertvalue { , , , } %res2, %t3, 3 - ret { , , , } %res3 -} - -define void @not_balanced_store_tree( 
%v0, %v1, %v2, ptr %ptr, i32 %evl) { -; RV32-LABEL: not_balanced_store_tree: -; RV32: # %bb.0: -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; RV32-NEXT: vwaddu.vv v12, v8, v8 -; RV32-NEXT: li a2, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vwmaccu.vx v12, a2, v8 -; RV32-NEXT: srli a3, a3, 3 -; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vx v8, v12, a3 -; RV32-NEXT: vslideup.vx v12, v8, a3 -; RV32-NEXT: vwaddu.vv v16, v12, v9 -; RV32-NEXT: vwmaccu.vx v16, a2, v9 -; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma -; RV32-NEXT: vwaddu.vv v12, v16, v10 -; RV32-NEXT: vwmaccu.vx v12, a2, v10 -; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV32-NEXT: vse32.v v12, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: not_balanced_store_tree: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v12, v8, v8 -; RV64-NEXT: li a2, -1 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a1, a1, 34 -; RV64-NEXT: vwmaccu.vx v12, a2, v8 -; RV64-NEXT: srli a3, a3, 3 -; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vx v8, v12, a3 -; RV64-NEXT: vslideup.vx v12, v8, a3 -; RV64-NEXT: vwaddu.vv v16, v12, v9 -; RV64-NEXT: vwmaccu.vx v16, a2, v9 -; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma -; RV64-NEXT: vwaddu.vv v12, v16, v10 -; RV64-NEXT: vwmaccu.vx v12, a2, v10 -; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; RV64-NEXT: vse32.v v12, (a0) -; RV64-NEXT: ret - %rvl = mul i32 %evl, 4 - %interleaved.vec0 = call @llvm.vector.interleave2.nxv2i32( %v0, %v0) - %interleaved.vec1 = call @llvm.vector.interleave2.nxv2i32( %interleaved.vec0, %v1) - %interleaved.vec2 = call @llvm.vector.interleave2.nxv4i32( %interleaved.vec1, %v2) - call void @llvm.vp.store.nxv8i32.p0( %interleaved.vec2, ptr %ptr, splat (i1 true), i32 %rvl) - ret void -} - define {, } @not_same_mask( %mask0, %mask1, ptr %ptr, i32 %evl) { ; RV32-LABEL: not_same_mask: ; RV32: # %bb.0: @@ -931,48 
+739,58 @@ define {, } @not_same_mask( define {, , , } @invalid_evl(ptr %ptr, i32 %evl) { ; RV32-LABEL: invalid_evl: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb ; RV32-NEXT: ori a1, a1, 1 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vnsrl.wx v12, v8, a0 -; RV32-NEXT: vnsrl.wi v14, v8, 0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs4r.v v8, (a0) ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vnsrl.wx v10, v14, a0 -; RV32-NEXT: vnsrl.wi v8, v14, 0 -; RV32-NEXT: vnsrl.wx v11, v12, a0 -; RV32-NEXT: vnsrl.wi v9, v12, 0 +; RV32-NEXT: vlseg4e32.v v8, (a0) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; ; RV64-LABEL: invalid_evl: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb ; RV64-NEXT: ori a1, a1, 1 ; RV64-NEXT: slli a1, a1, 32 ; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: li a0, 32 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vnsrl.wx v12, v8, a0 -; RV64-NEXT: vnsrl.wi v14, v8, 0 +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs4r.v v8, (a0) ; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vnsrl.wx v10, v14, a0 -; RV64-NEXT: vnsrl.wi v8, v14, 0 -; RV64-NEXT: vnsrl.wx 
v11, v12, a0 -; RV64-NEXT: vnsrl.wi v9, v12, 0 +; RV64-NEXT: vlseg4e32.v v8, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 16 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %rvl = or i32 %evl, 1 %wide.masked.load = call @llvm.vp.load.nxv8i32.p0(ptr %ptr, splat (i1 true), i32 %rvl) - %d0 = call { , } @llvm.vector.deinterleave2.nxv8i32( %wide.masked.load) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.0) - %t0 = extractvalue { , } %d1, 0 - %t2 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d0.1) - %t1 = extractvalue { , } %d2, 0 - %t3 = extractvalue { , } %d2, 1 + %d = call { , , , } @llvm.vector.deinterleave4.nxv8i32( %wide.masked.load) + %t0 = extractvalue { , , , } %d, 0 + %t1 = extractvalue { , , , } %d, 1 + %t2 = extractvalue { , , , } %d, 2 + %t3 = extractvalue { , , , } %d, 3 %res0 = insertvalue { , , , } poison, %t0, 0 %res1 = insertvalue { , , , } %res0, %t1, 1 diff --git a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll index e8d4b2828c84b..2125a0b8912b1 100644 --- a/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll +++ b/llvm/test/CodeGen/SystemZ/vec-max-min-zerosplat.ll @@ -1,12 +1,15 @@ -; Test vector maximum/minimum with a zero splat on z14. -; +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; Test vector maximum/minimum with a zero splat on z14. 
+ define <2 x double> @f1(<2 x double> %val) { ; CHECK-LABEL: f1: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 4 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfmaxdb %v24, %v24, %v0, 4 +; CHECK-NEXT: br %r14 %cmp = fcmp ogt <2 x double> %val, zeroinitializer %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer ret <2 x double> %ret @@ -14,9 +17,10 @@ define <2 x double> @f1(<2 x double> %val) { define <2 x double> @f2(<2 x double> %val) { ; CHECK-LABEL: f2: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfmindb %v24, %v24, %v0, 4 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfmindb %v24, %v24, %v0, 4 +; CHECK-NEXT: br %r14 %cmp = fcmp olt <2 x double> %val, zeroinitializer %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer ret <2 x double> %ret @@ -24,9 +28,10 @@ define <2 x double> @f2(<2 x double> %val) { define <4 x float> @f3(<4 x float> %val) { ; CHECK-LABEL: f3: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 4 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfmaxsb %v24, %v24, %v0, 4 +; CHECK-NEXT: br %r14 %cmp = fcmp ogt <4 x float> %val, zeroinitializer %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer ret <4 x float> %ret @@ -34,9 +39,10 @@ define <4 x float> @f3(<4 x float> %val) { define <4 x float> @f4(<4 x float> %val) { ; CHECK-LABEL: f4: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfminsb %v24, %v24, %v0, 4 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfminsb %v24, %v24, %v0, 4 +; CHECK-NEXT: br %r14 %cmp = fcmp olt <4 x float> %val, zeroinitializer %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer ret <4 x float> %ret @@ -44,10 +50,11 @@ define <4 x float> @f4(<4 x float> %val) { define <2 x double> @f5(<2 x double> %val) { ; CHECK-LABEL: f5: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfchedb 
%v1, %v0, %v24 -; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfchedb %v1, %v0, %v24 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 +; CHECK-NEXT: br %r14 %cmp = fcmp ugt <2 x double> %val, zeroinitializer %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer ret <2 x double> %ret @@ -55,10 +62,11 @@ define <2 x double> @f5(<2 x double> %val) { define <2 x double> @f6(<2 x double> %val) { ; CHECK-LABEL: f6: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfchedb %v1, %v24, %v0 -; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfchedb %v1, %v24, %v0 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 +; CHECK-NEXT: br %r14 %cmp = fcmp ult <2 x double> %val, zeroinitializer %ret = select <2 x i1> %cmp, <2 x double> %val, <2 x double> zeroinitializer ret <2 x double> %ret @@ -66,10 +74,11 @@ define <2 x double> @f6(<2 x double> %val) { define <4 x float> @f7(<4 x float> %val) { ; CHECK-LABEL: f7: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfchesb %v1, %v0, %v24 -; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfchesb %v1, %v0, %v24 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 +; CHECK-NEXT: br %r14 %cmp = fcmp ugt <4 x float> %val, zeroinitializer %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer ret <4 x float> %ret @@ -77,10 +86,11 @@ define <4 x float> @f7(<4 x float> %val) { define <4 x float> @f8(<4 x float> %val) { ; CHECK-LABEL: f8: -; CHECK: vgbm %v0, 0 -; CHECK-NEXT: vfchesb %v1, %v24, %v0 -; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 -; CHECK-NEXT: br %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vfchesb %v1, %v24, %v0 +; CHECK-NEXT: vsel %v24, %v0, %v24, %v1 +; CHECK-NEXT: br %r14 %cmp = fcmp ult <4 x float> %val, zeroinitializer %ret = select <4 x i1> %cmp, <4 x float> %val, <4 x float> zeroinitializer ret 
<4 x float> %ret diff --git a/llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir b/llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir new file mode 100644 index 0000000000000..f8ce9ea8be650 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir @@ -0,0 +1,189 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=i686-linux-gnu -run-pass=regbankselect,instruction-select %s -o - | FileCheck %s --check-prefixes GISEL-I686 + +--- +name: test_sincos_f32 +alignment: 16 +legalized: true +fixedStack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: + - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.1: + ; GISEL-I686-LABEL: name: test_sincos_f32 + ; GISEL-I686: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) + ; GISEL-I686-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.0, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: [[LEA32r1:%[0-9]+]]:gr32 = LEA32r %stack.1, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: ADJCALLSTACKDOWN32 12, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into stack, align 1) + ; 
GISEL-I686-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY1]], 1, $noreg, 4, $noreg, [[LEA32r]] :: (store (s32) into stack + 4, align 1) + ; GISEL-I686-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY2]], 1, $noreg, 8, $noreg, [[LEA32r1]] :: (store (s32) into stack + 8, align 1) + ; GISEL-I686-NEXT: CALLpcrel32 &sincosf, csr_32, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: [[LD_Fp32m:%[0-9]+]]:rfp32 = nofpexcept LD_Fp32m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %stack.0) + ; GISEL-I686-NEXT: [[LD_Fp32m1:%[0-9]+]]:rfp32 = nofpexcept LD_Fp32m %stack.1, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %stack.1) + ; GISEL-I686-NEXT: $fp0 = COPY [[LD_Fp32m]] + ; GISEL-I686-NEXT: $fp1 = COPY [[LD_Fp32m1]] + ; GISEL-I686-NEXT: RET 0, implicit $fp0, implicit $fp1 + %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 + %0:_(s32) = G_LOAD %1(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) + %4:_(p0) = G_FRAME_INDEX %stack.0 + %5:_(p0) = G_FRAME_INDEX %stack.1 + ADJCALLSTACKDOWN32 12, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + %6:_(p0) = COPY $esp + %7:_(s32) = G_CONSTANT i32 0 + %8:_(p0) = G_PTR_ADD %6, %7(s32) + G_STORE %0(s32), %8(p0) :: (store (s32) into stack, align 1) + %9:_(p0) = COPY $esp + %10:_(s32) = G_CONSTANT i32 4 + %11:_(p0) = G_PTR_ADD %9, %10(s32) + G_STORE %4(p0), %11(p0) :: (store (s32) into stack + 4, align 1) + %12:_(p0) = COPY $esp + %13:_(s32) = G_CONSTANT i32 8 + %14:_(p0) = G_PTR_ADD %12, %13(s32) + G_STORE %5(p0), %14(p0) :: (store (s32) into stack + 8, align 1) + CALLpcrel32 &sincosf, csr_32, implicit $esp, implicit $ssp + ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, 
implicit $ssp + %2:_(s32) = G_LOAD %4(p0) :: (load (s32) from %stack.0) + %3:_(s32) = G_LOAD %5(p0) :: (load (s32) from %stack.1) + $fp0 = COPY %2(s32) + $fp1 = COPY %3(s32) + RET 0, implicit $fp0, implicit $fp1 +... +--- +name: test_sincos_f64 +alignment: 16 +legalized: true +fixedStack: + - { id: 0, type: default, offset: 0, size: 8, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: + - { id: 0, name: '', type: default, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: default, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.1: + ; GISEL-I686-LABEL: name: test_sincos_f64 + ; GISEL-I686: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %fixed-stack.0, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) + ; GISEL-I686-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[LEA32r]], 1, $noreg, 4, $noreg :: (invariant load (s32) from %fixed-stack.0 + 4, basealign 16) + ; GISEL-I686-NEXT: [[LEA32r1:%[0-9]+]]:gr32 = LEA32r %stack.0, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: [[LEA32r2:%[0-9]+]]:gr32 = LEA32r %stack.1, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: [[MOV32r0_:%[0-9]+]]:gr32_nosp = MOV32r0 implicit-def dead $eflags + ; GISEL-I686-NEXT: [[LEA32r3:%[0-9]+]]:gr32 = LEA32r [[COPY]], 1, [[MOV32r0_]], 
0, $noreg + ; GISEL-I686-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into stack, align 1) + ; GISEL-I686-NEXT: MOV32mr [[LEA32r3]], 1, $noreg, 4, $noreg, [[MOV32rm1]] :: (store (s32) into stack + 4, align 1) + ; GISEL-I686-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY1]], 1, $noreg, 8, $noreg, [[LEA32r1]] :: (store (s32) into stack + 8, align 1) + ; GISEL-I686-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY2]], 1, $noreg, 12, $noreg, [[LEA32r2]] :: (store (s32) into stack + 12, align 1) + ; GISEL-I686-NEXT: CALLpcrel32 &sincos, csr_32, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: $fp0 = IMPLICIT_DEF + ; GISEL-I686-NEXT: $fp1 = IMPLICIT_DEF + ; GISEL-I686-NEXT: RET 0, implicit $fp0, implicit $fp1 + %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 + %25:_(s32) = G_LOAD %1(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) + %17:_(s32) = G_CONSTANT i32 4 + %26:_(p0) = G_PTR_ADD %1, %17(s32) + %27:_(s32) = G_LOAD %26(p0) :: (invariant load (s32) from %fixed-stack.0 + 4, basealign 16) + %4:_(p0) = G_FRAME_INDEX %stack.0 + %5:_(p0) = G_FRAME_INDEX %stack.1 + ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + %6:_(p0) = COPY $esp + %7:_(s32) = G_CONSTANT i32 0 + %8:_(p0) = G_PTR_ADD %6, %7(s32) + G_STORE %25(s32), %8(p0) :: (store (s32) into stack, align 1) + %24:_(p0) = G_PTR_ADD %8, %17(s32) + G_STORE %27(s32), %24(p0) :: (store (s32) into stack + 4, align 1) + %9:_(p0) = COPY $esp + %10:_(s32) = G_CONSTANT i32 8 + %11:_(p0) = G_PTR_ADD %9, %10(s32) + G_STORE %4(p0), %11(p0) :: (store (s32) into stack + 8, align 1) + %12:_(p0) = COPY $esp + %13:_(s32) = G_CONSTANT i32 12 + %14:_(p0) = G_PTR_ADD %12, %13(s32) + G_STORE %5(p0), %14(p0) :: (store (s32) into stack + 12, align 1) + 
CALLpcrel32 &sincos, csr_32, implicit $esp, implicit $ssp + ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + $fp0 = IMPLICIT_DEF + $fp1 = IMPLICIT_DEF + RET 0, implicit $fp0, implicit $fp1 +... +--- +name: test_sincos_f80 +alignment: 16 +legalized: true +fixedStack: + - { id: 0, type: default, offset: 0, size: 10, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: + - { id: 0, name: '', type: default, offset: 0, size: 10, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: default, offset: 0, size: 10, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.1: + ; GISEL-I686-LABEL: name: test_sincos_f80 + ; GISEL-I686: [[LD_Fp80m:%[0-9]+]]:rfp80 = nofpexcept LD_Fp80m %fixed-stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (invariant load (s80) from %fixed-stack.0, align 16) + ; GISEL-I686-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.0, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: [[LEA32r1:%[0-9]+]]:gr32 = LEA32r %stack.1, 1, $noreg, 0, $noreg + ; GISEL-I686-NEXT: ADJCALLSTACKDOWN32 20, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: nofpexcept ST_FpP80m [[COPY]], 1, $noreg, 0, $noreg, [[LD_Fp80m]], implicit-def dead $fpsw, implicit $fpcw :: (store (s80) into stack, align 1) + ; GISEL-I686-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY1]], 1, $noreg, 12, $noreg, [[LEA32r]] :: (store (s32) 
into stack + 12, align 1) + ; GISEL-I686-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $esp + ; GISEL-I686-NEXT: MOV32mr [[COPY2]], 1, $noreg, 16, $noreg, [[LEA32r1]] :: (store (s32) into stack + 16, align 1) + ; GISEL-I686-NEXT: CALLpcrel32 &sincosl, csr_32, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: ADJCALLSTACKUP32 20, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + ; GISEL-I686-NEXT: [[LD_Fp80m1:%[0-9]+]]:rfp80 = nofpexcept LD_Fp80m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s80) from %stack.0, align 16) + ; GISEL-I686-NEXT: [[LD_Fp80m2:%[0-9]+]]:rfp80 = nofpexcept LD_Fp80m %stack.1, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s80) from %stack.1, align 16) + ; GISEL-I686-NEXT: $fp0 = COPY [[LD_Fp80m1]] + ; GISEL-I686-NEXT: $fp1 = COPY [[LD_Fp80m2]] + ; GISEL-I686-NEXT: RET 0, implicit $fp0, implicit $fp1 + %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 + %0:_(s80) = G_LOAD %1(p0) :: (invariant load (s80) from %fixed-stack.0, align 16) + %4:_(p0) = G_FRAME_INDEX %stack.0 + %5:_(p0) = G_FRAME_INDEX %stack.1 + ADJCALLSTACKDOWN32 20, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + %6:_(p0) = COPY $esp + %7:_(s32) = G_CONSTANT i32 0 + %8:_(p0) = G_PTR_ADD %6, %7(s32) + G_STORE %0(s80), %8(p0) :: (store (s80) into stack, align 1) + %9:_(p0) = COPY $esp + %10:_(s32) = G_CONSTANT i32 12 + %11:_(p0) = G_PTR_ADD %9, %10(s32) + G_STORE %4(p0), %11(p0) :: (store (s32) into stack + 12, align 1) + %12:_(p0) = COPY $esp + %13:_(s32) = G_CONSTANT i32 16 + %14:_(p0) = G_PTR_ADD %12, %13(s32) + G_STORE %5(p0), %14(p0) :: (store (s32) into stack + 16, align 1) + CALLpcrel32 &sincosl, csr_32, implicit $esp, implicit $ssp + ADJCALLSTACKUP32 20, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp + %2:_(s80) = G_LOAD %4(p0) :: (load (s80) from %stack.0, align 16) + %3:_(s80) = G_LOAD %5(p0) :: 
(load (s80) from %stack.1, align 16) + $fp0 = COPY %2(s80) + $fp1 = COPY %3(s80) + RET 0, implicit $fp0, implicit $fp1 +... diff --git a/llvm/test/CodeGen/X86/isel-fabs-x87.ll b/llvm/test/CodeGen/X86/isel-fabs-x87.ll index 8b846499946cc..a0534e6a1a82e 100644 --- a/llvm/test/CodeGen/X86/isel-fabs-x87.ll +++ b/llvm/test/CodeGen/X86/isel-fabs-x87.ll @@ -1,8 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse2,-sse | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse2,-sse -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse2,-sse | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse2,-sse -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse2,-sse -fast-isel | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse2,-sse -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse2,-sse | FileCheck %s --check-prefixes=X86,SDAG-ISEL +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse2,-sse -fast-isel | FileCheck %s --check-prefixes=X86,Fast-ISEL +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse2,-sse -global-isel -global-isel-abort=0 | FileCheck %s --check-prefixes=X86,GISEL-ISEL + +define void @test_float_abs(ptr %argptr) { +; SDAG-ISEL-LABEL: test_float_abs: +; SDAG-ISEL: # %bb.0: +; SDAG-ISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG-ISEL-NEXT: andb $127, 3(%eax) +; SDAG-ISEL-NEXT: retl +; +; Fast-ISEL-LABEL: test_float_abs: +; Fast-ISEL: # %bb.0: +; Fast-ISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; Fast-ISEL-NEXT: andb $127, 3(%eax) +; Fast-ISEL-NEXT: retl +; +; GISEL-ISEL-LABEL: test_float_abs: +; GISEL-ISEL: # %bb.0: +; GISEL-ISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-ISEL-NEXT: 
andl $2147483647, (%eax) # imm = 0x7FFFFFFF +; GISEL-ISEL-NEXT: retl + %arg = load float, float* %argptr + %abs = tail call float @llvm.fabs.f32(float %arg) + store float %abs, ptr %argptr + ret void + } + +define void @test_double_abs(ptr %argptr) { +; X86-LABEL: test_double_abs: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: fldl (%eax) +; X86-NEXT: fabs +; X86-NEXT: fstpl (%eax) +; X86-NEXT: retl + %arg = load double, double* %argptr + %abs = tail call double @llvm.fabs.f64(double %arg) + store double %abs, double* %argptr + ret void +} define x86_fp80 @test_x86_fp80_abs(x86_fp80 %arg) { ; X64-LABEL: test_x86_fp80_abs: diff --git a/llvm/test/CodeGen/X86/isel-fabs.ll b/llvm/test/CodeGen/X86/isel-fabs.ll index 10bd5799280ad..c2d29248e49ba 100644 --- a/llvm/test/CodeGen/X86/isel-fabs.ll +++ b/llvm/test/CodeGen/X86/isel-fabs.ll @@ -1,37 +1,61 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=i686-- -mattr=-x87 | FileCheck %s --check-prefixes=X86 -; RUN: llc < %s -mtriple=i686-- -mattr=-x87 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=FASTISEL-X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,+sse,+sse2 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,+sse,+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=-x87,+sse,+sse2 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 +; RUN: llc < %s -mtriple=i686-- -mattr=-x87,+sse,+sse2 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-- -mattr=-x87,+sse,+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefixes=FASTISEL-X86 +; RUN: llc < %s -mtriple=i686-- 
-mattr=-x87,+sse,+sse2 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X86 - -define float @test_float_abs(float %arg) { +define float @test_float_abs(float %arg) nounwind { ; X64-LABEL: test_float_abs: ; X64: # %bb.0: ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-NEXT: retq ; +; GISEL-X64-LABEL: test_float_abs: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movd %xmm0, %eax +; GISEL-X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; GISEL-X64-NEXT: movd %eax, %xmm0 +; GISEL-X64-NEXT: retq +; ; X86-LABEL: test_float_abs: ; X86: # %bb.0: -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: movd %xmm0, %eax ; X86-NEXT: retl ; ; FASTISEL-X86-LABEL: test_float_abs: ; FASTISEL-X86: # %bb.0: -; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; FASTISEL-X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; FASTISEL-X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; FASTISEL-X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; FASTISEL-X86-NEXT: movd %xmm0, %eax ; FASTISEL-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_float_abs: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; GISEL-X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: retl %abs = tail call float @llvm.fabs.f32(float %arg) ret float %abs } -define double @test_double_abs(double %arg) { +define double @test_double_abs(double %arg) nounwind { ; X64-LABEL: test_double_abs: ; X64: # %bb.0: ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-NEXT: retq ; +; GISEL-X64-LABEL: test_double_abs: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; GISEL-X64-NEXT: movq %xmm0, %rcx +; GISEL-X64-NEXT: andq %rax, %rcx +; GISEL-X64-NEXT: movq %rcx, %xmm0 +; GISEL-X64-NEXT: retq +; ; X86-LABEL: test_double_abs: ; X86: # 
%bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -41,10 +65,26 @@ define double @test_double_abs(double %arg) { ; ; FASTISEL-X86-LABEL: test_double_abs: ; FASTISEL-X86: # %bb.0: +; FASTISEL-X86-NEXT: pushl %ebp +; FASTISEL-X86-NEXT: movl %esp, %ebp +; FASTISEL-X86-NEXT: andl $-8, %esp +; FASTISEL-X86-NEXT: subl $8, %esp +; FASTISEL-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; FASTISEL-X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; FASTISEL-X86-NEXT: movlps %xmm0, (%esp) +; FASTISEL-X86-NEXT: movl (%esp), %eax ; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; FASTISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; FASTISEL-X86-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF +; FASTISEL-X86-NEXT: movl %ebp, %esp +; FASTISEL-X86-NEXT: popl %ebp ; FASTISEL-X86-NEXT: retl +; +; GISEL-X86-LABEL: test_double_abs: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: movl $-1, %eax +; GISEL-X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF +; GISEL-X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: retl %abs = tail call double @llvm.fabs.f64(double %arg) ret double %abs } diff --git a/llvm/test/CodeGen/X86/llvm.sincos.ll b/llvm/test/CodeGen/X86/llvm.sincos.ll index 5734729a2c507..065710f91457b 100644 --- a/llvm/test/CodeGen/X86/llvm.sincos.ll +++ b/llvm/test/CodeGen/X86/llvm.sincos.ll @@ -3,8 +3,9 @@ ; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel | FileCheck %s --check-prefixes=X64,FASTISEL-X64 ; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X86,SDAG-X86 ; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=0 -fast-isel=0 | FileCheck %s --check-prefixes=X64,SDAG-X64 -; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=X86,GISEL-X86 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=X64,GISEL-X64 +; TODO: The below RUN line will fails GISEL selection and 
will fallback to DAG selection due to lack of support for loads/stores in i686 mode, support is expected soon enough, for this reason the llvm/test/CodeGen/X86/GlobalISel/llvm.sincos.mir test is added for now because of the lack of support for i686 in GlobalISel. +; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL-X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes=GISEL-X64 define { float, float } @test_sincos_f32(float %Val) nounwind { ; X86-LABEL: test_sincos_f32: @@ -32,6 +33,35 @@ define { float, float } @test_sincos_f32(float %Val) nounwind { ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: popq %rax ; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_sincos_f32: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $28, %esp +; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; GISEL-X86-NEXT: movl %eax, (%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll sincosf +; GISEL-X86-NEXT: flds {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: flds {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fxch %st(1) +; GISEL-X86-NEXT: addl $28, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_sincos_f32: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; GISEL-X64-NEXT: movq %rsp, %rsi +; GISEL-X64-NEXT: callq sincosf +; GISEL-X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; GISEL-X64-NEXT: movl (%rsp), %ecx +; GISEL-X64-NEXT: movd %eax, %xmm0 +; GISEL-X64-NEXT: movd %ecx, %xmm1 +; GISEL-X64-NEXT: popq %rax +; GISEL-X64-NEXT: retq %res = call { float, float } @llvm.sincos.f32(float %Val) ret { float, float } %res } @@ -62,6 +92,34 @@ define { double, double } @test_sincos_f64(double %Val) nounwind { ; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X64-NEXT: 
addq $24, %rsp ; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_sincos_f64: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $44, %esp +; GISEL-X86-NEXT: fldl {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fstpl (%esp) +; GISEL-X86-NEXT: calll sincos +; GISEL-X86-NEXT: fldl {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fldl {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: addl $44, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_sincos_f64: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: subq $24, %rsp +; GISEL-X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; GISEL-X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; GISEL-X64-NEXT: callq sincos +; GISEL-X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; GISEL-X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; GISEL-X64-NEXT: movq %rax, %xmm0 +; GISEL-X64-NEXT: movq %rcx, %xmm1 +; GISEL-X64-NEXT: addq $24, %rsp +; GISEL-X64-NEXT: retq %res = call { double, double } @llvm.sincos.f64(double %Val) ret { double, double } %res } @@ -94,6 +152,36 @@ define { x86_fp80, x86_fp80 } @test_sincos_f80(x86_fp80 %Val) nounwind { ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: addq $56, %rsp ; X64-NEXT: retq +; +; GISEL-X86-LABEL: test_sincos_f80: +; GISEL-X86: # %bb.0: +; GISEL-X86-NEXT: subl $60, %esp +; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; GISEL-X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: fstpt (%esp) +; GISEL-X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: calll sincosl +; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: fxch %st(1) +; GISEL-X86-NEXT: addl $60, %esp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: test_sincos_f80: +; GISEL-X64: # %bb.0: +; GISEL-X64-NEXT: subq $56, %rsp +; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: leaq 
{{[0-9]+}}(%rsp), %rdi +; GISEL-X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; GISEL-X64-NEXT: fstpt (%rsp) +; GISEL-X64-NEXT: callq sincosl +; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: fxch %st(1) +; GISEL-X64-NEXT: addq $56, %rsp +; GISEL-X64-NEXT: retq %res = call { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val) ret { x86_fp80, x86_fp80 } %res } diff --git a/llvm/test/CodeGen/X86/shift-i512.ll b/llvm/test/CodeGen/X86/shift-i512.ll index c7da04171e6a1..756019d0e98a0 100644 --- a/llvm/test/CodeGen/X86/shift-i512.ll +++ b/llvm/test/CodeGen/X86/shift-i512.ll @@ -48,46 +48,20 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) { ; ; ZNVER4-LABEL: shl_i512_1: ; ZNVER4: # %bb.0: -; ZNVER4-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; ZNVER4-NEXT: vmovq %xmm0, %rdx -; ZNVER4-NEXT: vpextrq $1, %xmm0, %r9 -; ZNVER4-NEXT: vpextrq $1, %xmm1, %rax -; ZNVER4-NEXT: vmovq %xmm1, %rcx ; ZNVER4-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; ZNVER4-NEXT: shrq $63, %rdx -; ZNVER4-NEXT: vpextrq $1, %xmm1, %rsi -; ZNVER4-NEXT: vmovq %xmm1, %rdi -; ZNVER4-NEXT: vextracti128 $1, %ymm0, %xmm1 -; ZNVER4-NEXT: leaq (%rdx,%r9,2), %rdx -; ZNVER4-NEXT: shrq $63, %r9 -; ZNVER4-NEXT: vpsllq $1, %xmm0, %xmm0 -; ZNVER4-NEXT: vmovq %xmm1, %r10 -; ZNVER4-NEXT: vpextrq $1, %xmm1, %r8 -; ZNVER4-NEXT: leaq (%r9,%r10,2), %r9 -; ZNVER4-NEXT: shrq $63, %r10 -; ZNVER4-NEXT: vmovq %rdx, %xmm4 -; ZNVER4-NEXT: leaq (%r10,%r8,2), %r10 -; ZNVER4-NEXT: shrq $63, %r8 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] -; ZNVER4-NEXT: leaq (%r8,%rdi,2), %r8 -; ZNVER4-NEXT: shrq $63, %rdi -; ZNVER4-NEXT: leaq (%rdi,%rsi,2), %rdi -; ZNVER4-NEXT: shrq $63, %rsi -; ZNVER4-NEXT: leaq (%rsi,%rcx,2), %rsi -; ZNVER4-NEXT: shrq $63, %rcx -; ZNVER4-NEXT: vmovq %r8, %xmm3 -; ZNVER4-NEXT: leaq (%rcx,%rax,2), %rax -; ZNVER4-NEXT: vmovq %rsi, %xmm2 -; ZNVER4-NEXT: vmovq %rax, %xmm1 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; ZNVER4-NEXT: vmovq %rdi, %xmm2 -; 
ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; ZNVER4-NEXT: vmovq %r10, %xmm3 +; ZNVER4-NEXT: vextracti128 $1, %ymm0, %xmm2 +; ZNVER4-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; ZNVER4-NEXT: vpsllq $1, %xmm0, %xmm4 ; ZNVER4-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; ZNVER4-NEXT: vmovq %r9, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; ZNVER4-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; ZNVER4-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; ZNVER4-NEXT: vpshldq $1, %xmm3, %xmm2, %xmm3 +; ZNVER4-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; ZNVER4-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7] +; ZNVER4-NEXT: vpshldq $1, %ymm1, %ymm2, %ymm1 +; ZNVER4-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; ZNVER4-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1 +; ZNVER4-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] +; ZNVER4-NEXT: vpshldq $1, %zmm0, %zmm3, %zmm0 +; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6] ; ZNVER4-NEXT: retq %d = bitcast <8 x i64> %a to i512 %s = shl i512 %d, 1 @@ -142,65 +116,21 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) { ; ; ZNVER4-LABEL: lshr_i512_1: ; ZNVER4: # %bb.0: -; ZNVER4-NEXT: pushq %rbx -; ZNVER4-NEXT: .cfi_def_cfa_offset 16 -; ZNVER4-NEXT: .cfi_offset %rbx, -16 +; ZNVER4-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; ZNVER4-NEXT: vextracti128 $1, %ymm0, %xmm1 -; ZNVER4-NEXT: vmovq %xmm0, %r10 -; ZNVER4-NEXT: vpextrq $1, %xmm0, %rsi -; ZNVER4-NEXT: vpextrq $1, %xmm1, %rcx -; ZNVER4-NEXT: vmovq %xmm1, %r9 -; ZNVER4-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; ZNVER4-NEXT: vextracti32x4 $3, %zmm0, %xmm0 -; ZNVER4-NEXT: shrq %r10 -; ZNVER4-NEXT: vpextrq $1, %xmm0, %rax -; ZNVER4-NEXT: vmovq %xmm0, %rdx -; ZNVER4-NEXT: vmovq %xmm1, %rdi -; ZNVER4-NEXT: vpextrq $1, %xmm1, %r11 -; ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; ZNVER4-NEXT: movq %rdx, %r8 -; ZNVER4-NEXT: shrq %r8 -; ZNVER4-NEXT: shlq $63, %rax -; ZNVER4-NEXT: movq 
%rdi, %rbx -; ZNVER4-NEXT: shrq %rbx -; ZNVER4-NEXT: shlq $63, %rdx -; ZNVER4-NEXT: shlq $63, %rdi -; ZNVER4-NEXT: vpsrlq $1, %xmm0, %xmm0 -; ZNVER4-NEXT: orq %r8, %rax -; ZNVER4-NEXT: movq %r11, %r8 -; ZNVER4-NEXT: shlq $63, %r8 -; ZNVER4-NEXT: shrq %r11 -; ZNVER4-NEXT: orq %rbx, %r8 -; ZNVER4-NEXT: movq %r9, %rbx -; ZNVER4-NEXT: orq %r11, %rdx -; ZNVER4-NEXT: movq %rsi, %r11 -; ZNVER4-NEXT: shrq %r11 -; ZNVER4-NEXT: shlq $63, %rbx -; ZNVER4-NEXT: shrq %r9 -; ZNVER4-NEXT: shlq $63, %rsi -; ZNVER4-NEXT: vmovq %rax, %xmm4 -; ZNVER4-NEXT: orq %r11, %rbx -; ZNVER4-NEXT: movq %rcx, %r11 -; ZNVER4-NEXT: shlq $63, %r11 -; ZNVER4-NEXT: shrq %rcx -; ZNVER4-NEXT: orq %r10, %rsi -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] -; ZNVER4-NEXT: orq %r9, %r11 -; ZNVER4-NEXT: orq %rdi, %rcx -; ZNVER4-NEXT: vmovq %rbx, %xmm3 -; ZNVER4-NEXT: vmovq %rcx, %xmm1 -; ZNVER4-NEXT: vmovq %r11, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; ZNVER4-NEXT: vmovq %rsi, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; ZNVER4-NEXT: vmovq %r8, %xmm3 -; ZNVER4-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; ZNVER4-NEXT: vmovq %rdx, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; ZNVER4-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 -; ZNVER4-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; ZNVER4-NEXT: popq %rbx -; ZNVER4-NEXT: .cfi_def_cfa_offset 8 +; ZNVER4-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; ZNVER4-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] +; ZNVER4-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; ZNVER4-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7] +; ZNVER4-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4 +; ZNVER4-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; ZNVER4-NEXT: vpshldq $63, %ymm3, %ymm1, %ymm1 +; ZNVER4-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] +; ZNVER4-NEXT: vpsrlq $1, %xmm2, %xmm2 +; ZNVER4-NEXT: vpshldq $63, %zmm0, %zmm3, %zmm0 +; ZNVER4-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; 
ZNVER4-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; ZNVER4-NEXT: retq %d = bitcast <8 x i64> %a to i512 %s = lshr i512 %d, 1 @@ -255,65 +185,21 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) { ; ; ZNVER4-LABEL: ashr_i512_1: ; ZNVER4: # %bb.0: -; ZNVER4-NEXT: pushq %rbx -; ZNVER4-NEXT: .cfi_def_cfa_offset 16 -; ZNVER4-NEXT: .cfi_offset %rbx, -16 +; ZNVER4-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; ZNVER4-NEXT: vextracti128 $1, %ymm0, %xmm1 -; ZNVER4-NEXT: vmovq %xmm0, %r10 -; ZNVER4-NEXT: vpextrq $1, %xmm0, %rsi -; ZNVER4-NEXT: vpextrq $1, %xmm1, %rcx -; ZNVER4-NEXT: vmovq %xmm1, %r9 -; ZNVER4-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; ZNVER4-NEXT: vextracti32x4 $3, %zmm0, %xmm0 -; ZNVER4-NEXT: shrq %r10 -; ZNVER4-NEXT: vpextrq $1, %xmm0, %rax -; ZNVER4-NEXT: vmovq %xmm0, %rdx -; ZNVER4-NEXT: vmovq %xmm1, %rdi -; ZNVER4-NEXT: vpextrq $1, %xmm1, %r11 -; ZNVER4-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; ZNVER4-NEXT: movq %rdx, %r8 -; ZNVER4-NEXT: shrq %r8 -; ZNVER4-NEXT: shlq $63, %rax -; ZNVER4-NEXT: movq %rdi, %rbx -; ZNVER4-NEXT: shrq %rbx -; ZNVER4-NEXT: shlq $63, %rdx -; ZNVER4-NEXT: shlq $63, %rdi -; ZNVER4-NEXT: vpsraq $1, %xmm0, %xmm0 -; ZNVER4-NEXT: orq %r8, %rax -; ZNVER4-NEXT: movq %r11, %r8 -; ZNVER4-NEXT: shlq $63, %r8 -; ZNVER4-NEXT: shrq %r11 -; ZNVER4-NEXT: orq %rbx, %r8 -; ZNVER4-NEXT: movq %r9, %rbx -; ZNVER4-NEXT: orq %r11, %rdx -; ZNVER4-NEXT: movq %rsi, %r11 -; ZNVER4-NEXT: shrq %r11 -; ZNVER4-NEXT: shlq $63, %rbx -; ZNVER4-NEXT: shrq %r9 -; ZNVER4-NEXT: shlq $63, %rsi -; ZNVER4-NEXT: vmovq %rax, %xmm4 -; ZNVER4-NEXT: orq %r11, %rbx -; ZNVER4-NEXT: movq %rcx, %r11 -; ZNVER4-NEXT: shlq $63, %r11 -; ZNVER4-NEXT: shrq %rcx -; ZNVER4-NEXT: orq %r10, %rsi -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0] -; ZNVER4-NEXT: orq %r9, %r11 -; ZNVER4-NEXT: orq %rdi, %rcx -; ZNVER4-NEXT: vmovq %rbx, %xmm3 -; ZNVER4-NEXT: vmovq %rcx, %xmm1 -; 
ZNVER4-NEXT: vmovq %r11, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; ZNVER4-NEXT: vmovq %rsi, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; ZNVER4-NEXT: vmovq %r8, %xmm3 -; ZNVER4-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; ZNVER4-NEXT: vmovq %rdx, %xmm2 -; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; ZNVER4-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 -; ZNVER4-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; ZNVER4-NEXT: popq %rbx -; ZNVER4-NEXT: .cfi_def_cfa_offset 8 +; ZNVER4-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; ZNVER4-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] +; ZNVER4-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; ZNVER4-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7] +; ZNVER4-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4 +; ZNVER4-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; ZNVER4-NEXT: vpshldq $63, %ymm3, %ymm1, %ymm1 +; ZNVER4-NEXT: vpshufd {{.*#+}} zmm3 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] +; ZNVER4-NEXT: vpsraq $1, %xmm2, %xmm2 +; ZNVER4-NEXT: vpshldq $63, %zmm0, %zmm3, %zmm0 +; ZNVER4-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; ZNVER4-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 +; ZNVER4-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; ZNVER4-NEXT: retq %d = bitcast <8 x i64> %a to i512 %s = ashr i512 %d, 1 diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll index 7296cc27894c3..283c6a303a581 100644 --- a/llvm/test/CodeGen/X86/var-permute-256.ll +++ b/llvm/test/CodeGen/X86/var-permute-256.ll @@ -2,12 +2,12 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s --check-prefix=XOP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=INT256,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s 
--check-prefixes=INT256,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=INT256,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=INT256,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=INT256,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=INT256,AVX512VL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=INT256,AVX512VL,AVX512VLDQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=INT256,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=INT256,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=INT256,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=INT256,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=INT256,AVX512VL,AVX512VLF +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=INT256,AVX512VL,AVX512VLF ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=INT256,AVX512VL,AVX512VLBW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=INT256,AVX512VL,VLVBMI @@ -393,17 +393,17 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi ; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: retq ; -; AVX512VLDQ-LABEL: var_shuffle_v16i16: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # 
[514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] -; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX512VLDQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm3 ^ ymm2)) -; AVX512VLDQ-NEXT: retq +; AVX512VLF-LABEL: var_shuffle_v16i16: +; AVX512VLF: # %bb.0: +; AVX512VLF-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] +; AVX512VLF-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLF-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm2, %ymm2 +; AVX512VLF-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLF-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLF-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm3 ^ ymm2)) +; AVX512VLF-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v16i16: ; AVX512VLBW: # %bb.0: @@ -533,21 +533,57 @@ define <16 x i16> @var_shuffle_zero_v16i16(<16 x i16> %v, <16 x i16> %indices) n ; AVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VLDQ-LABEL: var_shuffle_zero_v16i16: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 -; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm2 -; AVX512VLDQ-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] -; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm3, %ymm3 -; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = 
ymm0[2,3,2,3] -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm3 ^ (ymm1 & (ymm0 ^ ymm3)) -; AVX512VLDQ-NEXT: vpandn %ymm1, %ymm2, %ymm0 -; AVX512VLDQ-NEXT: retq +; AVX512F-LABEL: var_shuffle_zero_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm2 +; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] +; AVX512F-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 +; AVX512F-NEXT: vpshufb %ymm1, %ymm3, %ymm3 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm3, %ymm0 +; AVX512F-NEXT: vpandn %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: var_shuffle_zero_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpcmpnleuw %zmm2, %zmm1, %k1 +; AVX512BW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX512BW-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] +; AVX512BW-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] +; AVX512BW-NEXT: vpshufb %ymm1, %ymm2, %ymm2 +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512BW-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, 
%ymm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLF-LABEL: var_shuffle_zero_v16i16: +; AVX512VLF: # %bb.0: +; AVX512VLF-NEXT: vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512VLF-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm2 +; AVX512VLF-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX512VLF-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] +; AVX512VLF-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLF-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm3, %ymm3 +; AVX512VLF-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512VLF-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLF-NEXT: vpternlogq {{.*#+}} ymm1 = ymm3 ^ (ymm1 & (ymm0 ^ ymm3)) +; AVX512VLF-NEXT: vpandn %ymm1, %ymm2, %ymm0 +; AVX512VLF-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_zero_v16i16: ; AVX512VLBW: # %bb.0: @@ -668,15 +704,15 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind { ; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: retq ; -; AVX512VLDQ-LABEL: var_shuffle_v32i8: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX512VLDQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm3 ^ (ymm0 & (ymm2 ^ ymm3)) -; AVX512VLDQ-NEXT: retq +; AVX512VLF-LABEL: var_shuffle_v32i8: +; AVX512VLF: # %bb.0: +; AVX512VLF-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm2, %ymm2 +; AVX512VLF-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512VLF-NEXT: vpshufb 
%ymm1, %ymm0, %ymm3 +; AVX512VLF-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLF-NEXT: vpternlogq {{.*#+}} ymm0 = ymm3 ^ (ymm0 & (ymm2 ^ ymm3)) +; AVX512VLF-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v32i8: ; AVX512VLBW: # %bb.0: @@ -847,19 +883,51 @@ define <32 x i8> @var_shuffle_zero_v32i8(<32 x i8> %v, <32 x i8> %indices) nounw ; AVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VLDQ-LABEL: var_shuffle_zero_v32i8: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 -; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm2 -; AVX512VLDQ-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm3, %ymm3 -; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm3 ^ (ymm1 & (ymm0 ^ ymm3)) -; AVX512VLDQ-NEXT: vpandn %ymm1, %ymm2, %ymm0 -; AVX512VLDQ-NEXT: retq +; AVX512F-LABEL: var_shuffle_zero_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm2 +; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 +; AVX512F-NEXT: vpshufb %ymm1, %ymm3, %ymm3 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm3, %ymm0 +; AVX512F-NEXT: vpandn %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: var_shuffle_zero_v32i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] +; AVX512BW-NEXT: vpcmpnleub 
%zmm2, %zmm1, %k1 +; AVX512BW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} +; AVX512BW-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] +; AVX512BW-NEXT: vpshufb %ymm1, %ymm2, %ymm2 +; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512BW-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLF-LABEL: var_shuffle_zero_v32i8: +; AVX512VLF: # %bb.0: +; AVX512VLF-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512VLF-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm2 +; AVX512VLF-NEXT: vpor %ymm1, %ymm2, %ymm1 +; AVX512VLF-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm3, %ymm3 +; AVX512VLF-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512VLF-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLF-NEXT: vpternlogq {{.*#+}} ymm1 = ymm3 ^ (ymm1 & (ymm0 ^ ymm3)) +; AVX512VLF-NEXT: vpandn %ymm1, %ymm2, %ymm0 +; AVX512VLF-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_zero_v32i8: ; AVX512VLBW: # %bb.0: @@ -1493,17 +1561,17 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic ; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: retq ; -; AVX512VLDQ-LABEL: var_shuffle_v16i16_from_v8i16: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] -; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 -; AVX512VLDQ-NEXT: 
vpshufb %ymm1, %ymm0, %ymm3 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX512VLDQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm3 ^ ymm2)) -; AVX512VLDQ-NEXT: retq +; AVX512VLF-LABEL: var_shuffle_v16i16_from_v8i16: +; AVX512VLF: # %bb.0: +; AVX512VLF-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VLF-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] +; AVX512VLF-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLF-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm2 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLF-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLF-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm3 ^ ymm2)) +; AVX512VLF-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v16i16_from_v8i16: ; AVX512VLBW: # %bb.0: @@ -1611,15 +1679,15 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices) ; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: retq ; -; AVX512VLDQ-LABEL: var_shuffle_v32i8_from_v16i8: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 -; AVX512VLDQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm3 ^ (ymm0 & (ymm2 ^ ymm3)) -; AVX512VLDQ-NEXT: retq +; AVX512VLF-LABEL: var_shuffle_v32i8_from_v16i8: +; AVX512VLF: # %bb.0: +; AVX512VLF-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm2 +; AVX512VLF-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512VLF-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLF-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLF-NEXT: vpternlogq {{.*#+}} ymm0 = ymm3 ^ 
(ymm0 & (ymm2 ^ ymm3)) +; AVX512VLF-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v32i8_from_v16i8: ; AVX512VLBW: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index 62ab5d82bfbb6..910dd1ee6c419 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -2099,21 +2099,19 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: psrlq $1, %xmm2 ; SSE41-NEXT: por %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 -; SSE41-NEXT: pextrq $1, %xmm1, %rax +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0 +; SSE41-NEXT: pextrq $1, %xmm0, %rax +; SSE41-NEXT: cvtsi2ss %rax, %xmm3 +; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: cvtsi2ss %rax, %xmm2 -; SSE41-NEXT: movq %xmm1, %rax -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: cvtsi2ss %rax, %xmm1 -; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero -; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: addps %xmm1, %xmm2 -; SSE41-NEXT: xorps %xmm3, %xmm3 -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[2,3] -; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],zero,zero +; SSE41-NEXT: movaps %xmm2, %xmm3 +; SSE41-NEXT: addps %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movaps %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: uitofp_4i64_to_4f32_undef: diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll index 58f6a66aeff79..c5e879c0135f4 100644 --- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -12,12 +12,12 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver1 | FileCheck %s 
--check-prefixes=BMI ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=BMI ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s --check-prefixes=BMI -; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=BMI2 -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=BMI2 -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=BMI2 -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=BMI2 -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=BMI2 -; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=BMI2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s --check-prefixes=BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=BMI2-FAST +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=BMI2-FAST +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=BMI2-FAST ; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' @@ -53,15 +53,23 @@ define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone { ; BMI-NEXT: orq %rdi, %rax ; BMI-NEXT: retq ; -; BMI2-LABEL: lshift: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; BMI2-NEXT: shlxq %rdx, %rdi, %rcx -; BMI2-NEXT: notb %dl -; BMI2-NEXT: shrq %rsi -; BMI2-NEXT: shrxq %rdx, %rsi, %rax -; BMI2-NEXT: orq %rcx, %rax -; BMI2-NEXT: retq +; BMI2-SLOW-LABEL: lshift: +; BMI2-SLOW: # %bb.0: # %entry +; BMI2-SLOW-NEXT: # kill: def $edx killed $edx def $rdx +; BMI2-SLOW-NEXT: shlxq %rdx, %rdi, %rcx +; BMI2-SLOW-NEXT: notb %dl +; BMI2-SLOW-NEXT: shrq 
%rsi +; BMI2-SLOW-NEXT: shrxq %rdx, %rsi, %rax +; BMI2-SLOW-NEXT: orq %rcx, %rax +; BMI2-SLOW-NEXT: retq +; +; BMI2-FAST-LABEL: lshift: +; BMI2-FAST: # %bb.0: # %entry +; BMI2-FAST-NEXT: movl %edx, %ecx +; BMI2-FAST-NEXT: movq %rdi, %rax +; BMI2-FAST-NEXT: # kill: def $cl killed $cl killed $ecx +; BMI2-FAST-NEXT: shldq %cl, %rsi, %rax +; BMI2-FAST-NEXT: retq entry: %sh_prom = zext i32 %c to i64 %shl = shl i64 %a, %sh_prom @@ -100,15 +108,23 @@ define i64 @rshift(i64 %a, i64 %b, i32 %c) nounwind readnone { ; BMI-NEXT: orq %rdi, %rax ; BMI-NEXT: retq ; -; BMI2-LABEL: rshift: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; BMI2-NEXT: shrxq %rdx, %rdi, %rcx -; BMI2-NEXT: notb %dl -; BMI2-NEXT: addq %rsi, %rsi -; BMI2-NEXT: shlxq %rdx, %rsi, %rax -; BMI2-NEXT: orq %rcx, %rax -; BMI2-NEXT: retq +; BMI2-SLOW-LABEL: rshift: +; BMI2-SLOW: # %bb.0: # %entry +; BMI2-SLOW-NEXT: # kill: def $edx killed $edx def $rdx +; BMI2-SLOW-NEXT: shrxq %rdx, %rdi, %rcx +; BMI2-SLOW-NEXT: notb %dl +; BMI2-SLOW-NEXT: addq %rsi, %rsi +; BMI2-SLOW-NEXT: shlxq %rdx, %rsi, %rax +; BMI2-SLOW-NEXT: orq %rcx, %rax +; BMI2-SLOW-NEXT: retq +; +; BMI2-FAST-LABEL: rshift: +; BMI2-FAST: # %bb.0: # %entry +; BMI2-FAST-NEXT: movl %edx, %ecx +; BMI2-FAST-NEXT: movq %rdi, %rax +; BMI2-FAST-NEXT: # kill: def $cl killed $cl killed $ecx +; BMI2-FAST-NEXT: shrdq %cl, %rsi, %rax +; BMI2-FAST-NEXT: retq entry: %sh_prom = zext i32 %c to i64 %shr = lshr i64 %a, %sh_prom diff --git a/llvm/test/DebugInfo/dynamic-bitfield.ll b/llvm/test/DebugInfo/dynamic-bitfield.ll new file mode 100644 index 0000000000000..1a5ed81774538 --- /dev/null +++ b/llvm/test/DebugInfo/dynamic-bitfield.ll @@ -0,0 +1,62 @@ +; RUN: llc -O0 -filetype=obj -o - %s | llvm-dwarfdump -v -debug-info - | FileCheck %s + +; A basic test of using a DIExpression for DW_AT_data_bit_offset and +; DW_AT_bit_size. 
+ +source_filename = "bitfield.c" + +%struct.PackedBits = type <{ i8, i32 }> + +@s = common global %struct.PackedBits zeroinitializer, align 1, !dbg !2 +@value = common global i32 zeroinitializer, align 4, !dbg !0 + +!llvm.dbg.cu = !{!4} +!llvm.module.flags = !{!17, !18, !19} +!llvm.ident = !{!20} + +!0 = distinct !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "value", scope: !4, file: !5, line: 8, type: !15, isLocal: false, isDefinition: true) +!2 = distinct !DIGlobalVariableExpression(var: !3, expr: !DIExpression()) +!3 = !DIGlobalVariable(name: "s", scope: !4, file: !5, line: 8, type: !8, isLocal: false, isDefinition: true) + + +!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang version 3.9.0 (trunk 267633)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !6, globals: !7) +!5 = !DIFile(filename: "bitfield.c", directory: "/Volumes/Data/llvm") +!6 = !{} +!7 = !{!0, !2} +!8 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "PackedBits", file: !5, line: 3, size: 40, elements: !9) +!9 = !{!10, !12, !16} +!10 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !8, file: !5, line: 5, baseType: !11, size: 8) +; CHECK: DW_TAG_member +; CHECK-NEXT: DW_AT_name{{.*}}"a" +; CHECK-NOT: DW_TAG +; CHECK-NOT: DW_AT_bit_offset +; CHECK-NOT: DW_AT_data_bit_offset +; CHECK: DW_AT_data_member_location [DW_FORM_data1] (0x00) +!11 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !8, file: !5, line: 6, baseType: !13, size: !3, offset: !3, flags: DIFlagBitField) +!13 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !14, line: 183, baseType: !15) +!14 = !DIFile(filename: "/Volumes/Data/llvm/_build.ninja.release/bin/../lib/clang/3.9.0/include/stdint.h", directory: "/Volumes/Data/llvm") +!15 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +; CHECK: DW_TAG_member +; 
CHECK-NEXT: DW_AT_name{{.*}}"b" +; CHECK-NOT: DW_TAG +; CHECK-NOT: DW_AT_bit_offset +; CHECK-NOT: DW_AT_byte_size +; CHECK: DW_AT_bit_size [DW_FORM_ref4] ({{.*}}) +; CHECK-NEXT: DW_AT_data_bit_offset [DW_FORM_ref4] ({{.*}}) +; CHECK-NOT: DW_AT_data_member_location +!16 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !8, file: !5, line: 7, baseType: !13, size: !DIExpression(DW_OP_constu, 27), offset: !DIExpression(DW_OP_constu, 13), flags: DIFlagBitField) +!17 = !{i32 2, !"Dwarf Version", i32 4} +!18 = !{i32 2, !"Debug Info Version", i32 3} +!19 = !{i32 1, !"PIC Level", i32 2} +; CHECK: DW_TAG_member +; CHECK-NEXT: DW_AT_name{{.*}}"c" +; CHECK-NOT: DW_TAG +; CHECK-NOT: DW_AT_bit_offset +; CHECK-NOT: DW_AT_byte_size +; CHECK: DW_AT_bit_size [DW_FORM_exprloc] (DW_OP_lit27) +; CHECK-NEXT: DW_AT_data_bit_offset [DW_FORM_exprloc] (DW_OP_lit13) +; CHECK-NOT: DW_AT_data_member_location +; CHECK: DW_TAG +!20 = !{!"clang version 3.9.0 (trunk 267633)"} diff --git a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll index 6c1acf6d13775..2006a6db2ef40 100644 --- a/llvm/test/Instrumentation/BoundsChecking/runtimes.ll +++ b/llvm/test/Instrumentation/BoundsChecking/runtimes.ll @@ -205,7 +205,7 @@ define void @f1(i64 %x) nounwind { ; TR-GUARD: attributes #[[ATTR3]] = { nomerge noreturn nounwind } ;. ; RT-GUARD: attributes #[[ATTR0]] = { nounwind } -; RT-GUARD: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +; RT-GUARD: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; RT-GUARD: attributes #[[ATTR2]] = { nomerge nounwind } ;. 
; TR: [[META0]] = !{} diff --git a/llvm/test/Instrumentation/MemorySanitizer/count-zeroes.ll b/llvm/test/Instrumentation/MemorySanitizer/count-zeroes.ll index 73e047e68ddc6..c51dc1a373629 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/count-zeroes.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/count-zeroes.ll @@ -9,10 +9,14 @@ define i64 @test_ctlz_i64_zeropoison(i64 %v) #0 { ; CHECK-LABEL: @test_ctlz_i64_zeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq i64 [[V:%.*]], 0 -; CHECK-NEXT: [[_MSCZ_BS1:%.*]] = or i1 [[_MSCZ_BS]], [[_MSCZ_BZP]] -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_BS1]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and i1 [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq i64 [[V]], 0 +; CHECK-NEXT: [[_MSCZ_BS:%.*]] = or i1 [[_MSCZ_MAIN]], [[_MSCZ_BZP]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_BS]] to i64 ; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.ctlz.i64(i64 [[V]], i1 true) ; CHECK-NEXT: store i64 [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[RES]] @@ -24,9 +28,13 @@ define i64 @test_ctlz_i64_nozeropoison(i64 %v) #0 { ; CHECK-LABEL: @test_ctlz_i64_nozeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_BS]] to i64 -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.ctlz.i64(i64 [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP2:%.*]] 
= call i64 @llvm.ctlz.i64(i64 [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and i1 [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_MAIN]] to i64 +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.ctlz.i64(i64 [[V]], i1 false) ; CHECK-NEXT: store i64 [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; @@ -39,10 +47,14 @@ define <2 x i64> @test_ctlz_v2i64_zeropoison(<2 x i64> %v) #0 { ; CHECK-LABEL: @test_ctlz_v2i64_zeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq <2 x i64> [[V:%.*]], zeroinitializer -; CHECK-NEXT: [[_MSCZ_BS1:%.*]] = or <2 x i1> [[_MSCZ_BS]], [[_MSCZ_BZP]] -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_BS1]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge <2 x i64> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and <2 x i1> [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq <2 x i64> [[V]], zeroinitializer +; CHECK-NEXT: [[_MSCZ_BS:%.*]] = or <2 x i1> [[_MSCZ_MAIN]], [[_MSCZ_BZP]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_BS]] to <2 x i64> ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[V]], i1 true) ; CHECK-NEXT: store <2 x i64> [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; 
CHECK-NEXT: ret <2 x i64> [[RES]] @@ -54,9 +66,13 @@ define <2 x i64> @test_ctlz_v2i64_nozeropoison(<2 x i64> %v) #0 { ; CHECK-LABEL: @test_ctlz_v2i64_nozeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_BS]] to <2 x i64> -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge <2 x i64> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and <2 x i1> [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_MAIN]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[V]], i1 false) ; CHECK-NEXT: store <2 x i64> [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -69,10 +85,14 @@ define i64 @test_cttz_i64_zeropoison(i64 %v) #0 { ; CHECK-LABEL: @test_cttz_i64_zeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq i64 [[V:%.*]], 0 -; CHECK-NEXT: [[_MSCZ_BS1:%.*]] = or i1 [[_MSCZ_BS]], [[_MSCZ_BZP]] -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_BS1]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.cttz.i64(i64 [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: 
[[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and i1 [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq i64 [[V]], 0 +; CHECK-NEXT: [[_MSCZ_BS:%.*]] = or i1 [[_MSCZ_MAIN]], [[_MSCZ_BZP]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_BS]] to i64 ; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.cttz.i64(i64 [[V]], i1 true) ; CHECK-NEXT: store i64 [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[RES]] @@ -84,9 +104,13 @@ define i64 @test_cttz_i64_nozeropoison(i64 %v) #0 { ; CHECK-LABEL: @test_cttz_i64_nozeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_BS]] to i64 -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.cttz.i64(i64 [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.cttz.i64(i64 [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and i1 [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext i1 [[_MSCZ_MAIN]] to i64 +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.cttz.i64(i64 [[V]], i1 false) ; CHECK-NEXT: store i64 [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; @@ -99,10 +123,14 @@ define <2 x i64> @test_cttz_v2i64_zeropoison(<2 x i64> %v) #0 { ; CHECK-LABEL: @test_cttz_v2i64_zeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq <2 x i64> [[V:%.*]], zeroinitializer -; CHECK-NEXT: 
[[_MSCZ_BS1:%.*]] = or <2 x i1> [[_MSCZ_BS]], [[_MSCZ_BZP]] -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_BS1]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge <2 x i64> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and <2 x i1> [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_BZP:%.*]] = icmp eq <2 x i64> [[V]], zeroinitializer +; CHECK-NEXT: [[_MSCZ_BS:%.*]] = or <2 x i1> [[_MSCZ_MAIN]], [[_MSCZ_BZP]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_BS]] to <2 x i64> ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[V]], i1 true) ; CHECK-NEXT: store <2 x i64> [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] @@ -114,9 +142,13 @@ define <2 x i64> @test_cttz_v2i64_nozeropoison(<2 x i64> %v) #0 { ; CHECK-LABEL: @test_cttz_v2i64_nozeropoison( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSCZ_BS:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x i1> [[_MSCZ_BS]] to <2 x i64> -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[V:%.*]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false) +; CHECK-NEXT: [[_MSCZ_CMP_ZEROS:%.*]] = icmp uge <2 x i64> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSCZ_SHADOW_NOT_NULL:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[_MSCZ_MAIN:%.*]] = and <2 x i1> [[_MSCZ_CMP_ZEROS]], [[_MSCZ_SHADOW_NOT_NULL]] +; CHECK-NEXT: [[_MSCZ_OS:%.*]] = sext <2 x 
i1> [[_MSCZ_MAIN]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[V]], i1 false) ; CHECK-NEXT: store <2 x i64> [[_MSCZ_OS]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[RES]] ; diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s b/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s index c2785553030fc..2111e91cd5ef2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_unsupported.s @@ -1,5 +1,99 @@ ; RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s +;; DOT4_F32_*, DOT2_F32_*, DOT2_F16 and DOT2_BF16 + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 row_mirror +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 row_shl:15 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 +// GFX1250-ERR: 
:[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 row_share:15 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_f16_f16 v5, v1, v2, s3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_bf16_bf16 v5, v1, v2, s3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_f32_bf16 v5, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_dot2_f32_f16 v5, v1, v2, s3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +;; LDS-direct and parameter-load, VINTERP + +ds_direct_load v1 wait_va_vdst:15 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_param_load v1, attr0.x wait_va_vdst:15 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_direct_load v1 wait_va_vdst:15 wait_vm_vsrc:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_param_load v1, attr0.x wait_va_vdst:15 
wait_vm_vsrc:1 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_interp_p10_f32 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_interp_p2_f32 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_interp_p10_f16_f32 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_interp_p2_f16_f32 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + ;; Export, S_WAIT_EXPCNT and S_WAIT_EVENT export mrt0 off, off, off, off diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s new file mode 100644 index 0000000000000..e62eb6fbb723c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -0,0 +1,65 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s + +v_cvt_f32_bf16 v5, v1 +// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] + +v_cvt_f32_bf16 v5, v127 +// GFX1250: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xe5,0x0a,0x7e] + +v_cvt_f32_bf16 v5, s1 +// GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, s105 +// GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, vcc_lo +// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, vcc_hi +// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: 
[0x6b,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, ttmp15 +// GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, m0 +// GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, exec_lo +// GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, exec_hi +// GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, null +// GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, -1 +// GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, 0.5 +// GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, src_scc +// GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v127, 0x8000 +// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v2 +// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e] + +v_cvt_pk_f16_bf8 v1, s2 +// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e] + +v_cvt_pk_f16_bf8 v1, 100 +// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v2 +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, s2 +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, 100 +// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s new file mode 100644 index 0000000000000..37f39546ae13d --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -0,0 +1,68 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 
-mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s + +v_cvt_f32_bf16 v5, v1 +// GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] + +v_cvt_f32_bf16 v5, v127 +// GFX1250: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xe5,0x0a,0x7e] + +v_cvt_f32_bf16 v5, s1 +// GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, s105 +// GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, vcc_lo +// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, vcc_hi +// GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, ttmp15 +// GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, m0 +// GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, exec_lo +// GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, exec_hi +// GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, null +// GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, -1 +// GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, 0.5 +// GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v5, src_scc +// GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e] + +v_cvt_f32_bf16 v127, 0x8000 +// GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_cvt_f32_bf16 v5, v1.h +// GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e] + +v_cvt_pk_f16_bf8 v1, v2 +// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e] + +v_cvt_pk_f16_bf8 v1, s2 +// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e] + +v_cvt_pk_f16_bf8 v1, 100 +// GFX1250: 
v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v2 +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, s2 +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] + +v_cvt_pk_f16_fp8 v1, 100 +// GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s new file mode 100644 index 0000000000000..1ec54d137b335 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -0,0 +1,67 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_mirror +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_half_mirror +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shl:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shl:15 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shr:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shr:15 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_ror:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_ror:15 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s new file mode 100644 index 0000000000000..d674a9ea06843 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -0,0 +1,79 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + 
+v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_mirror +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_half_mirror +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shl:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shl:15 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shr:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_shr:15 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + 
+v_cvt_f32_bf16 v5, v1 row_ror:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_ror:15 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1.h quad_perm:[3,2,1,0] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// 
GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v2.h quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.h quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s new file mode 100644 index 0000000000000..9ab3a8adfa511 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -0,0 +1,23 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s new file mode 100644 index 0000000000000..6904624471801 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s @@ -0,0 +1,35 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; 
encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16 v5, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_bf8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s new file mode 100644 index 0000000000000..c393d3e819880 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR 
--implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_f16_bf8 v1, v2 clamp +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 clamp +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_pk_f16_bf8 v1, v2 mul:2 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_bf8 v1, v2 mul:2 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_pk_f16_fp8 v1, v2 clamp +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 clamp +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_pk_f16_fp8 v1, v2 mul:2 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_pk_f16_fp8 v1, v2 mul:2 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f32_bf16 v5, v1 clamp +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 clamp +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f32_bf16 v5, v1 mul:2 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 mul:2 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f32_bf16 v5, v1 div:2 +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 div:2 +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s new file mode 100644 index 0000000000000..f6c7cf8006508 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -0,0 +1,101 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s + +v_cvt_f32_bf16_e64 v5, v1 +// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v255 +// GFX1250: v_cvt_f32_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s1 +// GFX1250: v_cvt_f32_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s105 +// GFX1250: v_cvt_f32_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xf2,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_lo +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_hi +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, ttmp15 +// GFX1250: v_cvt_f32_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xf2,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, m0 +// GFX1250: v_cvt_f32_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xf2,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_lo +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_hi +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, null +// GFX1250: v_cvt_f32_bf16_e64 v5, null ; encoding: [0x05,0x00,0xf2,0xd5,0x7c,0x00,0x00,0x00] + 
+v_cvt_f32_bf16_e64 v5, -1 +// GFX1250: v_cvt_f32_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf2,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v1 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, v1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v255 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s1 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, s1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s105 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, s105 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_lo op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_lo op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_hi op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_hi op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, ttmp15 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, ttmp15 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, m0 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, m0 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_lo op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_lo op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_hi op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_hi op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, null op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, null op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, -1 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, -1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1] +// GFX1250: 
v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v150 +// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v150 op_sel:[1] +// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, s2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, s2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s new file mode 100644 index 0000000000000..531d734a0683d --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -0,0 +1,104 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s + +v_cvt_f32_bf16_e64 v5, v1 +// GFX1250: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v255 +// GFX1250: v_cvt_f32_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s1 +// GFX1250: v_cvt_f32_bf16_e64 v5, s1 ; 
encoding: [0x05,0x00,0xf2,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s105 +// GFX1250: v_cvt_f32_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xf2,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_lo +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_hi +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, ttmp15 +// GFX1250: v_cvt_f32_bf16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xf2,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, m0 +// GFX1250: v_cvt_f32_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xf2,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_lo +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_hi +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, null +// GFX1250: v_cvt_f32_bf16_e64 v5, null ; encoding: [0x05,0x00,0xf2,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, -1 +// GFX1250: v_cvt_f32_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf2,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v1 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, v1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v255 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s1 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, s1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, s105 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, s105 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_lo op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, vcc_lo op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, vcc_hi op_sel:[1] +// 
GFX1250: v_cvt_f32_bf16_e64 v5, vcc_hi op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, ttmp15 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, ttmp15 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, m0 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, m0 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_lo op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_lo op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, exec_hi op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, exec_hi op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, null op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, null op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, -1 op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, -1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1] +// GFX1250: v_cvt_f32_bf16_e64 v5, src_scc op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_f32_bf16_e64 v5, v128.h +// GFX1250: v_cvt_f32_bf16_e64 v5, v128.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x80,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v150 +// GFX1250: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, v150 op_sel:[1] +// GFX1250: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_bf8 v1, s2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] + 
+v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, v150 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] + +v_cvt_pk_f16_fp8 v1, s2 op_sel:[1] +// GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s new file mode 100644 index 0000000000000..844b4259229ed --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s @@ -0,0 +1,63 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_mirror +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + 
+v_cvt_f32_bf16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v128 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x02,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s new file mode 100644 index 0000000000000..32c2e54cf0e71 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s @@ -0,0 +1,67 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc 
-triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_mirror +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_half_mirror +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:1 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:15 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + 
+v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:1 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:15 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:1 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15 +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v128.h quad_perm:[3,2,1,0] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v128.h quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.h quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s new file mode 100644 index 0000000000000..75692c7422f64 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s @@ -0,0 +1,23 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_f16_bf8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + 
+v_cvt_pk_f16_bf8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v2 op_sel:[1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v2 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x02,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s new file mode 100644 index 0000000000000..2c1eb47164e59 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s @@ -0,0 +1,27 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_f16_bf8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: 
[0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_bf8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_f32_bf16_e64_dpp v5, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_f16_fp8 v1, v128.h dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt new file mode 100644 index 0000000000000..47eebb9d44a95 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 
-mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xe4,0xfe,0x7e,0x00,0x80,0x00,0x00] + +0xc1,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xe4,0x0a,0x7e] + +0xf0,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xe4,0x0a,0x7e] + +0x7f,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xe4,0x0a,0x7e] + +0x7e,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xe4,0x0a,0x7e] + +0x7d,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7d,0xe4,0x0a,0x7e] + +0x7c,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, null ; encoding: [0x7c,0xe4,0x0a,0x7e] + +0x01,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xe4,0x0a,0x7e] + +0x69,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, s105 ; encoding: [0x69,0xe4,0x0a,0x7e] + +0xfd,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xe4,0x0a,0x7e] + +0x7b,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xe4,0x0a,0x7e] + +0x01,0xe5,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, v1.l ; encoding: [0x01,0xe5,0x0a,0x7e] + +0x7f,0xe5,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, v127.l ; encoding: [0x7f,0xe5,0x0a,0x7e] + +0x6b,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xe4,0x0a,0x7e] + +0x6a,0xe4,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xe4,0x0a,0x7e] + +0x81,0xe5,0x0a,0x7e +# GFX1250: v_cvt_f32_bf16_e32 v5, v1.h ; encoding: [0x81,0xe5,0x0a,0x7e] + +0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00] + +0x02,0xec,0x02,0x7e +# 
GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e] + +0x02,0xed,0x02,0x7e +# GFX1250-REAL16: v_cvt_pk_f16_bf8 v1, v2.l ; encoding: [0x02,0xed,0x02,0x7e] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e] + +0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_fp8 v1, 0x64 ; encoding: [0xff,0xea,0x02,0x7e,0x64,0x00,0x00,0x00] + +0x02,0xea,0x02,0x7e +# GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] + +0x02,0xeb,0x02,0x7e +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt new file mode 100644 index 0000000000000..25e982b7fd688 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30 +# GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30] + +0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01 +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01] + +0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13 +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13] + +0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff] + +0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xec,0x02,0x7e,0x02,0xe4,0x04,0xff] + +0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xec,0x02,0x7e,0x82,0xe4,0x00,0xff] + +0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0xfa,0xea,0x02,0x7e,0x02,0xe4,0x04,0xff] + +0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xea,0x02,0x7e,0x82,0xe4,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt new file mode 100644 index 0000000000000..bd524af907ee0 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -0,0 +1,31 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 
-mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] + +0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05] + +0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05 +# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05] + +0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xec,0x02,0x7e,0x82,0x77,0x39,0x05] + +0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xea,0x02,0x7e,0x02,0x77,0x39,0x05] + +0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_dpp v1, v2.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_dpp v1, v130/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xea,0x02,0x7e,0x82,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt new file mode 100644 index 0000000000000..70abf4289ac11 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -0,0 +1,79 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s + +0x05,0x00,0xf2,0xd5,0xc1,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, -1 ; encoding: [0x05,0x00,0xf2,0xd5,0xc1,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x7f,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x7f,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x7e,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x7e,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x7d,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, m0 ; encoding: [0x05,0x00,0xf2,0xd5,0x7d,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x7c,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, null ; encoding: [0x05,0x00,0xf2,0xd5,0x7c,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x01,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, s1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x69,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, s105 ; encoding: [0x05,0x00,0xf2,0xd5,0x69,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x7b,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xf2,0xd5,0x7b,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v1.l ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xf2,0xd5,0x01,0x01,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v255.l ; encoding: [0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xf2,0xd5,0xff,0x01,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x6b,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xf2,0xd5,0x6b,0x00,0x00,0x00] + +0x05,0x00,0xf2,0xd5,0x6a,0x00,0x00,0x00 +# GFX1250: v_cvt_f32_bf16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xf2,0xd5,0x6a,0x00,0x00,0x00] + +0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v1.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v1 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0x01,0x01,0x00,0x00] + +0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_f32_bf16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xff,0x01,0x00,0x00] + +0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_bf8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x00,0x00,0x00] + +0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_bf8 v1, v150.l ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8 v1, v150 ; encoding: [0x01,0x00,0xf6,0xd5,0x96,0x01,0x00,0x00] + +0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_bf8 v1, v150.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8 v1, v150 op_sel:[1,0] ; encoding: 
[0x01,0x08,0xf6,0xd5,0x96,0x01,0x00,0x00] + +0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_bf8 v1, v2.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0x02,0x01,0x00,0x00] + +0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_f16_fp8 v1, s2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x00,0x00,0x00] + +0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v150.l ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v150 ; encoding: [0x01,0x00,0xf5,0xd5,0x96,0x01,0x00,0x00] + +0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v150.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v150 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x96,0x01,0x00,0x00] + +0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00 +# GFX1250-REAL16: v_cvt_pk_f16_fp8 v1, v2.h op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8 v1, v2 op_sel:[1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0x02,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt new file mode 100644 index 0000000000000..d53d532eef804 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s + 
+0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + 
+0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# 
GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v128 op_sel:[1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0xf2,0xd5,0xfa,0x00,0x00,0x00,0x80,0x1b,0x00,0xff] + +0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] + +0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf6,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] + +0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x01,0x00,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x04,0xff] + +0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 op_sel:[1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x08,0xf5,0xd5,0xfa,0x00,0x00,0x00,0x80,0xe4,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt new file mode 100644 index 0000000000000..8df21f3f5e4df --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s + +0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_f32_bf16_e64_dpp v5, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_f32_bf16_e64_dpp v5, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0xf2,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + 
+0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_bf8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf6,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + +0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_bf8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_bf8_e64_dpp v1, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf6,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + +0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x01,0x00,0xf5,0xd5,0xea,0x00,0x00,0x00,0x80,0x77,0x39,0x05] + +0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05 +# GFX1250-REAL16: v_cvt_pk_f16_fp8_e64_dpp v1, v128.h op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] +# GFX1250-FAKE16: v_cvt_pk_f16_fp8_e64_dpp v1, v128 op_sel:[1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x08,0xf5,0xd5,0xe9,0x00,0x00,0x00,0x80,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/RISCV/c_lui_disasm.txt b/llvm/test/MC/Disassembler/RISCV/c_lui_disasm.txt index 17889c15cbf95..0a83a200c9317 100644 --- a/llvm/test/MC/Disassembler/RISCV/c_lui_disasm.txt +++ b/llvm/test/MC/Disassembler/RISCV/c_lui_disasm.txt @@ -10,265 +10,197 @@ # RUN: llvm-mc --disassemble -triple=riscv32 -mattr=+c -mattr=+Zcmop \ # RUN: -M no-aliases --show-encoding < %s | \ # RUN: FileCheck --check-prefixes=GOOD,MOP %s -# -# RUN: llvm-mc 
--disassemble -triple=riscv32 -mattr=+c -mattr=+no-rvc-hints \ -# RUN: -M no-aliases --show-encoding < %s 2>&1 | \ -# RUN: FileCheck --check-prefix=NOHINTS %s # BAD: invalid instruction encoding -# NOHINTS: invalid instruction encoding 0x01 0x60 # GOOD: c.lui zero, 1 -# NOHINTS: invalid instruction encoding 0x05 0x60 # GOOD: c.lui zero, 2 -# NOHINTS: invalid instruction encoding 0x09 0x60 # GOOD: c.lui zero, 3 -# NOHINTS: invalid instruction encoding 0x0D 0x60 # GOOD: c.lui zero, 4 -# NOHINTS: invalid instruction encoding 0x11 0x060 # GOOD: c.lui zero, 5 -# NOHINTS: invalid instruction encoding 0x15 0x60 # GOOD: c.lui zero, 6 -# NOHINTS: invalid instruction encoding 0x19 0x60 # GOOD: c.lui zero, 7 -# NOHINTS: invalid instruction encoding 0x1D 0x60 # GOOD: c.lui zero, 8 -# NOHINTS: invalid instruction encoding 0x21 0x60 # GOOD: c.lui zero, 9 -# NOHINTS: invalid instruction encoding 0x25 0x60 # GOOD: c.lui zero, 10 -# NOHINTS: invalid instruction encoding 0x29 0x60 # GOOD: c.lui zero, 11 -# NOHINTS: invalid instruction encoding 0x2D 0x60 # GOOD: c.lui zero, 12 -# NOHINTS: invalid instruction encoding 0x31 0x60 # GOOD: c.lui zero, 13 -# NOHINTS: invalid instruction encoding 0x35 0x60 # GOOD: c.lui zero, 14 -# NOHINTS: invalid instruction encoding 0x39 0x60 # GOOD: c.lui zero, 15 -# NOHINTS: invalid instruction encoding 0x3D 0x60 # GOOD: c.lui zero, 16 -# NOHINTS: invalid instruction encoding 0x41 0x60 # GOOD: c.lui zero, 17 -# NOHINTS: invalid instruction encoding 0x45 0x60 # GOOD: c.lui zero, 18 -# NOHINTS: invalid instruction encoding 0x49 0x60 # GOOD: c.lui zero, 19 -# NOHINTS: invalid instruction encoding 0x4D 0x60 # GOOD: c.lui zero, 20 -# NOHINTS: invalid instruction encoding 0x51 0x60 # GOOD: c.lui zero, 21 -# NOHINTS: invalid instruction encoding 0x55 0x60 # GOOD: c.lui zero, 22 -# NOHINTS: invalid instruction encoding 0x59 0x60 # GOOD: c.lui zero, 23 -# NOHINTS: invalid instruction encoding 0x5D 0x60 # GOOD: c.lui zero, 24 -# NOHINTS: invalid instruction 
encoding 0x61 0x60 # GOOD: c.lui zero, 25 -# NOHINTS: invalid instruction encoding 0x65 0x60 # GOOD: c.lui zero, 26 -# NOHINTS: invalid instruction encoding 0x69 0x60 # GOOD: c.lui zero, 27 -# NOHINTS: invalid instruction encoding 0x6D 0x60 # GOOD: c.lui zero, 28 -# NOHINTS: invalid instruction encoding 0x71 0x60 # GOOD: c.lui zero, 29 -# NOHINTS: invalid instruction encoding 0x75 0x60 # GOOD: c.lui zero, 30 -# NOHINTS: invalid instruction encoding 0x79 0x60 # GOOD: c.lui zero, 31 -# NOHINTS: invalid instruction encoding 0x7D 0x60 # GOOD: c.lui zero, 1048544 -# NOHINTS: invalid instruction encoding 0x01 0x70 # GOOD: c.lui zero, 1048545 -# NOHINTS: invalid instruction encoding 0x05 0x70 # GOOD: c.lui zero, 1048546 -# NOHINTS: invalid instruction encoding 0x09 0x70 # GOOD: c.lui zero, 1048547 -# NOHINTS: invalid instruction encoding 0x0D 0x70 # GOOD: c.lui zero, 1048548 -# NOHINTS: invalid instruction encoding 0x11 0x70 # GOOD: c.lui zero, 1048549 -# NOHINTS: invalid instruction encoding 0x15 0x70 # GOOD: c.lui zero, 1048550 -# NOHINTS: invalid instruction encoding 0x19 0x70 # GOOD: c.lui zero, 1048551 -# NOHINTS: invalid instruction encoding 0x1D 0x70 # GOOD: c.lui zero, 1048552 -# NOHINTS: invalid instruction encoding 0x21 0x70 # GOOD: c.lui zero, 1048553 -# NOHINTS: invalid instruction encoding 0x25 0x70 # GOOD: c.lui zero, 1048554 -# NOHINTS: invalid instruction encoding 0x29 0x70 # GOOD: c.lui zero, 1048555 -# NOHINTS: invalid instruction encoding 0x2D 0x70 # GOOD: c.lui zero, 1048556 -# NOHINTS: invalid instruction encoding 0x31 0x70 # GOOD: c.lui zero, 1048557 -# NOHINTS: invalid instruction encoding 0x35 0x70 # GOOD: c.lui zero, 1048558 -# NOHINTS: invalid instruction encoding 0x39 0x70 # GOOD: c.lui zero, 1048559 -# NOHINTS: invalid instruction encoding 0x3D 0x70 # GOOD: c.lui zero, 1048560 -# NOHINTS: invalid instruction encoding 0x41 0x70 # GOOD: c.lui zero, 1048561 -# NOHINTS: invalid instruction encoding 0x45 0x70 # GOOD: c.lui zero, 1048562 -# NOHINTS: 
invalid instruction encoding 0x49 0x70 # GOOD: c.lui zero, 1048563 -# NOHINTS: invalid instruction encoding 0x4D 0x70 # GOOD: c.lui zero, 1048564 -# NOHINTS: invalid instruction encoding 0x51 0x70 # GOOD: c.lui zero, 1048565 -# NOHINTS: invalid instruction encoding 0x55 0x70 # GOOD: c.lui zero, 1048566 -# NOHINTS: invalid instruction encoding 0x59 0x70 # GOOD: c.lui zero, 1048567 -# NOHINTS: invalid instruction encoding 0x5D 0x70 # GOOD: c.lui zero, 1048568 -# NOHINTS: invalid instruction encoding 0x61 0x70 # GOOD: c.lui zero, 1048569 -# NOHINTS: invalid instruction encoding 0x65 0x70 # GOOD: c.lui zero, 1048570 -# NOHINTS: invalid instruction encoding 0x69 0x70 # GOOD: c.lui zero, 1048571 -# NOHINTS: invalid instruction encoding 0x6D 0x70 # GOOD: c.lui zero, 1048572 -# NOHINTS: invalid instruction encoding 0x71 0x70 # GOOD: c.lui zero, 1048573 -# NOHINTS: invalid instruction encoding 0x75 0x70 # GOOD: c.lui zero, 1048574 -# NOHINTS: invalid instruction encoding 0x79 0x70 # GOOD: c.lui zero, 1048575 -# NOHINTS: invalid instruction encoding 0x7D 0x70 # BAD: invalid instruction encoding diff --git a/llvm/test/MC/RISCV/rv32c-invalid.s b/llvm/test/MC/RISCV/rv32c-invalid.s index 8dddbf887c87c..413573af1c5e6 100644 --- a/llvm/test/MC/RISCV/rv32c-invalid.s +++ b/llvm/test/MC/RISCV/rv32c-invalid.s @@ -1,6 +1,6 @@ -# RUN: not llvm-mc -triple=riscv32 -mattr=+c -mattr=+no-rvc-hints < %s 2>&1 \ +# RUN: not llvm-mc -triple=riscv32 -mattr=+c < %s 2>&1 \ # RUN: | FileCheck %s -# RUN: not llvm-mc -triple=riscv32 -mattr=+zca -mattr=+no-rvc-hints < %s 2>&1 \ +# RUN: not llvm-mc -triple=riscv32 -mattr=+zca < %s 2>&1 \ # RUN: | FileCheck %s ## GPRC @@ -23,16 +23,10 @@ c.lwsp x0, 4(sp) # CHECK: :[[@LINE]]:9: error: register must be a GPR excluding c.lwsp zero, 4(sp) # CHECK: :[[@LINE]]:9: error: register must be a GPR excluding zero (x0) c.jr x0 # CHECK: :[[@LINE]]:7: error: register must be a GPR excluding zero (x0) c.jalr zero # CHECK: :[[@LINE]]:9: error: register must be a GPR 
excluding zero (x0) -c.addi x0, x0, 1 # CHECK: :[[@LINE]]:13: error: immediate must be zero -c.li zero, 2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions{{$}} -c.slli zero, zero, 4 # CHECK: :[[@LINE]]:15: error: invalid operand for instruction -c.mv zero, s0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions{{$}} c.mv ra, x0 # CHECK: :[[@LINE]]:11: error: register must be a GPR excluding zero (x0) c.add ra, ra, x0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction -c.add zero, zero, sp # CHECK: :[[@LINE]]:14: error: invalid operand for instruction ## GPRNoX0X2 -c.lui x0, 4 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions{{$}} c.lui x2, 4 # CHECK: :[[@LINE]]:7: error: register must be a GPR excluding zero (x0) and sp (x2){{$}} ## SP @@ -57,7 +51,6 @@ c.andi a0, %lo(foo) # CHECK: :[[@LINE]]:12: error: immediate must be an integer c.andi a0, %hi(foo) # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [-32, 31] ## simm6nonzero -c.addi t0, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions{{$}} c.addi t0, -33 # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] c.addi t0, 32 # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] c.addi t0, foo # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] diff --git a/llvm/test/MC/X86/gotpcrel-non-globals.ll b/llvm/test/MC/X86/gotpcrel-non-globals.ll new file mode 100644 index 0000000000000..222d2d73ff728 --- /dev/null +++ b/llvm/test/MC/X86/gotpcrel-non-globals.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; Check that we emit the `@bar_*` symbols, and that we don't emit multiple symbols. 
+ +; CHECK-LABEL: .Lrel_0: +; CHECK: .long foo_0@GOTPCREL+0 +; CHECK-LABEL: .Lrel_1_failed: +; CHECK: .long bar_1-foo_0 +; CHECK-LABEL: .Lrel_2: +; CHECK: .long foo_2@GOTPCREL+0 + +; CHECK: bar_0: +; CHECK: bar_1: +; CHECK: bar_2_indirect: + +@rel_0 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_0 to i64), i64 ptrtoint (ptr @rel_0 to i64)) to i32)] +@rel_1_failed = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_1 to i64), i64 ptrtoint (ptr @foo_0 to i64)) to i32)] +@rel_2 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr @bar_2_indirect to i64), i64 ptrtoint (ptr @rel_2 to i64)) to i32)] +@bar_0 = internal unnamed_addr constant ptr @foo_0, align 8 +@bar_1 = internal unnamed_addr constant ptr @foo_1, align 8 +@bar_2_indirect = internal unnamed_addr constant ptr @foo_2, align 8 +@foo_0 = external global ptr, align 8 +@foo_1 = external global ptr, align 8 +@foo_2 = external global ptr, align 8 + +define void @foo(ptr %arg0, ptr %arg1) { + store ptr @bar_0, ptr %arg0, align 8 + store ptr @bar_1, ptr %arg1, align 8 + store ptr getelementptr (i8, ptr @bar_2_indirect, i32 1), ptr %arg1, align 8 + ret void +} diff --git a/llvm/test/TableGen/CompressWriteLatencyEntry.td b/llvm/test/TableGen/CompressWriteLatencyEntry.td index 88273e8858448..d6a9f0ac0dd76 100644 --- a/llvm/test/TableGen/CompressWriteLatencyEntry.td +++ b/llvm/test/TableGen/CompressWriteLatencyEntry.td @@ -33,10 +33,10 @@ def Read_D : SchedRead; // CHECK-NEXT: }; // MyTargetReadAdvanceTable // CHECK: static const llvm::MCSchedClassDesc SchedModel_ASchedClasses[] = { -// CHECK-NEXT: {DBGFIELD("InvalidSchedClass") 8191, false, false, false, 0, 0, 0, 0, 0, 0}, -// CHECK-NEXT: {DBGFIELD("Inst_A") 1, false, false, false, 0, 0, 1, 1, 0, 0}, // #1 -// CHECK-NEXT: {DBGFIELD("Inst_B") 1, false, false, false, 0, 0, 2, 1, 0, 0}, // #2 -// CHECK-NEXT: {DBGFIELD("Inst_C") 1, false, false, false, 0, 0, 1, 1, 
1, 1}, // #3 +// CHECK-NEXT: {DBGFIELD(1) 8191, false, false, false, 0, 0, 0, 0, 0, 0}, +// CHECK-NEXT: {DBGFIELD(/*Inst_A*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 0, 0}, // #1 +// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 2, 1, 0, 0}, // #2 +// CHECK-NEXT: {DBGFIELD(/*Inst_C*/ {{[0-9]+}}) 1, false, false, false, 0, 0, 1, 1, 1, 1}, // #3 // CHECK-NEXT: }; // SchedModel_ASchedClasses let SchedModel = SchedModel_A in { diff --git a/llvm/test/TableGen/InvalidMCSchedClassDesc.td b/llvm/test/TableGen/InvalidMCSchedClassDesc.td index de5392237a84c..e43edd4174589 100644 --- a/llvm/test/TableGen/InvalidMCSchedClassDesc.td +++ b/llvm/test/TableGen/InvalidMCSchedClassDesc.td @@ -1,13 +1,13 @@ // RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s -// Check if it is valid MCSchedClassDesc if didn't have the resources. +// Check if it is valid MCSchedClassDesc if didn't have the resources. include "llvm/Target/Target.td" def MyTarget : Target; let OutOperandList = (outs), InOperandList = (ins) in { - def Inst_A : Instruction; - def Inst_B : Instruction; + def Inst_A : Instruction; + def Inst_B : Instruction; } let CompleteModel = 0 in { @@ -18,8 +18,8 @@ let CompleteModel = 0 in { // Inst_B didn't have the resoures, and it is invalid. // CHECK: SchedModel_ASchedClasses[] = { -// CHECK: {DBGFIELD("Inst_A") 1 -// CHECK-NEXT: {DBGFIELD("Inst_B") 8191 +// CHECK: {DBGFIELD(/*Inst_A*/ 19) 1 +// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26) 8191 let SchedModel = SchedModel_A in { def Write_A : SchedWriteRes<[]>; def : InstRW<[Write_A], (instrs Inst_A)>; @@ -27,18 +27,18 @@ let SchedModel = SchedModel_A in { // Inst_A didn't have the resoures, and it is invalid. 
// CHECK: SchedModel_BSchedClasses[] = { -// CHECK: {DBGFIELD("Inst_A") 8191 -// CHECK-NEXT: {DBGFIELD("Inst_B") 1 +// CHECK: {DBGFIELD(/*Inst_A*/ 19) 8191 +// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26) 1 let SchedModel = SchedModel_B in { - def Write_B: SchedWriteRes<[]>; + def Write_B: SchedWriteRes<[]>; def : InstRW<[Write_B], (instrs Inst_B)>; } // CHECK: SchedModel_CSchedClasses[] = { -// CHECK: {DBGFIELD("Inst_A") 1 -// CHECK-NEXT: {DBGFIELD("Inst_B") 1 +// CHECK: {DBGFIELD(/*Inst_A*/ 19) 1 +// CHECK-NEXT: {DBGFIELD(/*Inst_B*/ 26) 1 let SchedModel = SchedModel_C in { - def Write_C: SchedWriteRes<[]>; + def Write_C: SchedWriteRes<[]>; def : InstRW<[Write_C], (instrs Inst_A, Inst_B)>; } diff --git a/llvm/test/Transforms/FunctionAttrs/initializes.ll b/llvm/test/Transforms/FunctionAttrs/initializes.ll index 937595b5e9b74..5800bc1ca7864 100644 --- a/llvm/test/Transforms/FunctionAttrs/initializes.ll +++ b/llvm/test/Transforms/FunctionAttrs/initializes.ll @@ -443,7 +443,7 @@ define void @memset_neg(ptr %p) { } define void @memset_volatile(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: write) +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: write, inaccessiblemem: readwrite) ; CHECK-LABEL: define void @memset_volatile( ; CHECK-SAME: ptr writeonly [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 9, i1 true) @@ -478,7 +478,7 @@ define void @memcpy(ptr %p, ptr %p2) { } define void @memcpy_volatile(ptr %p, ptr %p2) { -; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define void @memcpy_volatile( ; CHECK-SAME: ptr writeonly [[P:%.*]], ptr readonly [[P2:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P]], ptr 
[[P2]], i64 9, i1 true) @@ -541,7 +541,7 @@ define void @memmove(ptr %p, ptr %p2) { } define void @memmove_volatile(ptr %p, ptr %p2) { -; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define void @memmove_volatile( ; CHECK-SAME: ptr writeonly [[P:%.*]], ptr readonly [[P2:%.*]]) #[[ATTR6]] { ; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[P]], ptr [[P2]], i64 9, i1 true) diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll index de5398f17ce51..9abfbb21a71a0 100644 --- a/llvm/test/Transforms/FunctionAttrs/nosync.ll +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -236,7 +236,7 @@ declare void @llvm.memset(ptr %dest, i8 %val, i32 %len, i1 %isvolatile) ; negative, checking volatile intrinsics. define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) { -; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: @memcpy_volatile( ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], i32 8, i1 true) ; CHECK-NEXT: ret i32 4 diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll index 8bc71148352d2..5bc733f5622c7 100644 --- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll +++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 < %s 2>&1 | 
FileCheck %s ; RUN: opt -passes='function(instcombine),hotcoldsplit,function(instsimplify)' %s -o /dev/null target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/atan-intrinsic.ll b/llvm/test/Transforms/InstSimplify/ConstProp/atan-intrinsic.ll index c5c17d65524c2..d824d6d35643d 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/atan-intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/atan-intrinsic.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes=instsimplify < %s | FileCheck %s -; XFAIL: target={{.*}}-aix{{.*}} define double @test_atan_0() { ; CHECK-LABEL: define double @test_atan_0() { diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/calls.ll b/llvm/test/Transforms/InstSimplify/ConstProp/calls.ll index 61a30c781c0f4..26fb8c0d7a1c6 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/calls.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/calls.ll @@ -202,5 +202,17 @@ entry: ret float %0 } +define float @test_atan_negzero() nounwind uwtable ssp { +entry: +; CHECK-LABEL: @test_atan_negzero( +; CHECK: ret float -0.000000e+00 +; +; FNOBUILTIN-LABEL: @test_atan_negzero( +; FNOBUILTIN: ret float -0.000000e+00 +; + %1 = call float @atanf(float -0.0) + ret float %1 +} + declare double @llvm.pow.f64(double, double) nounwind readonly declare float @llvm.pow.f32(float, float) nounwind readonly diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll index 61a68692ff5b9..c565066541d1d 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll @@ -16,17 +16,13 @@ define void @deinterleave4(ptr %src) { ; %load = load , ptr %src, align 4 - %deinterleave_src = tail call { , } 
@llvm.vector.deinterleave2.nxv16i32( %load) - %3 = extractvalue { , } %deinterleave_src, 0 - %4 = extractvalue { , } %deinterleave_src, 1 - %deinterleave_half1 = tail call { , } @llvm.vector.deinterleave2.nxv8i32( %3) - %5 = extractvalue { , } %deinterleave_half1, 0 - %6 = extractvalue { , } %deinterleave_half1, 1 - %deinterleave_half2 = tail call { , } @llvm.vector.deinterleave2.nxv8i32( %4) - %7 = extractvalue { , } %deinterleave_half2, 0 - %8 = extractvalue { , } %deinterleave_half2, 1 - %sum = add %5, %7 - %sub = sub %6, %8 + %deinterleave = tail call { , , , } @llvm.vector.deinterleave4.nxv16i32( %load) + %1 = extractvalue { , , , } %deinterleave, 0 + %2 = extractvalue { , , , } %deinterleave, 1 + %3 = extractvalue { , , , } %deinterleave, 2 + %4 = extractvalue { , , , } %deinterleave, 3 + %sum = add %1, %2 + %sub = sub %3, %4 ret void } @@ -58,17 +54,13 @@ define void @wide_deinterleave4(ptr %src) { ; CHECK-NEXT: ret void ; %load = load , ptr %src, align 4 - %deinterleave_src = tail call { , } @llvm.vector.deinterleave2.nxv32i32( %load) - %3 = extractvalue { , } %deinterleave_src, 0 - %4 = extractvalue { , } %deinterleave_src, 1 - %deinterleave_half1 = tail call { , } @llvm.vector.deinterleave2.nxv16i32( %3) - %5 = extractvalue { , } %deinterleave_half1, 0 - %6 = extractvalue { , } %deinterleave_half1, 1 - %deinterleave_half2 = tail call { , } @llvm.vector.deinterleave2.nxv16i32( %4) - %7 = extractvalue { , } %deinterleave_half2, 0 - %8 = extractvalue { , } %deinterleave_half2, 1 - %sum = add %5, %7 - %sub = sub %6, %8 + %deinterleave = tail call { , , , } @llvm.vector.deinterleave4.nxv32i32( %load) + %1 = extractvalue { , , , } %deinterleave, 0 + %2 = extractvalue { , , , } %deinterleave, 1 + %3 = extractvalue { , , , } %deinterleave, 2 + %4 = extractvalue { , , , } %deinterleave, 3 + %sum = add %1, %2 + %sub = sub %3, %4 ret void } @@ -87,52 +79,36 @@ define void @mix_deinterleave4_deinterleave2(ptr %src) { ; %load = load , ptr %src, align 4 - 
%deinterleave_src = tail call { , } @llvm.vector.deinterleave2.nxv16i32( %load) - %3 = extractvalue { , } %deinterleave_src, 0 - %4 = extractvalue { , } %deinterleave_src, 1 - %deinterleave_half1 = tail call { , } @llvm.vector.deinterleave2.nxv8i32( %3) - %5 = extractvalue { , } %deinterleave_half1, 0 - %6 = extractvalue { , } %deinterleave_half1, 1 - %deinterleave_half2 = tail call { , } @llvm.vector.deinterleave2.nxv8i32( %4) - %7 = extractvalue { , } %deinterleave_half2, 0 - %8 = extractvalue { , } %deinterleave_half2, 1 + %deinterleave = tail call { , , , } @llvm.vector.deinterleave4.nxv16i32( %load) + %1 = extractvalue { , , , } %deinterleave, 0 + %2 = extractvalue { , , , } %deinterleave, 1 + %3 = extractvalue { , , , } %deinterleave, 2 + %4 = extractvalue { , , , } %deinterleave, 3 %load2 = load , ptr %src, align 4 - %deinterleave_src2 = tail call { , } @llvm.vector.deinterleave2.nxv16i32( %load2) - %ld2_1 = extractvalue { , } %deinterleave_src2, 0 - %ld2_2 = extractvalue { , } %deinterleave_src2, 1 + %deinterleave2 = tail call { , } @llvm.vector.deinterleave2.nxv16i32( %load2) + %ld2_1 = extractvalue { , } %deinterleave2, 0 + %ld2_2 = extractvalue { , } %deinterleave2, 1 ret void } define void @negative_deinterleave4_test(ptr %src) { ; CHECK-LABEL: define void @negative_deinterleave4_test ; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[SRC]], i64 0 -; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( splat (i1 true), ptr [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[LDN]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[SRC]], i64 2 -; CHECK-NEXT: [[LDN1:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( splat 
(i1 true), ptr [[TMP6]]) -; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[LDN1]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP7]], i64 4) -; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , } [[LDN1]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP9]], i64 4) -; CHECK-NEXT: [[DEINTERLEAVE_HALF1:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv8i32( [[TMP8]]) -; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[DEINTERLEAVE_HALF1]], 0 -; CHECK-NEXT: [[DEINTERLEAVE_HALF2:%.*]] = tail call { , } @llvm.vector.deinterleave2.nxv8i32( [[TMP10]]) -; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , } [[DEINTERLEAVE_HALF2]], 1 +; CHECK-NEXT: [[LOAD:%.*]] = load , ptr [[SRC]], align 4 +; CHECK-NEXT: [[DEINTERLEAVE:%.*]] = tail call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[LOAD]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 2 ; CHECK-NEXT: ret void ; %load = load , ptr %src, align 4 - %deinterleave_src = tail call { , } @llvm.vector.deinterleave2.nxv16i32( %load) - %3 = extractvalue { , } %deinterleave_src, 0 - %4 = extractvalue { , } %deinterleave_src, 1 - %deinterleave_half1 = tail call { , } @llvm.vector.deinterleave2.nxv8i32( %3) - %5 = extractvalue { , } %deinterleave_half1, 0 - %deinterleave_half2 = tail call { , } @llvm.vector.deinterleave2.nxv8i32( %4) - %6 = extractvalue { , } %deinterleave_half2, 1 + %deinterleave = tail call { , , , } @llvm.vector.deinterleave4.nxv16i32( %load) + %1 = extractvalue { , , , } %deinterleave, 0 + %2 = extractvalue { , , , } %deinterleave, 1 + %3 = extractvalue { , , , } %deinterleave, 2 + %4 = extractvalue { , , , } %deinterleave, 2 ret void } diff --git 
a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll index 085089978d8f5..a61db6577d56d 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll @@ -8,9 +8,7 @@ define void @interleave4(ptr %dst, %a, %b, ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32( [[A]], [[B]], [[C]], [[D]], splat (i1 true), ptr [[DST]]) ; CHECK-NEXT: ret void ; - %interleaved.half1 = tail call @llvm.vector.interleave2.nxv8i32( %a, %c) - %interleaved.half2 = tail call @llvm.vector.interleave2.nxv8i32( %b, %d) - %interleaved.vec = tail call @llvm.vector.interleave2.nxv16i32( %interleaved.half1, %interleaved.half2) + %interleaved.vec = tail call @llvm.vector.interleave4.nxv16i32( %a, %b, %c, %d) store %interleaved.vec, ptr %dst, align 4 ret void } @@ -32,9 +30,7 @@ define void @wide_interleave4(ptr %dst, %a, [[TMP7]], [[TMP8]], [[TMP9]], [[TMP10]], splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: ret void ; - %interleaved.half1 = tail call @llvm.vector.interleave2.nxv16i32( %a, %c) - %interleaved.half2 = tail call @llvm.vector.interleave2.nxv16i32( %b, %d) - %interleaved.vec = tail call @llvm.vector.interleave2.nxv32i32( %interleaved.half1, %interleaved.half2) + %interleaved.vec = tail call @llvm.vector.interleave4.nxv32i32( %a, %b, %c, %d) store %interleaved.vec, ptr %dst, align 4 ret void } @@ -46,9 +42,7 @@ define void @mix_interleave4_interleave2(ptr %dst1, ptr %dst2, [[A]], [[C]], splat (i1 true), ptr [[DST2]]) ; CHECK-NEXT: ret void ; - %interleaved.half1 = tail call @llvm.vector.interleave2.nxv8i32( %a, %c) - %interleaved.half2 = tail call @llvm.vector.interleave2.nxv8i32( %b, %d) - %interleaved.vec = tail call @llvm.vector.interleave2.nxv16i32( %interleaved.half1, %interleaved.half2) + %interleaved.vec = tail call @llvm.vector.interleave4.nxv16i32( %a, %b, %c, %d) store %interleaved.vec, ptr %dst1, align 4 
%interleaved = tail call @llvm.vector.interleave2.nxv8i32( %a, %c) @@ -64,8 +58,7 @@ define void @duplicate_by_interleave( %A, % ; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv4i32( [[A]], [[A]], [[B]], [[B]], splat (i1 true), ptr [[AB_DUPLICATE]]) ; CHECK-NEXT: ret void ; - %interleave = tail call @llvm.vector.interleave2.nxv8i32( %A, %B) - %duplicate_by_interleave = tail call @llvm.vector.interleave2.nxv16i32( %interleave, %interleave) - store %duplicate_by_interleave, ptr %AB_duplicate, align 4 + %interleave = tail call @llvm.vector.interleave4.nxv16i32( %A, %A, %B, %B) + store %interleave, ptr %AB_duplicate, align 4 ret void } diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll index 87b16d17aa5f0..72c1f22032bb7 100644 --- a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll @@ -175,53 +175,6 @@ define void @load_factor4_vscale(ptr %ptr) { ret void } -; TODO: Remove once recursive deinterleaving support is removed -define void @load_factor4_vscale_recursive(ptr %ptr) { -; RV32-LABEL: @load_factor4_vscale_recursive( -; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t.p0.i32(target("riscv.vector.tuple", , 4) poison, ptr [[PTR:%.*]], i32 -1, i32 5) -; RV32-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 0) -; RV32-NEXT: [[TMP3:%.*]] = insertvalue { , , , } poison, [[TMP2]], 0 -; RV32-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 1) -; RV32-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP3]], [[TMP4]], 1 -; RV32-NEXT: [[TMP6:%.*]] = call 
@llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 2) -; RV32-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 2 -; RV32-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 3) -; RV32-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 3 -; RV32-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP9]], 0 -; RV32-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP9]], 1 -; RV32-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP9]], 2 -; RV32-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP9]], 3 -; RV32-NEXT: ret void -; -; RV64-LABEL: @load_factor4_vscale_recursive( -; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", , 4) poison, ptr [[PTR:%.*]], i64 -1, i64 5) -; RV64-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 0) -; RV64-NEXT: [[TMP3:%.*]] = insertvalue { , , , } poison, [[TMP2]], 0 -; RV64-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 1) -; RV64-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP3]], [[TMP4]], 1 -; RV64-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 2) -; RV64-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 2 -; RV64-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 3) -; RV64-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 3 -; RV64-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP9]], 0 -; RV64-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP9]], 1 -; RV64-NEXT: [[TMP12:%.*]] = 
extractvalue { , , , } [[TMP9]], 2 -; RV64-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP9]], 3 -; RV64-NEXT: ret void -; - %interleaved.vec = load , ptr %ptr - %d0 = call { , } @llvm.vector.deinterleave2.nxv16i32( %interleaved.vec) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - %d1 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.0) - %t0 = extractvalue { , } %d1, 0 - %t1 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.1) - %t2 = extractvalue { , } %d2, 0 - %t3 = extractvalue { , } %d2, 1 - ret void -} - define void @load_factor5(ptr %ptr) { ; RV32-LABEL: @load_factor5( ; RV32-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg5.load.mask.v4i32.p0.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4) @@ -590,91 +543,6 @@ define void @load_factor8_vscale(ptr %ptr) { ret void } -; TODO: Remove once recursive deinterleaving support is removed -define void @load_factor8_vscale_recursive(ptr %ptr) { -; RV32-LABEL: @load_factor8_vscale_recursive( -; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t.p0.i32(target("riscv.vector.tuple", , 8) poison, ptr [[PTR:%.*]], i32 -1, i32 5) -; RV32-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 0) -; RV32-NEXT: [[TMP3:%.*]] = insertvalue { , , , , , , , } poison, [[TMP2]], 0 -; RV32-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 1) -; RV32-NEXT: [[TMP5:%.*]] = insertvalue { , , , , , , , } [[TMP3]], [[TMP4]], 1 -; RV32-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 2) -; RV32-NEXT: [[TMP7:%.*]] = insertvalue { , , , , , , , } [[TMP5]], [[TMP6]], 2 -; RV32-NEXT: [[TMP8:%.*]] = call 
@llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 3) -; RV32-NEXT: [[TMP9:%.*]] = insertvalue { , , , , , , , } [[TMP7]], [[TMP8]], 3 -; RV32-NEXT: [[TMP10:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 4) -; RV32-NEXT: [[TMP11:%.*]] = insertvalue { , , , , , , , } [[TMP9]], [[TMP10]], 4 -; RV32-NEXT: [[TMP12:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 5) -; RV32-NEXT: [[TMP13:%.*]] = insertvalue { , , , , , , , } [[TMP11]], [[TMP12]], 5 -; RV32-NEXT: [[TMP14:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 6) -; RV32-NEXT: [[TMP15:%.*]] = insertvalue { , , , , , , , } [[TMP13]], [[TMP14]], 6 -; RV32-NEXT: [[TMP16:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 7) -; RV32-NEXT: [[TMP17:%.*]] = insertvalue { , , , , , , , } [[TMP15]], [[TMP16]], 7 -; RV32-NEXT: [[TMP18:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 0 -; RV32-NEXT: [[TMP19:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 1 -; RV32-NEXT: [[TMP20:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 2 -; RV32-NEXT: [[TMP21:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 3 -; RV32-NEXT: [[TMP22:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 4 -; RV32-NEXT: [[TMP23:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 5 -; RV32-NEXT: [[TMP24:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 6 -; RV32-NEXT: [[TMP25:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 7 -; RV32-NEXT: ret void -; -; RV64-LABEL: @load_factor8_vscale_recursive( -; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", , 8) poison, ptr [[PTR:%.*]], i64 -1, i64 5) -; 
RV64-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 0) -; RV64-NEXT: [[TMP3:%.*]] = insertvalue { , , , , , , , } poison, [[TMP2]], 0 -; RV64-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 1) -; RV64-NEXT: [[TMP5:%.*]] = insertvalue { , , , , , , , } [[TMP3]], [[TMP4]], 1 -; RV64-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 2) -; RV64-NEXT: [[TMP7:%.*]] = insertvalue { , , , , , , , } [[TMP5]], [[TMP6]], 2 -; RV64-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 3) -; RV64-NEXT: [[TMP9:%.*]] = insertvalue { , , , , , , , } [[TMP7]], [[TMP8]], 3 -; RV64-NEXT: [[TMP10:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 4) -; RV64-NEXT: [[TMP11:%.*]] = insertvalue { , , , , , , , } [[TMP9]], [[TMP10]], 4 -; RV64-NEXT: [[TMP12:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 5) -; RV64-NEXT: [[TMP13:%.*]] = insertvalue { , , , , , , , } [[TMP11]], [[TMP12]], 5 -; RV64-NEXT: [[TMP14:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 6) -; RV64-NEXT: [[TMP15:%.*]] = insertvalue { , , , , , , , } [[TMP13]], [[TMP14]], 6 -; RV64-NEXT: [[TMP16:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 7) -; RV64-NEXT: [[TMP17:%.*]] = insertvalue { , , , , , , , } [[TMP15]], [[TMP16]], 7 -; RV64-NEXT: [[TMP18:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 0 -; RV64-NEXT: [[TMP19:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 1 
-; RV64-NEXT: [[TMP20:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 2 -; RV64-NEXT: [[TMP21:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 3 -; RV64-NEXT: [[TMP22:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 4 -; RV64-NEXT: [[TMP23:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 5 -; RV64-NEXT: [[TMP24:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 6 -; RV64-NEXT: [[TMP25:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 7 -; RV64-NEXT: ret void -; - %interleaved.vec = load , ptr %ptr - %d0 = call { , } @llvm.vector.deinterleave2.nxv16i32( %interleaved.vec) - %d0.0 = extractvalue { , } %d0, 0 - %d0.1 = extractvalue { , } %d0, 1 - - %d1 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.0) - %d1.0 = extractvalue { , } %d1, 0 - %d1.1 = extractvalue { , } %d1, 1 - %d2 = call { , } @llvm.vector.deinterleave2.nxv8i32( %d0.1) - %d2.0 = extractvalue { , } %d2, 0 - %d2.1 = extractvalue { , } %d2, 1 - - %d3 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.0) - %t0 = extractvalue { , } %d3, 0 - %t1 = extractvalue { , } %d3, 1 - %d4 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d1.1) - %t2 = extractvalue { , } %d4, 0 - %t3 = extractvalue { , } %d4, 1 - %d5 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d2.0) - %t4 = extractvalue { , } %d5, 0 - %t5 = extractvalue { , } %d5, 1 - %d6 = call { , } @llvm.vector.deinterleave2.nxv4i32( %d2.1) - %t6 = extractvalue { , } %d6, 0 - %t7 = extractvalue { , } %d6, 1 - ret void -} - define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) { ; RV32-LABEL: @store_factor2( @@ -808,31 +676,6 @@ define void @store_factor4_vscale(ptr %ptr, %v0, %v0, %v1) { -; RV32-LABEL: @store_factor4_vscale_recursive( -; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) poison, [[V0:%.*]], i32 0) -; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 4) 
@llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP1]], [[V0]], i32 1) -; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP2]], [[V1:%.*]], i32 2) -; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP3]], [[V1]], i32 3) -; RV32-NEXT: call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i32(target("riscv.vector.tuple", , 4) [[TMP4]], ptr [[PTR:%.*]], i32 -1, i32 3) -; RV32-NEXT: ret void -; -; RV64-LABEL: @store_factor4_vscale_recursive( -; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) poison, [[V0:%.*]], i32 0) -; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP1]], [[V0]], i32 1) -; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP2]], [[V1:%.*]], i32 2) -; RV64-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP3]], [[V1]], i32 3) -; RV64-NEXT: call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", , 4) [[TMP4]], ptr [[PTR:%.*]], i64 -1, i64 3) -; RV64-NEXT: ret void -; - %i0 = call @llvm.vector.interleave2.nxv8i8( %v0, %v1) - %i1 = call @llvm.vector.interleave2.nxv8i8( %v0, %v1) - %i2 = call @llvm.vector.interleave2.nxv16i8( %i0, %i1) - store %i2, ptr %ptr, align 4 - ret void -} - define void @store_factor5_vscale(ptr %ptr, %v0, %v1, %v2, %v3, %v4) { ; RV32-LABEL: 
@store_factor5_vscale( ; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", , 5) poison, [[V0:%.*]], i32 0) @@ -1013,45 +856,6 @@ define void @store_factor8_vscale(ptr %ptr, %v0, %v0, %v1, %v2, %v3) { -; RV32-LABEL: @store_factor8_vscale_recursive( -; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0) -; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP1]], [[V2:%.*]], i32 1) -; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP2]], [[V0]], i32 2) -; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP3]], [[V2]], i32 3) -; RV32-NEXT: [[TMP5:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP4]], [[V1:%.*]], i32 4) -; RV32-NEXT: [[TMP6:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP5]], [[V3:%.*]], i32 5) -; RV32-NEXT: [[TMP7:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP6]], [[V1]], i32 6) -; RV32-NEXT: [[TMP8:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP7]], [[V3]], i32 7) -; RV32-NEXT: call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i32(target("riscv.vector.tuple", , 8) 
[[TMP8]], ptr [[PTR:%.*]], i32 -1, i32 3) -; RV32-NEXT: ret void -; -; RV64-LABEL: @store_factor8_vscale_recursive( -; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0) -; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP1]], [[V2:%.*]], i32 1) -; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP2]], [[V0]], i32 2) -; RV64-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP3]], [[V2]], i32 3) -; RV64-NEXT: [[TMP5:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP4]], [[V1:%.*]], i32 4) -; RV64-NEXT: [[TMP6:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP5]], [[V3:%.*]], i32 5) -; RV64-NEXT: [[TMP7:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP6]], [[V1]], i32 6) -; RV64-NEXT: [[TMP8:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP7]], [[V3]], i32 7) -; RV64-NEXT: call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", , 8) [[TMP8]], ptr [[PTR:%.*]], i64 -1, i64 3) -; RV64-NEXT: ret void -; - %i0 = call @llvm.vector.interleave2.nxv8i8( %v0, %v1) - %i1 = call @llvm.vector.interleave2.nxv8i8( %v0, %v1) - %i2 = call @llvm.vector.interleave2.nxv16i8( %i0, %i1) - - 
%i3 = call @llvm.vector.interleave2.nxv8i8( %v2, %v3) - %i4 = call @llvm.vector.interleave2.nxv8i8( %v2, %v3) - %i5 = call @llvm.vector.interleave2.nxv16i8( %i3, %i4) - - %i6 = call @llvm.vector.interleave2.nxv32i8( %i2, %i5) - store %i6, ptr %ptr, align 4 - ret void -} - define void @load_factor2_fp128(ptr %ptr) { ; RV32-LABEL: @load_factor2_fp128( ; RV32-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index b23b0ce759d49..dd8b7d6ea7e42 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -274,7 +274,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:for.body.preheader +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: for.body.preheader ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: for.body.preheader: ; preds = %entry ; CHECK-NEXT: %0 = zext i32 %n to i64 @@ -282,7 +282,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %2 = mul nuw i64 %1, 4 ; CHECK-NEXT: %min.iters.check = icmp ult i64 %0, %2 ; CHECK-NEXT: br i1 %min.iters.check, label %scalar.ph, label %vector.scevcheck -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:vector.scevcheck +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: vector.scevcheck ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.scevcheck: ; preds = %for.body.preheader ; CHECK-NEXT: %3 = add nsw i64 %0, -1 @@ -297,8 +297,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %9 = icmp ugt i64 %3, 4294967295 ; CHECK-NEXT: %10 = or i1 %8, %9 ; CHECK-NEXT: br i1 %10, label %scalar.ph, label 
%vector.memcheck -; CHECK-NEXT: LV: draw edge fromfor.body.preheader -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:vector.memcheck +; CHECK-NEXT: LV: draw edge from for.body.preheader +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: vector.memcheck ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.memcheck: ; preds = %vector.scevcheck ; CHECK-NEXT: %11 = call i64 @llvm.vscale.i64() @@ -307,8 +307,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %14 = sub i64 %B1, %A2 ; CHECK-NEXT: %diff.check = icmp ult i64 %14, %13 ; CHECK-NEXT: br i1 %diff.check, label %scalar.ph, label %vector.ph -; CHECK-NEXT: LV: draw edge fromvector.scevcheck -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:vector.ph +; CHECK-NEXT: LV: draw edge from vector.scevcheck +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: vector.ph ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.ph: ; preds = %vector.memcheck ; CHECK-NEXT: %15 = call i64 @llvm.vscale.i64() @@ -321,10 +321,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %.cast = trunc i64 %n.vec to i32 ; CHECK-NEXT: %20 = sub i32 %n, %.cast ; CHECK-NEXT: br -; CHECK-NEXT: LV: draw edge fromvector.memcheck +; CHECK-NEXT: LV: draw edge from vector.memcheck ; CHECK-NEXT: LV: created vector.body -; CHECK-NEXT: LV: draw edge fromvector.ph -; CHECK-NEXT: LV: vectorizing VPBB:vector.body in BB:vector.body +; CHECK-NEXT: LV: draw edge from vector.ph +; CHECK-NEXT: LV: vectorizing VPBB: vector.body in BB: vector.body ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.body: ; preds = %vector.body, %vector.ph ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ] @@ -351,28 +351,28 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %34 = icmp eq i64 %index.next, %n.vec ; CHECK-NEXT: br i1 %34, , label %vector.body ; CHECK-NEXT: LV: created middle.block -; CHECK-NEXT: LV: draw edge fromvector.body -; CHECK-NEXT: LV: 
vectorizing VPBB:middle.block in BB:middle.block +; CHECK-NEXT: LV: draw edge from vector.body +; CHECK-NEXT: LV: vectorizing VPBB: middle.block in BB: middle.block ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: middle.block: ; preds = %vector.body ; CHECK-NEXT: %cmp.n = icmp eq i64 %0, %n.vec ; CHECK-NEXT: br i1 %cmp.n, , -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:for.cond.cleanup.loopexit +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: for.cond.cleanup.loopexit ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: for.cond.cleanup.loopexit: ; preds = %for.body ; CHECK-NEXT: br label %for.cond.cleanup -; CHECK-NEXT: LV: draw edge frommiddle.block -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:scalar.ph +; CHECK-NEXT: LV: draw edge from middle.block +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: scalar.ph ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: scalar.ph: ; preds = %vector.memcheck, %vector.scevcheck, %for.body.preheader ; CHECK-NEXT: %bc.resume.val = phi i64 [ %19, %middle.block ], [ %0, %for.body.preheader ], [ %0, %vector.scevcheck ], [ %0, %vector.memcheck ] ; CHECK-NEXT: %bc.resume.val5 = phi i32 [ %20, %middle.block ], [ %n, %for.body.preheader ], [ %n, %vector.scevcheck ], [ %n, %vector.memcheck ] ; CHECK-NEXT: br label %for.body -; CHECK-NEXT: LV: draw edge frommiddle.block -; CHECK-NEXT: LV: draw edge fromfor.body.preheader -; CHECK-NEXT: LV: draw edge fromvector.scevcheck -; CHECK-NEXT: LV: draw edge fromvector.memcheck -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:for.body +; CHECK-NEXT: LV: draw edge from middle.block +; CHECK-NEXT: LV: draw edge from for.body.preheader +; CHECK-NEXT: LV: draw edge from vector.scevcheck +; CHECK-NEXT: LV: draw edge from vector.memcheck +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: for.body ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: for.body: ; preds = %for.body, %scalar.ph ; CHECK-NEXT: %indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %for.body ] @@ -387,7 +387,7 @@ define void 
@vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 -; CHECK-NEXT: LV: draw edge fromscalar.ph +; CHECK-NEXT: LV: draw edge from scalar.ph ; CHECK-NEXT: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. ; CHECK-EMPTY: @@ -683,7 +683,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:for.body.preheader +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: for.body.preheader ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: for.body.preheader: ; preds = %entry ; CHECK-NEXT: %0 = zext i32 %n to i64 @@ -691,7 +691,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %2 = mul nuw i64 %1, 4 ; CHECK-NEXT: %min.iters.check = icmp ult i64 %0, %2 ; CHECK-NEXT: br i1 %min.iters.check, label %scalar.ph, label %vector.scevcheck -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:vector.scevcheck +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: vector.scevcheck ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.scevcheck: ; preds = %for.body.preheader ; CHECK-NEXT: %3 = add nsw i64 %0, -1 @@ -706,8 +706,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %9 = icmp ugt i64 %3, 4294967295 ; CHECK-NEXT: %10 = or i1 %8, %9 ; CHECK-NEXT: br i1 %10, label %scalar.ph, label %vector.memcheck -; CHECK-NEXT: LV: draw edge fromfor.body.preheader -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:vector.memcheck +; CHECK-NEXT: LV: draw edge from for.body.preheader +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: vector.memcheck ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: 
vector.memcheck: ; preds = %vector.scevcheck ; CHECK-NEXT: %11 = call i64 @llvm.vscale.i64() @@ -716,8 +716,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %14 = sub i64 %B1, %A2 ; CHECK-NEXT: %diff.check = icmp ult i64 %14, %13 ; CHECK-NEXT: br i1 %diff.check, label %scalar.ph, label %vector.ph -; CHECK-NEXT: LV: draw edge fromvector.scevcheck -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:vector.ph +; CHECK-NEXT: LV: draw edge from vector.scevcheck +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: vector.ph ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.ph: ; preds = %vector.memcheck ; CHECK-NEXT: %15 = call i64 @llvm.vscale.i64() @@ -730,10 +730,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %.cast = trunc i64 %n.vec to i32 ; CHECK-NEXT: %20 = sub i32 %n, %.cast ; CHECK-NEXT: br -; CHECK-NEXT: LV: draw edge fromvector.memcheck +; CHECK-NEXT: LV: draw edge from vector.memcheck ; CHECK-NEXT: LV: created vector.body -; CHECK-NEXT: LV: draw edge fromvector.ph -; CHECK-NEXT: LV: vectorizing VPBB:vector.body in BB:vector.body +; CHECK-NEXT: LV: draw edge from vector.ph +; CHECK-NEXT: LV: vectorizing VPBB: vector.body in BB: vector.body ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: vector.body: ; preds = %vector.body, %vector.ph ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ] @@ -760,28 +760,28 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %34 = icmp eq i64 %index.next, %n.vec ; CHECK-NEXT: br i1 %34, , label %vector.body ; CHECK-NEXT: LV: created middle.block -; CHECK-NEXT: LV: draw edge fromvector.body -; CHECK-NEXT: LV: vectorizing VPBB:middle.block in BB:middle.block +; CHECK-NEXT: LV: draw edge from vector.body +; CHECK-NEXT: LV: vectorizing VPBB: middle.block in BB: middle.block ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: middle.block: ; preds = %vector.body ; CHECK-NEXT: %cmp.n = icmp eq i64 %0, %n.vec ; 
CHECK-NEXT: br i1 %cmp.n, , -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:for.cond.cleanup.loopexit +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: for.cond.cleanup.loopexit ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: for.cond.cleanup.loopexit: ; preds = %for.body ; CHECK-NEXT: br label %for.cond.cleanup -; CHECK-NEXT: LV: draw edge frommiddle.block -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:scalar.ph +; CHECK-NEXT: LV: draw edge from middle.block +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: scalar.ph ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: scalar.ph: ; preds = %vector.memcheck, %vector.scevcheck, %for.body.preheader ; CHECK-NEXT: %bc.resume.val = phi i64 [ %19, %middle.block ], [ %0, %for.body.preheader ], [ %0, %vector.scevcheck ], [ %0, %vector.memcheck ] ; CHECK-NEXT: %bc.resume.val5 = phi i32 [ %20, %middle.block ], [ %n, %for.body.preheader ], [ %n, %vector.scevcheck ], [ %n, %vector.memcheck ] ; CHECK-NEXT: br label %for.body -; CHECK-NEXT: LV: draw edge frommiddle.block -; CHECK-NEXT: LV: draw edge fromfor.body.preheader -; CHECK-NEXT: LV: draw edge fromvector.scevcheck -; CHECK-NEXT: LV: draw edge fromvector.memcheck -; CHECK-NEXT: LV: vectorizing VPBB:ir-bb in BB:for.body +; CHECK-NEXT: LV: draw edge from middle.block +; CHECK-NEXT: LV: draw edge from for.body.preheader +; CHECK-NEXT: LV: draw edge from vector.scevcheck +; CHECK-NEXT: LV: draw edge from vector.memcheck +; CHECK-NEXT: LV: vectorizing VPBB: ir-bb in BB: for.body ; CHECK-NEXT: LV: filled BB: ; CHECK-NEXT: for.body: ; preds = %for.body, %scalar.ph ; CHECK-NEXT: %indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %for.body ] @@ -796,7 +796,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0 -; CHECK-NEXT: LV: draw edge 
fromscalar.ph +; CHECK-NEXT: LV: draw edge from scalar.ph ; CHECK-NEXT: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. ; diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll deleted file mode 100644 index 3e88672f29242..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ /dev/null @@ -1,407 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s - -declare void @llvm.assume(i1) - -; %a is known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { -; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8( -; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; 
CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> poison, i8 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i8> [[TMP9]], i8 [[TMP13]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i8> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <2 x i8> [[PREDPHI]], ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i8, ptr [[GEP_B]], align 1 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i8 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i8, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i8 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i8 [[MERGE]], ptr [[GEP_C]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -entry: - call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ] - br label %loop.header - -loop.header: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] - %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv - %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv - %l.b = load i8, ptr %gep.b, align 1 - %c.1 = icmp sge i8 %l.b, 0 - br i1 %c.1, label %loop.latch, 
label %loop.then - -loop.then: - %l.a = load i8, ptr %gep.a, align 1 - br label %loop.latch - -loop.latch: - %merge = phi i8 [ %l.a, %loop.then ], [ %l.b, %loop.header ] - %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv - store i8 %merge, ptr %gep.c, align 1 - %iv.next = add nuw nsw i64 %iv, 1 - %ec = icmp eq i64 %iv.next, %n - br i1 %ec, label %exit, label %loop.header - -exit: - ret void -} - -; %a is known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { -; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32( -; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[MUL]]) ] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], 
label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: 
[[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -entry: - %mul = mul nsw nuw i64 %n, 4 - call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %mul) ] - br label %loop.header - -loop.header: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] - %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv - %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv - %l.b = load i32, ptr %gep.b, align 1 - %c.1 = icmp sge i32 %l.b, 0 - br i1 %c.1, label %loop.latch, label %loop.then - -loop.then: - %l.a = load i32, ptr %gep.a, align 1 - br label %loop.latch - -loop.latch: - %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] - %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv - store i32 %merge, 
ptr %gep.c, align 1 - %iv.next = add nuw nsw i64 %iv, 1 - %ec = icmp eq i64 %iv.next, %n - br i1 %ec, label %exit, label %loop.header - -exit: - ret void -} - - -; %a is NOT known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { -; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32( -; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[TMP16]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 1 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -entry: - call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 %n) ] - br label %loop.header - -loop.header: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] - %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv - %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv - %l.b = load i32, ptr %gep.b, align 1 - %c.1 = icmp sge i32 %l.b, 0 - br i1 %c.1, label %loop.latch, label %loop.then - -loop.then: - %l.a = load i32, ptr %gep.a, align 1 - br label %loop.latch - -loop.latch: - %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] - %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv - store i32 %merge, ptr %gep.c, align 1 - %iv.next = add nuw nsw i64 %iv, 1 - %ec = icmp eq i64 %iv.next, %n - br i1 %ec, label %exit, label %loop.header - -exit: - ret void -} - -; %a is NOT known dereferenceable via assume for the whole loop. 
-define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { -; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32( -; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 100) ] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ 
poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr 
[[GEP_B]], align 1 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -entry: - call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 100) ] - br label %loop.header - -loop.header: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] - %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv - %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv - %l.b = load i32, ptr %gep.b, align 1 - %c.1 = icmp sge i32 %l.b, 0 - br i1 %c.1, label %loop.latch, label %loop.then - -loop.then: - %l.a = load i32, ptr %gep.a, align 1 - br label %loop.latch - -loop.latch: - %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] - %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv - store i32 %merge, ptr %gep.c, align 1 - %iv.next = add nuw nsw i64 %iv, 1 - %ec = icmp eq i64 %iv.next, %n - br i1 %ec, label %exit, label %loop.header - -exit: - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll index 3a08681a2fb92..7c80dad006952 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll @@ -165,35 +165,6 @@ exit: ret void } -define void 
@loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) { -; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. -entry: - %n_bytes = mul nuw nsw i32 %n, 2 - call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ] - %tc = sext i32 %n to i64 - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] - %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv - %data = load i16, ptr %st.addr, align 2 - %inc = add nsw i16 %data, 1 - store i16 %inc, ptr %st.addr, align 2 - %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv - %ee.val = load i16, ptr %ee.addr, align 2 - %ee.cond = icmp sgt i16 %ee.val, 500 - br i1 %ee.cond, label %exit, label %for.inc - -for.inc: - %iv.next = add nuw nsw i64 %iv, 1 - %counted.cond = icmp eq i64 %iv.next, %tc - br i1 %counted.cond, label %exit, label %for.body - -exit: - ret void -} - define void @loop_contains_store_to_pointer_with_no_deref_info(ptr align 2 dereferenceable(40) readonly %load.array, ptr align 2 noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_to_pointer_with_no_deref_info' ; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index a0e52c13ec621..d2617a1986764 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -274,14 +274,11 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX-NEXT: ret void ; ; AVX2-LABEL: @gather_load_3( -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 24 -; AVX2-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP1]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr nonnull [[TMP14]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP7]], <8 x i32> -; AVX2-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], -; AVX2-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> +; AVX2-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> +; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_3( @@ -412,14 +409,11 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX-NEXT: ret void ; ; AVX2-LABEL: @gather_load_4( -; AVX2-NEXT: [[T26:%.*]] = getelementptr 
inbounds nuw i8, ptr [[T1:%.*]], i64 24 -; AVX2-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[T1]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr nonnull [[T26]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> -; AVX2-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], -; AVX2-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], +; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> +; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_4( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index 6c5638819dcea..8f31200a3683d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -274,14 +274,11 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX-NEXT: ret void ; ; AVX2-LABEL: @gather_load_3( -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 24 -; AVX2-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP1]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <4 
x i32> -; AVX2-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr nonnull [[TMP14]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP7]], <8 x i32> -; AVX2-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], -; AVX2-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP3:%.*]] = call <22 x i32> @llvm.masked.load.v22i32.p0(ptr [[TMP1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <22 x i32> [[TMP3]], <22 x i32> poison, <8 x i32> +; AVX2-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> +; AVX2-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_3( @@ -412,14 +409,11 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read ; AVX-NEXT: ret void ; ; AVX2-LABEL: @gather_load_4( -; AVX2-NEXT: [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 24 -; AVX2-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[T1]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr nonnull [[T26]], i32 4, <16 x i1> , <16 x i32> poison), !tbaa [[TBAA0]] -; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> poison, <4 x i32> -; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> -; AVX2-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], -; AVX2-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP1:%.*]] = call <22 x i32> 
@llvm.masked.load.v22i32.p0(ptr [[T1:%.*]], i32 4, <22 x i1> , <22 x i32> poison), !tbaa [[TBAA0]] +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <22 x i32> [[TMP1]], <22 x i32> poison, <8 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], +; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> +; AVX2-NEXT: store <8 x i32> [[TMP4]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX2-NEXT: ret void ; ; AVX512F-LABEL: @gather_load_4( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll index 09a5ace101e64..3fd9e126f4685 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll @@ -15,15 +15,9 @@ define void @test(ptr noalias %p, ptr noalias %p1) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32 -; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call <35 x i32> @llvm.masked.load.v35i32.p0(ptr [[P:%.*]], i32 4, <35 x i1> , <35 x i32> poison) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <35 x i32> [[TMP0]], <35 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[I2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP3]], <2 x i32> [[TMP0]], i64 2) ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP1]] ; 
CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[P1:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll index fb607c72a0e35..0f78e236b4248 100644 --- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -38,45 +38,12 @@ Z: ret void } -; Make sure the metadata name string is "branch_weights" before propagating it. - -define void @fake_weights(i1 %a, i1 %b) { -; CHECK-LABEL: @fake_weights( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_NOT:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A_NOT]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK: common.ret: -; CHECK-NEXT: ret void -; CHECK: Y: -; CHECK-NEXT: call void @helper(i32 0) -; CHECK-NEXT: br label [[COMMON_RET:%.*]] -; CHECK: Z: -; CHECK-NEXT: call void @helper(i32 1) -; CHECK-NEXT: br label [[COMMON_RET]] -; -entry: - br i1 %a, label %Y, label %X, !prof !12 -X: - %c = or i1 %b, false - br i1 %c, label %Z, label %Y, !prof !1 - -Y: - call void @helper(i32 0) - ret void - -Z: - call void @helper(i32 1) - ret void -} - define void @test2(i1 %a, i1 %b) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -107,7 +74,7 @@ define void @test3(i1 %a, i1 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label 
[[Z:%.*]], label [[Y:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -138,7 +105,7 @@ define void @test4(i1 %a, i1 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = or i1 [[B:%.*]], false ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[A:%.*]], i1 [[C]], i1 false -; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[Z:%.*]], label [[Y:%.*]], !prof [[PROF2]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: Y: @@ -1120,7 +1087,6 @@ exit: !9 = !{!"branch_weights", i32 7, i32 6} !10 = !{!"branch_weights", i32 672646, i32 21604207} !11 = !{!"branch_weights", i32 6960, i32 21597248} -!12 = !{!"these_are_not_the_branch_weights_you_are_looking_for", i32 3, i32 5} !13 = !{!"branch_weights", i32 2, i32 3} !14 = !{!"branch_weights", i32 4, i32 7} !15 = !{!"branch_weights", i32 99, i32 1} @@ -1136,8 +1102,8 @@ exit: ; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind ssp memory(none) } ;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 5, i32 11} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 3} -; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 5} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 5} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 3} ; CHECK: [[PROF3]] = !{!"branch_weights", i32 7, i32 1, i32 2} ; CHECK: [[PROF4]] = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35} ; CHECK: [[PROF5]] = !{!"branch_weights", i32 11, i32 5} diff --git a/llvm/test/Verifier/assume-bundles.ll b/llvm/test/Verifier/assume-bundles.ll index 4b6971d6be832..047e8d9bba3ed 100644 --- a/llvm/test/Verifier/assume-bundles.ll +++ b/llvm/test/Verifier/assume-bundles.ll @@ -30,8 +30,7 @@ define void @func(ptr %P, i32 %P1, ptr %P2, ptr %P3) { call void @llvm.assume(i1 true) ["separate_storage"(ptr %P)] ; CHECK: arguments to separate_storage assumptions should be pointers call void @llvm.assume(i1 true) ["separate_storage"(ptr %P, i32 123)] -; FIXME: The dereferenceable bundle is invalid. 
-; CHECK-NOT: call {{.+}}dereferenceable +; CHECK: this attribute should have 2 arguments call void @llvm.assume(i1 true) ["align"(ptr %P, i32 4), "dereferenceable"(ptr %P)] ret void } diff --git a/llvm/test/Verifier/branch-weight.ll b/llvm/test/Verifier/branch-weight.ll new file mode 100644 index 0000000000000..e3b0f340e31bc --- /dev/null +++ b/llvm/test/Verifier/branch-weight.ll @@ -0,0 +1,39 @@ +; Test MD_prof validation + +; RUN: split-file %s %t +; RUN: opt -passes=verify %t/valid.ll --disable-output +; RUN: not opt -passes=verify %t/invalid1.ll --disable-output 2>&1 | FileCheck %s +; RUN: not opt -passes=verify %t/invalid2.ll --disable-output 2>&1 | FileCheck %s + +;--- valid.ll +define void @test(i1 %0) { + br i1 %0, label %2, label %3, !prof !0 +2: + ret void +3: + ret void +} +!0 = !{!"branch_weights", i32 1, i32 2} + +;--- invalid1.ll +define void @test(i1 %0) { + br i1 %0, label %2, label %3, !prof !0 +2: + ret void +3: + ret void +} +!0 = !{!"invalid", i32 1, i32 2} + +;--- invalid2.ll +define void @test(i1 %0) { + br i1 %0, label %2, label %3, !prof !0 +2: + ret void +3: + ret void +} + +!0 = !{!"function_entry_count", i32 1} + +; CHECK: expected either branch_weights or VP profile name diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected index b8779b9d54ea7..f466b1de9fb5a 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected @@ -19,11 +19,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK-NEXT: st.param.v2.b64 [param0], {%rd2, %rd1}; ; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd4, %rd3}; ; CHECK-NEXT: .param .align 16 .b8 retval0[32]; -; CHECK-NEXT: call.uni (retval0), -; CHECK-NEXT: callee_St8x4, -; CHECK-NEXT: ( -; CHECK-NEXT: param0 
-; CHECK-NEXT: ); +; CHECK-NEXT: call.uni (retval0), callee_St8x4, (param0); ; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0]; ; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml new file mode 100644 index 0000000000000..d318ea01b4c30 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml @@ -0,0 +1,67 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 + +## Expect to find the branch labels and global variable name. +# CHECK: <_start>: +# CHECK-NEXT: ldr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] +# CHECK-NEXT: adr x2, +# CHECK-NEXT: cmp x1, x2 +# CHECK-NEXT: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: : +# CHECK-NEXT: cbz x2, +# CHECK-NEXT: ret + +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# 
CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + + .p2align 4 + .global fn1 +fn1: + b 0f + tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop + bl fn2 + bl .Lfn2 + adr x0, 2b + adr x1, fn2 + adr x2, .Lfn2 + ldr w0, 2b + ldr w0, fn2 + ret + + .p2align 4 + .global fn2 +fn2: +.Lfn2: ## Local label for non-interposable call. + bl .Lfn3 + ## In future, we might identify the pairs and symbolize the operands properly. + adrp x3, fn2 + add x3, x3, :lo12:fn2 + adrp x3, fn2 + ldr x0, [x3, :lo12:fn2] + ret + + .p2align 4 +.Lfn3: ## Private function + ret diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp index be10c32cf08d5..fb843285ada2a 100644 --- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp +++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp @@ -137,9 +137,9 @@ void Analysis::printInstructionRowCsv(const size_t PointId, std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - const MCSchedClassDesc *const SCDesc = - State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId); - writeEscaped(OS, SCDesc->Name); + StringRef SCDescName = + State_.getSubtargetInfo().getSchedModel().getSchedClassName(SchedClassId); + writeEscaped(OS, SCDescName); #else OS << SchedClassId; #endif @@ -563,7 +563,8 @@ Error Analysis::run( OS << "

Sched Class "; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - writeEscaped(OS, RSCAndPoints.RSC.SCDesc->Name); + writeEscaped(OS, SI.getSchedModel().getSchedClassName( + RSCAndPoints.RSC.SchedClassId)); #else OS << RSCAndPoints.RSC.SchedClassId; #endif diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 5ecb33375943f..c5967cd090eec 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1495,8 +1495,9 @@ collectLocalBranchTargets(ArrayRef Bytes, MCInstrAnalysis *MIA, // Supported by certain targets. const bool isPPC = STI->getTargetTriple().isPPC(); const bool isX86 = STI->getTargetTriple().isX86(); + const bool isAArch64 = STI->getTargetTriple().isAArch64(); const bool isBPF = STI->getTargetTriple().isBPF(); - if (!isPPC && !isX86 && !isBPF) + if (!isPPC && !isX86 && !isAArch64 && !isBPF) return; if (MIA) diff --git a/llvm/unittests/ADT/ArrayRefTest.cpp b/llvm/unittests/ADT/ArrayRefTest.cpp index 39a4a9b6a178c..3858d9064f9ca 100644 --- a/llvm/unittests/ADT/ArrayRefTest.cpp +++ b/llvm/unittests/ADT/ArrayRefTest.cpp @@ -8,10 +8,16 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" #include #include +#if __has_include() +#include +#endif +#ifdef __cpp_lib_span +#include +#endif + using namespace llvm; // Check that the ArrayRef-of-pointer converting constructor only allows adding @@ -406,4 +412,15 @@ TEST(ArrayRefTest, MutableArrayRefDeductionGuides) { } } +#ifdef __cpp_lib_span +static_assert(std::is_constructible_v, std::span>, + "should be able to construct ArrayRef from const std::span"); +static_assert(std::is_constructible_v, ArrayRef>, + "should be able to construct const std::span from ArrayRef"); +static_assert(std::is_constructible_v, std::span>, + "should be able to construct ArrayRef from mutable std::span"); +static_assert(!std::is_constructible_v, ArrayRef>, + "cannot 
construct mutable std::span from ArrayRef"); +#endif + } // end anonymous namespace diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp index 6eef0b5f91719..aa4d712cde09e 100644 --- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp +++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -23,7 +24,7 @@ using namespace llvm; using namespace llvm::memprof; -extern cl::opt MemProfKeepAllNotColdContexts; +LLVM_ABI extern cl::opt MemProfKeepAllNotColdContexts; namespace { diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index 0c2c06ec75d6e..8a6549b1b594e 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -19,6 +19,8 @@ #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" +#include "OrcTestCommon.h" + using namespace llvm; using namespace llvm::jitlink; using namespace llvm::orc; diff --git a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h index 0981f4b8132bd..6675921c29eb4 100644 --- a/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h +++ b/llvm/unittests/ExecutionEngine/Orc/OrcTestCommon.h @@ -82,6 +82,47 @@ class CoreAPIsBasedStandardTest : public testing::Test { unique_function)> DispatchOverride; }; +/// A ExecutorProcessControl instance that asserts if any of its methods are +/// used. Suitable for use is unit tests, and by ORC clients who haven't moved +/// to ExecutorProcessControl-based APIs yet. 
+class UnsupportedExecutorProcessControl : public ExecutorProcessControl, + private InProcessMemoryAccess { +public: + UnsupportedExecutorProcessControl( + std::shared_ptr SSP = nullptr, + std::unique_ptr D = nullptr, const std::string &TT = "", + unsigned PageSize = 0) + : ExecutorProcessControl( + SSP ? std::move(SSP) : std::make_shared(), + D ? std::move(D) : std::make_unique()), + InProcessMemoryAccess(Triple(TT).isArch64Bit()) { + this->TargetTriple = Triple(TT); + this->PageSize = PageSize; + this->MemAccess = this; + } + + Expected runAsMain(ExecutorAddr MainFnAddr, + ArrayRef Args) override { + llvm_unreachable("Unsupported"); + } + + Expected runAsVoidFunction(ExecutorAddr VoidFnAddr) override { + llvm_unreachable("Unsupported"); + } + + Expected runAsIntFunction(ExecutorAddr IntFnAddr, int Arg) override { + llvm_unreachable("Unsupported"); + } + + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, + ArrayRef ArgBuffer) override { + llvm_unreachable("Unsupported"); + } + + Error disconnect() override { return Error::success(); } +}; + } // end namespace orc class OrcNativeTarget { diff --git a/llvm/unittests/Frontend/CMakeLists.txt b/llvm/unittests/Frontend/CMakeLists.txt index 4048143b36819..2412cc9d26c7a 100644 --- a/llvm/unittests/Frontend/CMakeLists.txt +++ b/llvm/unittests/Frontend/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_unittest(LLVMFrontendTests OpenMPParsingTest.cpp OpenMPCompositionTest.cpp OpenMPDecompositionTest.cpp + OpenMPDirectiveNameTest.cpp DEPENDS acc_gen diff --git a/llvm/unittests/Frontend/OpenMPDirectiveNameTest.cpp b/llvm/unittests/Frontend/OpenMPDirectiveNameTest.cpp new file mode 100644 index 0000000000000..da648157ee9b6 --- /dev/null +++ b/llvm/unittests/Frontend/OpenMPDirectiveNameTest.cpp @@ -0,0 +1,96 @@ +//===- llvm/unittests/Frontend/OpenMPDirectiveNameTest.cpp ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Frontend/OpenMP/OMP.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::omp; + +const DenseMap &Expected52() { + static const DenseMap Names{ + {OMPD_begin_declare_target, "begin declare target"}, + {OMPD_begin_declare_variant, "begin declare variant"}, + {OMPD_cancellation_point, "cancellation point"}, + {OMPD_declare_mapper, "declare mapper"}, + {OMPD_declare_reduction, "declare reduction"}, + {OMPD_declare_simd, "declare simd"}, + {OMPD_declare_target, "declare target"}, + {OMPD_declare_variant, "declare variant"}, + {OMPD_end_declare_target, "end declare target"}, + {OMPD_end_declare_variant, "end declare variant"}, + {OMPD_target_data, "target data"}, + {OMPD_target_enter_data, "target enter data"}, + {OMPD_target_exit_data, "target exit data"}, + {OMPD_target_update, "target update"}, + }; + return Names; +} + +const DenseMap &Expected60() { + static const DenseMap Names{ + {OMPD_begin_declare_target, "begin declare_target"}, + {OMPD_begin_declare_variant, "begin declare_variant"}, + {OMPD_cancellation_point, "cancellation_point"}, + {OMPD_declare_mapper, "declare_mapper"}, + {OMPD_declare_reduction, "declare_reduction"}, + {OMPD_declare_simd, "declare_simd"}, + {OMPD_declare_target, "declare_target"}, + {OMPD_declare_variant, "declare_variant"}, + {OMPD_end_declare_target, "end declare_target"}, + {OMPD_end_declare_variant, "end declare_variant"}, + {OMPD_target_data, "target_data"}, + {OMPD_target_enter_data, "target_enter_data"}, + {OMPD_target_exit_data, "target_exit_data"}, + {OMPD_target_update, "target_update"}, + }; + return Names; +} + +class VersionTest : public testing::TestWithParam { +public: + void SetUp() override { + Version = 
GetParam(); + + if (Version < 60) + KindToName = &Expected52(); + else + KindToName = &Expected60(); + } + + const DenseMap *KindToName; + unsigned Version; +}; + +INSTANTIATE_TEST_SUITE_P(OpenMPDirectiveNames, VersionTest, + testing::ValuesIn(getOpenMPVersions())); + +TEST_P(VersionTest, DirectiveName) { + for (auto [Kind, Name] : *KindToName) + ASSERT_EQ(Name, getOpenMPDirectiveName(Kind, Version)); +} + +TEST(OpenMPDirectiveNames, DirectiveKind52) { + for (auto [Kind, Name] : Expected52()) { + auto [K, R] = getOpenMPDirectiveKindAndVersions(Name); + ASSERT_EQ(K, Kind); + // Expect the name to be valid in 5.2, but not in 6.0. + EXPECT_TRUE(52 <= R.Max && R.Max < 60); + } +} + +TEST(OpenMPDirectiveNames, DirectiveKind60) { + for (auto [Kind, Name] : Expected60()) { + auto [K, R] = getOpenMPDirectiveKindAndVersions(Name); + ASSERT_EQ(K, Kind); + // Expect the name to be valid in 6.0 and later. + EXPECT_TRUE(60 <= R.Min); + } +} diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index d7aa584bb8cb4..35bdbf8cc8321 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -1301,4 +1301,33 @@ TEST(DIBuilder, CompositeTypes) { EXPECT_EQ(Enum->getTag(), dwarf::DW_TAG_enumeration_type); } +TEST(DIBuilder, DynamicOffsetAndSize) { + LLVMContext Ctx; + auto M = std::make_unique("MyModule", Ctx); + DIBuilder DIB(*M); + DIScope *Scope = DISubprogram::getDistinct( + Ctx, nullptr, "", "", nullptr, 0, nullptr, 0, nullptr, 0, 0, + DINode::FlagZero, DISubprogram::SPFlagZero, nullptr); + DIFile *F = DIB.createFile("main.adb", "/"); + + DIVariable *Len = DIB.createAutoVariable(Scope, "length", F, 0, nullptr, + false, DINode::FlagZero, 0); + + DICompositeType *Struct = DIB.createStructType( + Scope, "some_record", F, 18, Len, 8, DINode::FlagZero, nullptr, {}); + EXPECT_EQ(Struct->getTag(), dwarf::DW_TAG_structure_type); + + SmallVector ops; + ops.push_back(llvm::dwarf::DW_OP_push_object_address); + 
DIExpression::appendOffset(ops, 3); + ops.push_back(llvm::dwarf::DW_OP_deref); + DIExpression *Expr = DIB.createExpression(ops); + + DIDerivedType *Field = DIB.createMemberType(Scope, "field", F, 23, Len, 0, + Expr, DINode::FlagZero, Struct); + + EXPECT_EQ(Field->getRawOffsetInBits(), Expr); + EXPECT_EQ(Field->getRawSizeInBits(), Len); +} + } // end namespace diff --git a/llvm/unittests/IR/DebugTypeODRUniquingTest.cpp b/llvm/unittests/IR/DebugTypeODRUniquingTest.cpp index e1ce671852c8b..6716796e71c5a 100644 --- a/llvm/unittests/IR/DebugTypeODRUniquingTest.cpp +++ b/llvm/unittests/IR/DebugTypeODRUniquingTest.cpp @@ -141,12 +141,12 @@ TEST(DebugTypeODRUniquingTest, buildODRTypeFields) { DO_FOR_FIELD(BaseType) \ DO_FOR_FIELD(Elements) \ DO_FOR_FIELD(VTableHolder) \ - DO_FOR_FIELD(TemplateParams) + DO_FOR_FIELD(TemplateParams) \ + DO_FOR_FIELD(SizeInBits) \ + DO_FOR_FIELD(OffsetInBits) #define FOR_EACH_INLINEFIELD() \ DO_FOR_FIELD(Line) \ - DO_FOR_FIELD(SizeInBits) \ DO_FOR_FIELD(AlignInBits) \ - DO_FOR_FIELD(OffsetInBits) \ DO_FOR_FIELD(NumExtraInhabitants) \ DO_FOR_FIELD(RuntimeLang) \ DO_FOR_FIELD(EnumKind) diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp index 0f6d07657c931..4d547011c1568 100644 --- a/llvm/unittests/TargetParser/TripleTest.cpp +++ b/llvm/unittests/TargetParser/TripleTest.cpp @@ -2281,6 +2281,44 @@ TEST(TripleTest, XROS) { EXPECT_EQ(VersionTuple(17), Version); } +TEST(TripleTest, BridgeOS) { + Triple T; + VersionTuple Version; + + T = Triple("arm64-apple-bridgeos"); + EXPECT_TRUE(T.isBridgeOS()); + EXPECT_FALSE(T.isXROS()); + EXPECT_TRUE(T.isOSDarwin()); + EXPECT_FALSE(T.isiOS()); + EXPECT_FALSE(T.isMacOSX()); + EXPECT_FALSE(T.isSimulatorEnvironment()); + EXPECT_EQ(T.getOSName(), "bridgeos"); + Version = T.getOSVersion(); + EXPECT_EQ(VersionTuple(0), Version); + + T = Triple("arm64-apple-bridgeos1.0"); + EXPECT_TRUE(T.isBridgeOS()); + EXPECT_FALSE(T.isXROS()); + EXPECT_TRUE(T.isOSDarwin()); + 
EXPECT_FALSE(T.isiOS()); + EXPECT_FALSE(T.isMacOSX()); + EXPECT_FALSE(T.isSimulatorEnvironment()); + EXPECT_EQ(T.getOSName(), "bridgeos1.0"); + Version = T.getOSVersion(); + EXPECT_EQ(VersionTuple(1), Version); + + T = Triple("arm64-apple-bridgeos9.0"); + EXPECT_TRUE(T.isBridgeOS()); + EXPECT_FALSE(T.isXROS()); + EXPECT_TRUE(T.isOSDarwin()); + EXPECT_FALSE(T.isiOS()); + EXPECT_FALSE(T.isMacOSX()); + EXPECT_FALSE(T.isSimulatorEnvironment()); + EXPECT_EQ(T.getOSName(), "bridgeos9.0"); + Version = T.getOSVersion(); + EXPECT_EQ(VersionTuple(9), Version); +} + TEST(TripleTest, getOSVersion) { Triple T; VersionTuple Version; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 77618b24cf115..408fe544d260f 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -27,6 +27,8 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" #include #include @@ -1380,6 +1382,10 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables, } OS << "}; // " << Target << "ReadAdvanceTable\n"; + // Pool all SchedClass names in a string table. + StringToOffsetTable StrTab; + unsigned InvalidNameOff = StrTab.GetOrAddStringOffset("InvalidSchedClass"); + // Emit a SchedClass table for each processor. for (const auto &[Idx, Proc] : enumerate(SchedModels.procModels())) { if (!Proc.hasInstrSchedModel()) @@ -1397,14 +1403,15 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables, // name and position. 
assert(SchedModels.getSchedClass(0).Name == "NoInstrModel" && "invalid class not first"); - OS << " {DBGFIELD(\"InvalidSchedClass\") " + OS << " {DBGFIELD(" << InvalidNameOff << ") " << MCSchedClassDesc::InvalidNumMicroOps << ", false, false, false, 0, 0, 0, 0, 0, 0},\n"; for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) { MCSchedClassDesc &MCDesc = SCTab[SCIdx]; const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx); - OS << " {DBGFIELD(\"" << SchedClass.Name << "\") "; + unsigned NameOff = StrTab.GetOrAddStringOffset(SchedClass.Name); + OS << " {DBGFIELD(/*" << SchedClass.Name << "*/ " << NameOff << ") "; if (SchedClass.Name.size() < 18) OS.indent(18 - SchedClass.Name.size()); OS << MCDesc.NumMicroOps << ", " << (MCDesc.BeginGroup ? "true" : "false") @@ -1419,6 +1426,8 @@ void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables, } OS << "}; // " << Proc.ModelName << "SchedClasses\n"; } + + StrTab.EmitStringTableDef(OS, Target + "SchedClassNames"); } void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) { @@ -1472,6 +1481,8 @@ void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) { else OS << " nullptr, nullptr, 0, 0," << " // No instruction-level machine model.\n"; + OS << " DBGVAL_OR_NULLPTR(&" << Target + << "SchedClassNames), // SchedClassNames\n"; if (PM.hasItineraries()) OS << " " << PM.ItinsDef->getName() << ",\n"; else @@ -1493,8 +1504,10 @@ void SubtargetEmitter::emitSchedModel(raw_ostream &OS) { << "#endif\n" << "#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)\n" << "#define DBGFIELD(x) x,\n" + << "#define DBGVAL_OR_NULLPTR(x) x\n" << "#else\n" << "#define DBGFIELD(x)\n" + << "#define DBGVAL_OR_NULLPTR(x) nullptr\n" << "#endif\n"; if (SchedModels.hasItineraries()) { @@ -1512,10 +1525,11 @@ void SubtargetEmitter::emitSchedModel(raw_ostream &OS) { } emitSchedClassTables(SchedTables, OS); - OS << "\n#undef DBGFIELD\n"; - // Emit the processor machine model emitProcessorModels(OS); + + OS 
<< "\n#undef DBGFIELD\n"; + OS << "\n#undef DBGVAL_OR_NULLPTR\n"; } static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) { diff --git a/mlir/include/mlir/Dialect/ArmNeon/Transforms.h b/mlir/include/mlir/Dialect/ArmNeon/Transforms.h index 52ebea2d0ffd9..2f0f634a96770 100644 --- a/mlir/include/mlir/Dialect/ArmNeon/Transforms.h +++ b/mlir/include/mlir/Dialect/ArmNeon/Transforms.h @@ -13,7 +13,7 @@ namespace mlir { class RewritePatternSet; namespace arm_neon { -void populateLowerContractionToSMMLAPatternPatterns( +void populateLowerContractionToNeonI8MMPatternPatterns( RewritePatternSet &patterns); } // namespace arm_neon diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 418931b931265..6895e946b8a45 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -965,19 +965,21 @@ def NVVM_ElectSyncOp : NVVM_Op<"elect.sync"> let summary = "Elect one leader thread"; let description = [{ The `elect.sync` instruction elects one predicated active leader - thread from among a set of threads specified in membermask. - The membermask is set to `0xFFFFFFFF` for the current version - of this Op. The predicate result is set to `True` for the - leader thread, and `False` for all other threads. + thread from among a set of threads specified in the `membermask`. + When the `membermask` is not provided explicitly, a default value + of `0xFFFFFFFF` is used. The predicate result is set to `True` for + the leader thread, and `False` for all other threads. [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync) }]; + let arguments = (ins Optional:$membermask); let results = (outs I1:$pred); - let assemblyFormat = "attr-dict `->` type(results)"; + let assemblyFormat = "($membermask^)? 
attr-dict `->` type(results)"; string llvmBuilder = [{ auto *resultTuple = createIntrinsicCall(builder, - llvm::Intrinsic::nvvm_elect_sync, {builder.getInt32(0xFFFFFFFF)}); + llvm::Intrinsic::nvvm_elect_sync, + {$membermask ? $membermask : builder.getInt32(0xFFFFFFFF)}); // Extract the second value into $pred $pred = builder.CreateExtractValue(resultTuple, 1); }]; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h index 57bf6305a469d..4f5fea107f07b 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/Linalg.h @@ -100,6 +100,20 @@ OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, #include "mlir/Dialect/Linalg/IR/LinalgOpsEnums.h.inc" +namespace mlir { +namespace linalg { + +/// Converts the given `m` and `r` parameters to a WinogradConv2DFmr enumeration +/// value. +std::optional getWinogradConv2DFmr(int64_t m, int64_t r); + +/// Converts the given WinogradConv2DFmr enumeration value to a pair of +/// m and r parameters. +std::pair getFmrFromWinogradConv2DFmr(WinogradConv2DFmr fmr); + +} // namespace linalg +} // namespace mlir + //===----------------------------------------------------------------------===// // Linalg Attributes //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td index ce68afe471fe8..1109db973f522 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgEnums.td @@ -122,4 +122,19 @@ def TypeFn : I32EnumAttr<"TypeFn", "", [ let cppNamespace = "::mlir::linalg"; } +/// We use F(m, r) to define the size of minimal filtering algorithms. +/// m is the output dimension and r is the filter dimension. We can get +/// the input dimension, alpha, from the formula, alpha = m + r - 1. 
+/// +/// For example, when m = 2 and r = 3, we know its input size is 4. +/// The Conv2D will operate on 4x4 input data with 3x3 filter and get +/// 2x2 output result. +def WinogradConv2DFmr : I32EnumAttr<"WinogradConv2DFmr", "", [ + I32EnumAttrCase<"F_2_3", 0>, + I32EnumAttrCase<"F_4_3", 1>, + I32EnumAttrCase<"F_2_5", 2>, +]>{ + let cppNamespace = "mlir::linalg"; +} + #endif // LINALG_ENUMS diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index 1b48bf5fcb237..7ff44c2e1d2ed 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -183,15 +183,13 @@ def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform", let arguments = (ins TensorRankOf<[AnyType], [4]>:$filter, TensorRankOf<[AnyType], [4]>:$output, - I64Attr:$m, - I64Attr:$r + WinogradConv2DFmr:$fmr ); let results = (outs TensorRankOf<[AnyType], [4]>:$result); let assemblyFormat = [{ attr-dict - `m` `(` $m `)` - `r` `(` $r `)` + `fmr` `(` $fmr `)` `ins` `(` $filter `:` type($filter) `)` `outs` `(` $output `:` type($output) `)` `->` type($result) @@ -254,15 +252,13 @@ def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform", let arguments = (ins TensorRankOf<[AnyType], [4]>:$input, TensorRankOf<[AnyType], [6]>:$output, - I64Attr:$m, - I64Attr:$r + WinogradConv2DFmr:$fmr ); let results = (outs TensorRankOf<[AnyType], [6]>:$result); let assemblyFormat = [{ attr-dict - `m` `(` $m `)` - `r` `(` $r `)` + `fmr` `(` $fmr `)` `ins` `(` $input `:` type($input) `)` `outs` `(` $output `:` type($output) `)` `->` type($result) @@ -343,15 +339,13 @@ def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform", let arguments = (ins TensorRankOf<[AnyType], [6]>:$value, TensorRankOf<[AnyType], [4]>:$output, - I64Attr:$m, - I64Attr:$r + WinogradConv2DFmr:$fmr ); let results = (outs TensorRankOf<[AnyType], [4]>:$result); let assemblyFormat = [{ attr-dict 
- `m` `(` $m `)` - `r` `(` $r `)` + `fmr` `(` $fmr `)` `ins` `(` $value `:` type($value) `)` `outs` `(` $output `:` type($output) `)` `->` type($result) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 9d6ce653e285c..d64f94a49f781 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -9,6 +9,7 @@ #ifndef LINALG_TRANSFORM_OPS #define LINALG_TRANSFORM_OPS +include "mlir/Dialect/Linalg/IR/LinalgEnums.td" include "mlir/Dialect/Linalg/TransformOps/LinalgTransformEnums.td" include "mlir/Dialect/Transform/IR/TransformAttrs.td" include "mlir/Dialect/Transform/IR/TransformDialect.td" @@ -2902,8 +2903,7 @@ def WinogradConv2DOp : Op transposeBatchMatmul(RewriterBase &rewriter, /// F(m x m, r x r). m is the dimension size of output and r is the dimension /// size of filter. FailureOr winogradConv2D(RewriterBase &rewriter, - linalg::Conv2DNhwcFhwcOp op, int64_t m, - int64_t r); + linalg::Conv2DNhwcFhwcOp op, + WinogradConv2DFmr fmr); /// Rewrite linalg.winograd_filter_transform. The data layout of the filter is /// FHWC. The transformation matrix is 2-dimension. We need to extract H x W @@ -1968,8 +1969,8 @@ void populateBlockPackMatmulPatterns(RewritePatternSet &patterns, const ControlBlockPackMatmulFn &controlFn); /// Patterns to apply Winograd Conv2D algorithm F(m x m, r x r). -void populateWinogradConv2DPatterns(RewritePatternSet &patterns, int64_t m, - int64_t r); +void populateWinogradConv2DPatterns(RewritePatternSet &patterns, + WinogradConv2DFmr fmr); /// Patterns to decompose Winograd operators. 
void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h index 7b43aa43c7517..3205da6e448fc 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -313,19 +313,23 @@ tileConsumerAndFuseProducersUsingSCF(RewriterBase &rewriter, TilingInterface consumer, const SCFTileAndFuseOptions &options); -/// Fuse the consumer of the source of `candidateSliceOp` by computing the -/// required slice of the consumer in-place. Note that the method -/// replaces the uses of `candidateSliceOp` with the tiled and fused consumer -/// value but does not delete the slice operation. +/// Fuse the consumer `candidateSlices` by computing the required slice of the +/// consumer in-place. All the entries of `candidateSlices` are expected to map +/// to the same consumer. The method returns an error if the consumer cannot be +/// tiled in a manner that is consistent for all the passed slices. Note that +/// the method replaces the uses of `candidateSlices` with the tiled and fused +/// consumer value but does not delete the slice operations. struct SCFFuseConsumerOfSliceResult { - OpOperand *origConsumerOperand; // Original untiled consumer's operand. - OpOperand - *tiledAndFusedConsumerOperand; // Tiled and fused consumer's operand. + // Original untiled consumer operands. + SmallVector origConsumerOperands; + // Tiled and fused consumer operands. + SmallVector tiledAndFusedConsumerOperands; SmallVector tiledOps; }; FailureOr -tileAndFuseConsumerOfSlice(RewriterBase &rewriter, Operation *candidateSliceOp, - MutableArrayRef loops); +tileAndFuseConsumerOfSlices(RewriterBase &rewriter, + ArrayRef candidateSlices, + MutableArrayRef loops); /// Method to lower an `op` that implements the `TilingInterface` to /// loops/scalars. 
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h index 18981337742eb..87deef9ca7466 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h @@ -31,12 +31,16 @@ namespace tensor { FailureOr replaceExtractSliceWithTiledProducer( OpBuilder &builder, tensor::ExtractSliceOp sliceOp, OpResult producerOp); -/// Method to swap an `tensor.insert_slice` with its consumer when the -/// consumer implements the `TilingInterface`. +/// Method to swap `tensor.insert_slice`s with their consumers when the +/// consumer implements the `TilingInterface`. The size of `sliceOps` and +/// `consumerOperands` is expected to be the same. Every entry in +/// `consumerOperands` represents a use of the corresponding +/// entry in `sliceOps` in the consumer. All entries of `consumerOperands` are +/// expected to be uses in the same consumer. FailureOr -replaceInsertSliceWithTiledConsumer(OpBuilder &builder, - OffsetSizeAndStrideOpInterface sliceOp, - OpOperand &consumerOp); +replaceInsertSlicesWithTiledConsumer(OpBuilder &builder, + ArrayRef sliceOps, + ArrayRef consumerOperands); //===----------------------------------------------------------------------===// // Populate functions. 
diff --git a/mlir/include/mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.td b/mlir/include/mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.td index 0275f241fda35..4a6898e36d343 100644 --- a/mlir/include/mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.td +++ b/mlir/include/mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.td @@ -20,7 +20,7 @@ include "mlir/Dialect/Transform/Interfaces/MatchInterfaces.td" include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.td" include "mlir/Dialect/Transform/IR/TransformDialect.td" -def DebugEmitRemarkAtOp : TransformDialectOp<"debug.emit_remark_at", +def EmitRemarkAtOp : TransformDialectOp<"debug.emit_remark_at", [MatchOpInterface, DeclareOpInterfaceMethods, MemoryEffectsOpInterface, NavigationTransformOpTrait]> { @@ -39,7 +39,7 @@ def DebugEmitRemarkAtOp : TransformDialectOp<"debug.emit_remark_at", let assemblyFormat = "$at `,` $message attr-dict `:` type($at)"; } -def DebugEmitParamAsRemarkOp +def EmitParamAsRemarkOp : TransformDialectOp<"debug.emit_param_as_remark", [MatchOpInterface, DeclareOpInterfaceMethods, diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 02e62930a742d..d58ee84bee63d 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -2920,8 +2920,8 @@ def Vector_SplatOp : Vector_Op<"splat", [ ]> { let summary = "vector splat or broadcast operation"; let description = [{ - Broadcast the operand to all elements of the result vector. The operand is - required to be of integer/index/float type. + Broadcast the operand to all elements of the result vector. The type of the + operand must match the element type of the vector type. 
Example: @@ -2931,8 +2931,7 @@ def Vector_SplatOp : Vector_Op<"splat", [ ``` }]; - let arguments = (ins AnyTypeOf<[AnySignlessInteger, Index, AnyFloat], - "integer/index/float type">:$input); + let arguments = (ins AnyType:$input); let results = (outs AnyVectorOfAnyRank:$aggregate); let builders = [ diff --git a/mlir/include/mlir/IR/EnumAttr.td b/mlir/include/mlir/IR/EnumAttr.td index 3f7f747ac20d3..ff6cec6d41161 100644 --- a/mlir/include/mlir/IR/EnumAttr.td +++ b/mlir/include/mlir/IR/EnumAttr.td @@ -39,8 +39,11 @@ class EnumCase { class IntEnumAttrCaseBase : EnumCase, SignlessIntegerAttrBase { - let predicate = - CPred<"::llvm::cast<::mlir::IntegerAttr>($_self).getInt() == " # intVal>; + let predicate = CPred<[{ + ::llvm::cast<::mlir::IntegerAttr>($_self).getValue().eq(::llvm::APInt(}] + # intType.bitwidth # ", " + # intVal # + "))">; } // Cases of integer enums with a specific type. By default, the string diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index 31f54413a5ff0..663c256c848df 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -272,7 +272,7 @@ class OpFoldResult : public PointerUnion { using PointerUnion::PointerUnion; public: - void dump() const { llvm::errs() << *this << "\n"; } + LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this << "\n"; } MLIRContext *getContext() const { PointerUnion pu = *this; diff --git a/mlir/include/mlir/IR/Visitors.h b/mlir/include/mlir/IR/Visitors.h index 15abf2559e5c4..893f66ae33deb 100644 --- a/mlir/include/mlir/IR/Visitors.h +++ b/mlir/include/mlir/IR/Visitors.h @@ -14,6 +14,7 @@ #define MLIR_IR_VISITORS_H #include "mlir/Support/LLVM.h" +#include "mlir/Support/WalkResult.h" #include "llvm/ADT/STLExtras.h" namespace mlir { @@ -23,41 +24,6 @@ class Operation; class Block; class Region; -/// A utility result that is used to signal how to proceed with an ongoing walk: -/// * Interrupt: the walk will be interrupted and no more 
operations, regions -/// or blocks will be visited. -/// * Advance: the walk will continue. -/// * Skip: the walk of the current operation, region or block and their -/// nested elements that haven't been visited already will be skipped and will -/// continue with the next operation, region or block. -class WalkResult { - enum ResultEnum { Interrupt, Advance, Skip } result; - -public: - WalkResult(ResultEnum result = Advance) : result(result) {} - - /// Allow LogicalResult to interrupt the walk on failure. - WalkResult(LogicalResult result) - : result(failed(result) ? Interrupt : Advance) {} - - /// Allow diagnostics to interrupt the walk. - WalkResult(Diagnostic &&) : result(Interrupt) {} - WalkResult(InFlightDiagnostic &&) : result(Interrupt) {} - - bool operator==(const WalkResult &rhs) const { return result == rhs.result; } - bool operator!=(const WalkResult &rhs) const { return result != rhs.result; } - - static WalkResult interrupt() { return {Interrupt}; } - static WalkResult advance() { return {Advance}; } - static WalkResult skip() { return {Skip}; } - - /// Returns true if the walk was interrupted. - bool wasInterrupted() const { return result == Interrupt; } - - /// Returns true if the walk was skipped. - bool wasSkipped() const { return result == Skip; } -}; - /// Traversal order for region, block and operation walk utilities. enum class WalkOrder { PreOrder, PostOrder }; diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td index 0de37338c95e4..0c0fc88aec95a 100644 --- a/mlir/include/mlir/Interfaces/TilingInterface.td +++ b/mlir/include/mlir/Interfaces/TilingInterface.td @@ -202,28 +202,28 @@ def TilingInterface : OpInterface<"TilingInterface"> { InterfaceMethod< /*desc=*/[{ Method to generate the tiled implementation of an operation that uses - exactly a tile of the given operand. + the exact tiles of the given operands. 
This method is required to allow operations to be "tiled and fused" - with an (already tiled) producer. Given a tile of the producer, this - method generates the tile of the consumer that uses exactly this - produced tile. In some sense it is the "reverse" of + with an (already tiled) producer. Given tiles of the producer, this + method generates the tile of the consumer that uses exactly these + produced tiles. In some sense it is the "reverse" of `generateResultTileValue`. - - `operandNumber` is the result of the producer used by the consumer. - - `offsets` is the offset of the slice of the producer result used by - the tiled implementation of the consumer. - - `sizes` is the size of the slice of the producer result used by the + - `operandNumbers` is the list of operands whose tiles are "producers". + - `allOffsets` is the offset of the slice of the producer used by the + tiled implementation of the consumer. + - `allSizes` is the size of the slice of the producer used by the consumer. - If it is illegal to fuse with a producer along the given operand for + If it is illegal to fuse with a producer along the given operand tiles for an operation, the implementation should return a failure. }], /*retType=*/"::mlir::FailureOr<::mlir::TilingResult>", - /*methodName=*/"getTiledImplementationFromOperandTile", + /*methodName=*/"getTiledImplementationFromOperandTiles", /*args=*/(ins "::mlir::OpBuilder &":$b, - "unsigned":$operandNumber, - "::mlir::ArrayRef<::mlir::OpFoldResult>":$offsets, - "::mlir::ArrayRef<::mlir::OpFoldResult>":$sizes), + "::mlir::ArrayRef":$operandNumbers, + "::mlir::ArrayRef<::mlir::SmallVector<::mlir::OpFoldResult>>":$allOffsets, + "::mlir::ArrayRef<::mlir::SmallVector<::mlir::OpFoldResult>>":$allSizes), /*methodBody=*/"", /*defaultImplementation=*/[{ return failure(); @@ -235,16 +235,17 @@ def TilingInterface : OpInterface<"TilingInterface"> { tile of the operand. 
This method is required to allow operations to be "tiled and fused" - with an (already tiled) producer. Given a tile of an operand, - returns the tile of the iteration space that uses this tile. - - `operandNumber` is the result of the producer used by the consumer. - - `offsets` is the offset of the slice of the producer result used by + with an (already tiled) producer. Given tiles of operands, + returns the tile of the iteration space that uses these tiles. + - `operandNumbers` is the list of operands whose tiles are "produced" + by the producer(s). + - `allOffsets` is the offset of the slice of the producers used by the tiled implementation of the consumer. - - `sizes` is the size of the slice of the producer result used by the + - `allSizes` is the size of the slice of the producers used by the consumer. - If it is illegal to fuse with a producer along the given operand for - an operation, or if this mapping cannot be computed, the - implementation should return a failure. + If it is illegal to fuse with the producer slices for an operation, + or if this mapping cannot be computed, the implementation should + return a failure. Note that unlike the "tile consumer and fuse producer" case, the "tile producer and fuse consumer" requires an additional method to get @@ -285,17 +286,17 @@ def TilingInterface : OpInterface<"TilingInterface"> { transformation. It does not provide guarantees on whether such a transformation is profitable. - For most cases `getTiledImplementationFromOperandTile` could be a - implemented using `getIterationDomainTileFromOperandTile` + + For most cases `getTiledImplementationFromOperandTiles` could be a + implemented using `getIterationDomainTileFromOperandTiles` + `getTiledImplementation` methods. 
}], /*retType=*/"::llvm::LogicalResult", - /*methodName=*/"getIterationDomainTileFromOperandTile", + /*methodName=*/"getIterationDomainTileFromOperandTiles", /*args=*/(ins "::mlir::OpBuilder &":$b, - "unsigned":$operandNumber, - "::mlir::ArrayRef<::mlir::OpFoldResult> ":$offsets, - "::mlir::ArrayRef<::mlir::OpFoldResult> ":$sizes, + "::mlir::ArrayRef":$operandNumbers, + "::mlir::ArrayRef<::mlir::SmallVector<::mlir::OpFoldResult>> ":$allOffsets, + "::mlir::ArrayRef<::mlir::SmallVector<::mlir::OpFoldResult>> ":$allSizes, "::mlir::SmallVectorImpl<::mlir::OpFoldResult> &":$iterDomainOffsets, "::mlir::SmallVectorImpl<::mlir::OpFoldResult> &":$iterDomainSizes), /*methodBody=*/"", diff --git a/mlir/include/mlir/IR/StateStack.h b/mlir/include/mlir/Support/StateStack.h similarity index 96% rename from mlir/include/mlir/IR/StateStack.h rename to mlir/include/mlir/Support/StateStack.h index 6a22e3b0d00a4..ef0f5d198b456 100644 --- a/mlir/include/mlir/IR/StateStack.h +++ b/mlir/include/mlir/Support/StateStack.h @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_IR_STACKFRAME_H -#define MLIR_IR_STACKFRAME_H +#ifndef MLIR_SUPPORT_STACKFRAME_H +#define MLIR_SUPPORT_STACKFRAME_H -#include "mlir/IR/Visitors.h" #include "mlir/Support/TypeID.h" +#include "mlir/Support/WalkResult.h" #include namespace mlir { @@ -125,4 +125,4 @@ struct isa_impl { }; } // namespace llvm -#endif // MLIR_IR_STACKFRAME_H +#endif // MLIR_SUPPORT_STACKFRAME_H diff --git a/mlir/include/mlir/Support/WalkResult.h b/mlir/include/mlir/Support/WalkResult.h new file mode 100644 index 0000000000000..cd3b1e1562796 --- /dev/null +++ b/mlir/include/mlir/Support/WalkResult.h @@ -0,0 +1,59 @@ +//===- WalkResult.h - Status of completed walk ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Result kind for completed walk. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_SUPPORT_WALKRESULT_H +#define MLIR_SUPPORT_WALKRESULT_H + +#include "mlir/Support/LLVM.h" + +namespace mlir { +class Diagnostic; +class InFlightDiagnostic; + +/// A utility result that is used to signal how to proceed with an ongoing walk: +/// * Interrupt: the walk will be interrupted and no more operations, regions +/// or blocks will be visited. +/// * Advance: the walk will continue. +/// * Skip: the walk of the current operation, region or block and their +/// nested elements that haven't been visited already will be skipped and will +/// continue with the next operation, region or block. +class WalkResult { + enum ResultEnum { Interrupt, Advance, Skip } result; + +public: + WalkResult(ResultEnum result = Advance) : result(result) {} + + /// Allow LogicalResult to interrupt the walk on failure. + WalkResult(LogicalResult result) + : result(failed(result) ? Interrupt : Advance) {} + + /// Allow diagnostics to interrupt the walk. + WalkResult(Diagnostic &&) : result(Interrupt) {} + WalkResult(InFlightDiagnostic &&) : result(Interrupt) {} + + bool operator==(const WalkResult &rhs) const { return result == rhs.result; } + bool operator!=(const WalkResult &rhs) const { return result != rhs.result; } + + static WalkResult interrupt() { return {Interrupt}; } + static WalkResult advance() { return {Advance}; } + static WalkResult skip() { return {Skip}; } + + /// Returns true if the walk was interrupted. + bool wasInterrupted() const { return result == Interrupt; } + + /// Returns true if the walk was skipped. 
+ bool wasSkipped() const { return result == Skip; } +}; + +} // namespace mlir + +#endif diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 197be5f30b5b0..79e8bb6add0da 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -16,9 +16,9 @@ #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h" #include "mlir/IR/Operation.h" -#include "mlir/IR/StateStack.h" #include "mlir/IR/SymbolTable.h" #include "mlir/IR/Value.h" +#include "mlir/Support/StateStack.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" #include "mlir/Target/LLVMIR/TypeToLLVM.h" diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp index bd2846ac388fd..945d38e929e08 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp @@ -991,7 +991,7 @@ void PDLToPDLInterpPass::runOnOperation() { module.getLoc(), pdl_interp::PDLInterpDialect::getMatcherFunctionName(), builder.getFunctionType(builder.getType(), /*results=*/{}), - /*attrs=*/std::nullopt); + /*attrs=*/ArrayRef()); // Create a nested module to hold the functions invoked for rewriting the IR // after a successful match. 
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 923f5f67b865a..c2be08ef40f21 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -310,8 +310,8 @@ static Value createLinalgBodyCalculationForElementwiseOp( auto shifted = rewriter.create(loc, resultTypes, args[0], subtract) ->getResults(); - auto truncated = - rewriter.create(loc, i1Ty, shifted, std::nullopt); + auto truncated = rewriter.create( + loc, i1Ty, shifted, ArrayRef()); auto isInputOdd = rewriter.create(loc, i1Ty, truncated, i1one); @@ -552,20 +552,20 @@ static Value createLinalgBodyCalculationForElementwiseOp( if (isa(srcTy) && isa(dstTy) && bitExtend) return rewriter.create(loc, resultTypes, args, - std::nullopt); + ArrayRef()); if (isa(srcTy) && isa(dstTy) && !bitExtend) return rewriter.create(loc, resultTypes, args, - std::nullopt); + ArrayRef()); // 1-bit integers need to be treated as signless. if (srcTy.isInteger(1) && arith::UIToFPOp::areCastCompatible(srcTy, dstTy)) return rewriter.create(loc, resultTypes, args, - std::nullopt); + ArrayRef()); if (srcTy.isInteger(1) && isa(dstTy) && bitExtend) return rewriter.create(loc, resultTypes, args, - std::nullopt); + ArrayRef()); // Unsigned integers need an unrealized cast so that they can be passed // to UIToFP. @@ -583,7 +583,7 @@ static Value createLinalgBodyCalculationForElementwiseOp( // All other si-to-fp conversions should be handled by SIToFP. if (arith::SIToFPOp::areCastCompatible(srcTy, dstTy)) return rewriter.create(loc, resultTypes, args, - std::nullopt); + ArrayRef()); // Casting to boolean, floats need to only be checked as not-equal to zero. 
if (isa(srcTy) && dstTy.isInteger(1)) { @@ -690,7 +690,7 @@ static Value createLinalgBodyCalculationForElementwiseOp( if (isa(srcTy) && isa(dstTy) && bitExtend) return rewriter.create(loc, resultTypes, args, - std::nullopt); + ArrayRef()); if (isa(srcTy) && isa(dstTy) && !bitExtend) { return rewriter.create(loc, dstTy, args[0]); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index 293e01a5bf4d4..67c0eca15638a 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -85,7 +85,7 @@ void ConvertVectorToLLVMPass::runOnOperation() { populateVectorGatherLoweringPatterns(patterns); if (armI8MM) { if (armNeon) - arm_neon::populateLowerContractionToSMMLAPatternPatterns(patterns); + arm_neon::populateLowerContractionToNeonI8MMPatternPatterns(patterns); if (armSVE) populateLowerContractionToSVEI8MMPatternPatterns(patterns); } diff --git a/mlir/lib/Dialect/ArmNeon/TransformOps/ArmNeonVectorTransformOps.cpp b/mlir/lib/Dialect/ArmNeon/TransformOps/ArmNeonVectorTransformOps.cpp index e81fc6a8b5980..d07e6a52d8b5f 100644 --- a/mlir/lib/Dialect/ArmNeon/TransformOps/ArmNeonVectorTransformOps.cpp +++ b/mlir/lib/Dialect/ArmNeon/TransformOps/ArmNeonVectorTransformOps.cpp @@ -20,7 +20,7 @@ using namespace mlir; void transform::ApplyArmNeonContractionToI8MMPatternsOp::populatePatterns( RewritePatternSet &patterns) { - arm_neon::populateLowerContractionToSMMLAPatternPatterns(patterns); + arm_neon::populateLowerContractionToNeonI8MMPatternPatterns(patterns); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/ArmNeon/Transforms/CMakeLists.txt b/mlir/lib/Dialect/ArmNeon/Transforms/CMakeLists.txt index 84fb1b0116d2a..06bafde451cbb 100644 --- a/mlir/lib/Dialect/ArmNeon/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/ArmNeon/Transforms/CMakeLists.txt @@ -1,5 
+1,5 @@ add_mlir_dialect_library(MLIRArmNeonTransforms - LowerContractionToSMMLAPattern.cpp + LowerContractionToNeonI8MMPattern.cpp DEPENDS MLIRArmNeonIncGen diff --git a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToNeonI8MMPattern.cpp similarity index 59% rename from mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp rename to mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToNeonI8MMPattern.cpp index 5ce3d2b28aeb3..7180884c77e98 100644 --- a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp +++ b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToNeonI8MMPattern.cpp @@ -1,4 +1,4 @@ -//===- LowerContractionToSMMLAPattern.cpp - Contract to SMMLA ---*- C++ -*-===// +//===- LowerContractionToNeonI8MMPattern.cpp - Contract to I8MM -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,10 +6,15 @@ // //===----------------------------------------------------------------------===// // -// This file implements lowering patterns from vector.contract to -// arm_neon.intr.smmla +// This file implements lowering patterns from vector.contract to operations +// that map to instructions from the Neon FEAT_I8MM extension. // -//===--- +// TODO: There may be opportunities to unify this with a similar pattern +// for SVE. See: +// https://github.com/llvm/llvm-project/issues/145559 +// LowerContractionToSVEI8MMPattern.cpp +// +//===----------------------------------------------------------------------===// #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/ArmNeon/ArmNeonDialect.h" @@ -37,12 +42,87 @@ static Type matchContainerType(Type element, Type container) { return element; } +// Get the operand of a `vector.contract`. 
This function is intended to abstract +// away from the particular way a value is extended before feeding it into the +// `vector.contract` - via zero-extend or an explicit or implicit sign-extend +// (for implicit sign-extension see `vector.contract` documentation). +// +// The template parameter `Op` indicates the extension operation (explicit or +// implicit) for which we are checking. +// +// Return success only for extensions from `iN` (N <= 8) to `i32`. +template +std::optional getExtOperand(Value v) { + + static_assert(llvm::is_one_of::value, + "Must be instantiated with either sign- or zero- extension op"); + + // If the operand is not defined by an explicit extend operation of the + // accepted operation type allow for an implicit sign-extension. + auto extOp = dyn_cast_or_null(v.getDefiningOp()); + if (!extOp) { + if constexpr (std::is_same::value) { + auto eltTy = cast(v.getType()).getElementType(); + if (!eltTy.isSignlessInteger() || eltTy.getIntOrFloatBitWidth() > 8) + return {}; + return v; + } + return {}; + } + + // If the operand is defined by an explicit extend operation of the accepted + // operation type, check it's extended from `iN` (N <= 8) to `i32`. + auto inOp = extOp.getIn(); + auto inTy = dyn_cast(inOp.getType()); + if (!inTy) + return {}; + auto inEltTy = inTy.getElementType(); + if (!inEltTy.isSignlessInteger() || inEltTy.getIntOrFloatBitWidth() > 8) + return {}; + + auto outTy = dyn_cast(extOp.getType()); + if (!(outTy && outTy.getElementType().isSignlessInteger(32))) + return {}; + + return inOp; +} + +// Designate the operation (resp. instruction) used to do sub-tile matrix +// multiplications. +enum class MMLA { + Signed, // smmla + Unsigned, // ummla + Mixed, // usmmla + MixedSwapped // usmmla with LHS and RHS swapped +}; + +// Create the matrix multiply and accumulate operation according to `op`. 
+Value createMMLA(PatternRewriter &rewriter, MMLA op, Location loc, + mlir::Type accType, Value acc, Value lhs, Value rhs) { + switch (op) { + case MMLA::Signed: + return rewriter.createOrFold(loc, accType, acc, lhs, + rhs); + case MMLA::Unsigned: + return rewriter.createOrFold(loc, accType, acc, lhs, + rhs); + case MMLA::Mixed: + return rewriter.createOrFold(loc, accType, acc, lhs, + rhs); + case MMLA::MixedSwapped: + // The accumulator comes transposed and the result will be transposed + // later, so all we have to do here is swap the operands. + return rewriter.createOrFold(loc, accType, acc, rhs, + lhs); + } +} + /// Lowering from a vector::contractOp arm neon smmla intrinsic. This will tile /// any vector.contract into multiple smmla instructions with unrolling so long /// as [2,2,8] is a divisor of its shape. It can also process vecmats with dimM /// = 1 (either explicitly or inferred if LHS has only dimK) If no unrolling is /// necessary, a single smmla instruction is emitted. -class LowerContractionToSMMLAPattern +class LowerContractionToNeonI8MMPattern : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; @@ -88,39 +168,64 @@ class LowerContractionToSMMLAPattern return failure(); } - // Check two extsi inputs Rhs Lhs for contract. - arith::ExtSIOp origLhsExtOp = - dyn_cast_or_null(op.getLhs().getDefiningOp()); - arith::ExtSIOp origRhsExtOp = - dyn_cast_or_null(op.getRhs().getDefiningOp()); - if (!origLhsExtOp || !origRhsExtOp) { + // Check inputs are sign-/zero- extensions from iN (N <= 8) to i32. Get the + // values before the extension. All four signed/unsigned combinations for + // input operands are supported, but they are lowered to different + // operations. Determine which is the appropriate operation to lower to. 
+ MMLA mmlaOp = MMLA::Signed; + auto maybeLhs = getExtOperand(op.getLhs()); + if (!maybeLhs) { + mmlaOp = MMLA::Unsigned; + maybeLhs = getExtOperand(op.getLhs()); + } + if (!maybeLhs) return failure(); + + auto maybeRhs = getExtOperand(op.getRhs()); + if (maybeRhs) { + if (mmlaOp == MMLA::Unsigned) + mmlaOp = MMLA::Mixed; + } else { + if (mmlaOp == MMLA::Signed) + mmlaOp = MMLA::MixedSwapped; + maybeRhs = getExtOperand(op.getRhs()); } + if (!maybeRhs) + return failure(); + + Value origLhs = *maybeLhs; + Value origRhs = *maybeRhs; // Match any iX to i32 for X<8 then turn into an i8 output. Feed into // following neon instruction. Check inputs for extsi are <=i8 - Value extsiLhs; - Value extsiRhs; - if (auto lhsExtInType = - dyn_cast(origLhsExtOp.getIn().getType())) { + Value extLhs; + Value extRhs; + if (auto lhsExtInType = dyn_cast(origLhs.getType())) { if (lhsExtInType.getElementTypeBitWidth() <= 8) { Type targetLhsExtTy = matchContainerType(rewriter.getI8Type(), lhsExtInType); - extsiLhs = rewriter.createOrFold(loc, targetLhsExtTy, - origLhsExtOp.getIn()); + if (mmlaOp == MMLA::Signed || mmlaOp == MMLA::Mixed) + extLhs = rewriter.createOrFold(loc, targetLhsExtTy, + origLhs); + else + extLhs = rewriter.createOrFold(loc, targetLhsExtTy, + origLhs); } } - if (auto rhsExtInType = - dyn_cast(origRhsExtOp.getIn().getType())) { + if (auto rhsExtInType = dyn_cast(origRhs.getType())) { if (rhsExtInType.getElementTypeBitWidth() <= 8) { Type targetRhsExtTy = matchContainerType(rewriter.getI8Type(), rhsExtInType); - extsiRhs = rewriter.createOrFold(loc, targetRhsExtTy, - origRhsExtOp.getIn()); + if (mmlaOp == MMLA::Unsigned || mmlaOp == MMLA::Mixed) + extRhs = rewriter.createOrFold(loc, targetRhsExtTy, + origRhs); + else + extRhs = rewriter.createOrFold(loc, targetRhsExtTy, + origRhs); } } - if (!extsiLhs || !extsiRhs) { + if (!extLhs || !extRhs) { return failure(); } @@ -155,11 +260,11 @@ class LowerContractionToSMMLAPattern AffineMap lhsPermutationMap = 
op.getIndexingMapsArray()[0]; SmallVector lhsOffsets = applyPermutationMap(lhsPermutationMap, ArrayRef(offsets)); - Value tiledLhs = extractOperand(extsiLhs, lhsPermutationMap, lhsOffsets); + Value tiledLhs = extractOperand(extLhs, lhsPermutationMap, lhsOffsets); AffineMap rhsPermutationMap = op.getIndexingMapsArray()[1]; SmallVector rhsOffsets = applyPermutationMap(rhsPermutationMap, ArrayRef(offsets)); - Value tiledRhs = extractOperand(extsiRhs, rhsPermutationMap, rhsOffsets); + Value tiledRhs = extractOperand(extRhs, rhsPermutationMap, rhsOffsets); AffineMap accPermutationMap = op.getIndexingMapsArray()[2]; SmallVector accOffsets = applyPermutationMap(accPermutationMap, ArrayRef(offsets)); @@ -191,6 +296,13 @@ class LowerContractionToSMMLAPattern tiledAcc = expandForSMMLA(tiledAcc, outputExpandedType); } + // Transpose ACC if doing signed by unsigned multiplication, because we're + // using the instruction for unsigned by signed multiplication with + // reversed operands. + if (mmlaOp == MMLA::MixedSwapped) + tiledAcc = rewriter.create( + loc, tiledAcc, ArrayRef({1, 0})); + // Collapse tiled operands to 1D vectors required by smmla intrinsic auto collapsedInputType = VectorType::get(inputExpandedType.getNumElements(), inputElementType); @@ -211,15 +323,21 @@ class LowerContractionToSMMLAPattern } // Insert contract op - kAcc = rewriter.createOrFold( - op.getLoc(), collapsedRes.getType(), collapsedRes, collapsedLhs, - collapsedRhs); + kAcc = createMMLA(rewriter, mmlaOp, op.getLoc(), collapsedRes.getType(), + collapsedRes, collapsedLhs, collapsedRhs); // Reshape output back to 2D Value tiledRes = rewriter.createOrFold( kAcc.getLoc(), tiledAcc.getType(), kAcc); - // With vecmat, only one row of tiled ACC can be inserted into file result + // Because of the reversed operands the result is obtained transposed. 
+ // Transpose it back, + if (mmlaOp == MMLA::MixedSwapped) + tiledRes = rewriter.create( + loc, tiledRes, ArrayRef({1, 0})); + + // With vecmat, only one row of tiled ACC can be inserted into the final + // result if (isVecmat) { tiledRes = rewriter.createOrFold(loc, tiledRes, 0); } @@ -239,8 +357,8 @@ class LowerContractionToSMMLAPattern } // namespace -void mlir::arm_neon::populateLowerContractionToSMMLAPatternPatterns( +void mlir::arm_neon::populateLowerContractionToNeonI8MMPatternPatterns( RewritePatternSet &patterns) { MLIRContext *context = patterns.getContext(); - patterns.add(context, /*benefit=*/2); + patterns.add(context, /*benefit=*/2); } diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp index 95965872f4098..1e8e1265affa0 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp @@ -724,59 +724,6 @@ struct LiftIllegalVectorTransposeToMemory } }; -/// A rewrite to turn unit dim transpose-like vector.shape_casts into -/// vector.transposes. The shape_cast has to be from an illegal vector type to a -/// legal one (as defined by isLegalVectorType). -/// -/// The reasoning for this is if we've got to this pass and we still have -/// shape_casts of illegal types, then they likely will not cancel out. Turning -/// them into transposes gives LiftIllegalVectorTransposeToMemory a chance to -/// eliminate them. 
-/// -/// Example: -/// -/// BEFORE: -/// ```mlir -/// %0 = vector.shape_cast %a : vector<[4]x1xf32> to vector<1x[4]xf32> -/// ``` -/// -/// AFTER: -/// ```mlir -/// %0 = vector.transpose %0, [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> -/// ``` -struct ConvertIllegalShapeCastOpsToTransposes - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp shapeCastOp, - PatternRewriter &rewriter) const override { - auto sourceType = shapeCastOp.getSourceVectorType(); - auto resultType = shapeCastOp.getResultVectorType(); - if (isLegalVectorType(sourceType) || !isLegalVectorType(resultType)) - return rewriter.notifyMatchFailure(shapeCastOp, - kMatchFailureNotIllegalToLegal); - - // Note: If we know that `sourceType` is an illegal vector type (and 2D) - // then dim 0 is scalable and dim 1 is fixed. - if (sourceType.getRank() != 2 || sourceType.getDimSize(1) != 1) - return rewriter.notifyMatchFailure( - shapeCastOp, "expected source to be a 2D scalable vector with a " - "trailing unit dim"); - - auto loc = shapeCastOp.getLoc(); - auto transpose = rewriter.create( - loc, shapeCastOp.getSource(), ArrayRef{1, 0}); - - if (resultType.getRank() == 1) - rewriter.replaceOpWithNewOp(shapeCastOp, resultType, - transpose); - else - rewriter.replaceOp(shapeCastOp, transpose); - - return success(); - } -}; - /// Rewrites an illegal/unsupported SVE transfer_write(transpose) to instead use /// the ZA state. This workaround rewrite to support these transposes when ZA is /// available. @@ -920,6 +867,116 @@ struct LowerIllegalTransposeStoreViaZA } }; +/// Lower `vector.transfer_read` of a scalable column to `scf::for` +/// +/// Lowers a "read" of a scalable column from a MemRef for which there is no +/// hardware pperation that we could use to a loop over the rows to read and +/// loads one element at a time. +/// +/// BEFORE: +/// ``` +/// %res = vector.transfer_read %mem[%a, %b] (...) 
+/// : memref, vector<[4]x1xf32> +/// ``` +/// +/// AFTER: +/// ``` +/// %cst = arith.constant (...) : vector<[4]xf32> +/// %vscale = vector.vscale +/// %c4_vscale = arith.muli %vscale, %c4 : index +/// %scf = scf.for %lb = %c0 to %c4_vscale step %c1 iter_args(%arg4 = %cst) +/// -> (vector<[4]xf32>) { +/// +/// %load = memref.load %mem[%arg3 + %a, %b] : memref +/// %vec = vector.insert %load, %cst [%arg3] : f32 into vector<[4]xf32> +/// scf.yield %vec : vector<[4]xf32> +/// } +/// %res = vector.shape_cast %scf : vector<[4]xf32> to vector<[4]x1xf32> +/// ``` +/// +/// TODO: This transformation isn't specific to SME - move it to the SVE +/// dialect. +/// TODO: Check the in_bounds attribute and generate vector.maskedload if +/// required. +struct LowerColumnTransferReadToLoops + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, + PatternRewriter &rewriter) const override { + // NOTE: This is a fairly low-level transformation, so we shouldn't be + // adding support for Tensors without good rationale. + if (readOp.hasPureTensorSemantics()) + return rewriter.notifyMatchFailure( + readOp, "Tensor semantics are unsupported (either bufferize or " + "extend this pattern)"); + + auto resType = readOp.getVectorType(); + + if (resType.getRank() != 2) + return rewriter.notifyMatchFailure(readOp, + "Only 2D vectors are supported!"); + + if (resType.getShape()[1] != 1) + return rewriter.notifyMatchFailure( + readOp, "The trailing output dim is != 1 (not supported ATM)"); + + if (!resType.getScalableDims()[0] || resType.getScalableDims()[1]) + return rewriter.notifyMatchFailure( + readOp, "Expected the leading dim to be scalable and the trailing " + "dim to be fixed."); + + // Create new result type - similar to the original vector with the + // trailing unit dim collapsed. 
+ int64_t numRows = resType.getShape()[0]; + VectorType newResType = VectorType::get(numRows, resType.getElementType(), + /*scalableDims=*/{true}); + + // Create a loop over all rows and load one element at a time. + auto loc = readOp.getLoc(); + auto lowerBound = rewriter.create(loc, 0); + auto createVscaleMultiple = + vector::makeVscaleConstantBuilder(rewriter, loc); + auto upperBound = createVscaleMultiple(numRows); + auto step = rewriter.create(loc, 1); + Value init = rewriter.create( + loc, newResType, DenseElementsAttr::get(newResType, 0.0f)); + + scf::ForOp loadLoop; + { + OpBuilder::InsertionGuard g(rewriter); + loadLoop = rewriter.create(loc, lowerBound, upperBound, step, + ValueRange{init}); + rewriter.setInsertionPointToStart(loadLoop.getBody()); + + auto tileSliceIndex = loadLoop.getInductionVar(); + + auto idx0 = rewriter.create(loc, tileSliceIndex, + readOp.getIndices()[0]); + auto idx1 = readOp.getIndices()[1]; + + Value scalar = rewriter.create( + loc, readOp.getBase(), SmallVector({idx0, idx1})); + + Operation *updateInit = rewriter.create( + loc, scalar, loadLoop.getRegionIterArg(0), tileSliceIndex); + + rewriter.create(loc, updateInit->getResult(0)); + } + + // The read operation has been "legalized", but since the original result + // type was a 2D vector, we need to cast before returning the result. This + // ShapeCast should cancel-out with some other ShapeCast (i.e. it's a + // no-op). + auto sc = rewriter.create( + loc, readOp.getResult().getType(), loadLoop.getResult(0)); + + rewriter.replaceOp(readOp, sc); + + return success(); + } +}; + struct VectorLegalizationPass : public arm_sme::impl::VectorLegalizationBase { void runOnOperation() override { @@ -941,10 +998,10 @@ struct VectorLegalizationPass // Apply preprocessing patterns. 
RewritePatternSet rewritePatterns(context); - rewritePatterns.add(context); + rewritePatterns + .add(context); if (failed( applyPatternsGreedily(getOperation(), std::move(rewritePatterns)))) return signalPassFailure(); diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp index d2ac850a5f70b..d52ff4d4257c7 100644 --- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp +++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp @@ -298,16 +298,156 @@ struct LegalizeSVEMaskLoadConversion : public OpRewritePattern { } }; +/// Transforms a `transfer_read` operation so it reads vector of a type that +/// can be mapped to an LLVM type ("LLVM-legal" type). This is done by +/// collapsing trailing dimensions so we obtain a vector type with a single +/// scalable dimension in the rightmost position. +/// +/// Example: +/// ``` +/// %v = vector.transfer_read %M[%i, %j, %c0, %c0], %c0_i8 +/// {in_bounds = [false, true, true, true]} +/// : memref, vector<2x[4]x2x8xi8> +/// ``` +/// is rewritten to +/// ``` +/// %collapse_shape = memref.collapse_shape %M [[0], [1, 2, 3]] +/// : memref into memref +/// %0 = vector.transfer_read %collapse_shape[%i, %j], %c0_i8 +/// {in_bounds = [false, true]} +/// : memref, vector<2x[64]xi8> +/// %1 = vector.shape_cast %0 : vector<2x[64]xi8> to vector<2x[4]x2x8xi8> +/// ``` +struct LegalizeTransferRead : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, + PatternRewriter &rewriter) const override { + + // Do not try to transform masked reads. 
For example, if we have a transfer + // to a `vector<[4]x4xi8>` we could have a mask like + // 1 1 1 0 + // 1 1 1 0 + // 1 1 1 0 + // 0 0 0 0 + // Flattening this mask would look like + // 1 1 1 0 1 1 1 0 1 1 1 0 0 0 0 0 + // and we have not yet figured out an efficient way to build such a mask, + // neither from the mask operand, nor from the original `vector.create_mask` + // operation (if visible at all). + if (readOp.isMasked() || readOp.getMask()) + return rewriter.notifyMatchFailure(readOp, + "masked transfers not-supported"); + + // General permutation maps are not supported. The issue is with transpose, + // broadcast, and other forms of non-identify mapping in the minor + // dimensions which is impossible to represent after collapsing (at least + // because the resulting "collapsed" maps would have smaller number of + // dimension indices). + // TODO: We have not had yet the need for it, but some forms of permutation + // maps with identity in the minor dimensions voukld be supported, for + // example `(i, j, k, p) -> (j, i, k, p)` where we need to collapse only `k` + // and `p`. + if (!readOp.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(readOp, "non-identity permutation"); + + // We handle transfers of vectors with rank >= 2 and a single scalable + // dimension. This transformation aims to transform an LLVM-illegal type + // into an LLVM-legal type and one dimensional vectors are already + // LLVM-legal, even if scalable. A value of a vector type with more than one + // scalable dimension is impossible to represent using a vector type with no + // scalable dimensions or a single one. For example a `vector<[4]x[4]xi8>` + // would have `4 * 4 * vscale * vscale` elements and this quantity is + // impossible to represent as `N` or `N * vscale` (where `N` is a constant). 
+ VectorType origVT = readOp.getVectorType(); + ArrayRef origScalableDims = origVT.getScalableDims(); + const int64_t origVRank = origVT.getRank(); + if (origVRank < 2 || origVT.getNumScalableDims() != 1) + return rewriter.notifyMatchFailure(readOp, "wrong dimensions"); + + // Number of trailing dimensions to collapse, including the scalable + // dimension. Nothing to do if the single scalable dimension is already the + // last one. + const int64_t numCollapseDims = std::distance( + llvm::find(origScalableDims, true), origScalableDims.end()); + if (numCollapseDims < 2) + return rewriter.notifyMatchFailure(readOp, + "scalable dimension is trailing"); + + // We want a simple memref (not a tensor) with contiguous elements for at + // least all the trailing dimensions up to and including the scalable one. + auto memTy = dyn_cast(readOp.getBase().getType()); + if (!(memTy && memTy.areTrailingDimsContiguous(numCollapseDims))) + return rewriter.notifyMatchFailure( + readOp, "non-contiguous memref dimensions to collapse"); + + // The dimensions to collapse (excluding the scalable one) of the vector and + // the memref must match. A dynamic memref dimension is considered + // non-matching. The transfers from the dimensions to collapse must be + // in-bounds (it follows the corresponding indices would be zero). This + // guarantees that the operation transfers a contiguous block + // and no padding is necessary. + if (!llvm::equal(memTy.getShape().take_back(numCollapseDims - 1), + origVT.getShape().take_back(numCollapseDims - 1))) + return rewriter.notifyMatchFailure( + readOp, "memref and vector dimensions do not match"); + + SmallVector origInBounds = readOp.getInBoundsValues(); + if (!llvm::all_of( + ArrayRef(origInBounds).take_back(numCollapseDims - 1), + [](bool v) { return v; })) + return rewriter.notifyMatchFailure( + readOp, "out-of-bounds transfer from a dimension to collapse"); + + // Collapse the trailing dimensions of the memref. 
+ SmallVector reassoc; + for (int64_t i = 0; i < memTy.getRank() - numCollapseDims + 1; ++i) + reassoc.push_back({i}); + for (int64_t i = memTy.getRank() - numCollapseDims + 1; i < memTy.getRank(); + ++i) + reassoc.back().push_back(i); + if (!memref::CollapseShapeOp::isGuaranteedCollapsible(memTy, reassoc)) + return failure(); + Value collapsedMem = rewriter.create( + readOp.getLoc(), readOp.getBase(), reassoc); + + // Get a vector type with collapsed trailing dimensions. + SmallVector shape(origVT.getShape()); + for (int64_t i = origVRank - numCollapseDims + 1; i < origVRank; ++i) + shape[origVRank - numCollapseDims] *= shape[i]; + shape.pop_back_n(numCollapseDims - 1); + auto collapsedVT = + VectorType::get(shape, origVT.getElementType(), + origScalableDims.drop_back(numCollapseDims - 1)); + + // Drop the extra (zero) indices. + auto indices = readOp.getIndices().drop_back(numCollapseDims - 1); + + // Create the new `transfer_read`. + auto newReadOp = rewriter.create( + readOp.getLoc(), collapsedVT, collapsedMem, indices, + ArrayRef(origInBounds).drop_back(numCollapseDims - 1)); + + // Cast back to the original vector type. 
+ auto toOrigShape = rewriter.create(readOp.getLoc(), + origVT, newReadOp); + + rewriter.replaceOp(readOp, toOrigShape); + return success(); + } +}; + } // namespace void mlir::arm_sve::populateLegalizeVectorStoragePatterns( RewritePatternSet &patterns) { - patterns.add, - LegalizeSVEMaskAllocation, - LegalizeSVEMaskTypeCastConversion, - LegalizeSVEMaskStoreConversion, LegalizeSVEMaskLoadConversion>( - patterns.getContext()); + patterns + .add, + LegalizeSVEMaskAllocation, + LegalizeSVEMaskTypeCastConversion, LegalizeSVEMaskStoreConversion, + LegalizeSVEMaskLoadConversion, LegalizeTransferRead>( + patterns.getContext()); } namespace { diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractionToSVEI8MMPattern.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractionToSVEI8MMPattern.cpp index a1209fe8230e2..b7703ff0393eb 100644 --- a/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractionToSVEI8MMPattern.cpp +++ b/mlir/lib/Dialect/ArmSVE/Transforms/LowerContractionToSVEI8MMPattern.cpp @@ -1,4 +1,4 @@ -//===- LowerContractionToSMMLAPattern.cpp - Contract to SMMLA ---*- C++ -*-===// +//===- LowerContractionToSVEI8MMPattern.cpp - Contract to I8MM --*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,6 +9,11 @@ // This file implements lowering patterns from vector.contract to operations // that map to instructions from the SVE FEAT_I8MM extension. // +// TODO: There may be opportunities to unify this with a similar pattern +// for Neon. 
See: +// https://github.com/llvm/llvm-project/issues/145559 +// LowerContractionToNeonI8MMPattern.cpp +// //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arith/IR/Arith.h" diff --git a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp index 695d43b04cff0..f63af8da28087 100644 --- a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp @@ -111,7 +111,8 @@ static Value getFlatMemref(OpBuilder &rewriter, Location loc, Value source, getFlatOffsetAndStrides(rewriter, loc, source, offsetsTemp); MemRefType retType = inferCastResultType(base, offset); return rewriter.create(loc, retType, base, offset, - std::nullopt, std::nullopt); + ArrayRef(), + ArrayRef()); } static bool needFlatten(Value val) { diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 5dbb2403eddbd..b2639edb0d0f5 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -2989,8 +2989,9 @@ LogicalResult WinogradFilterTransformOp::verify() { ArrayRef filterShape = filterType.getShape(); int64_t filterH = filterShape[getFilterHDim()]; int64_t filterW = filterShape[getFilterWDim()]; - int64_t r = getR(); - int64_t m = getM(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); if (filterH != r && filterH != 1) return emitOpError("expect filter height either equals to r or 1"); @@ -3046,8 +3047,9 @@ LogicalResult WinogradFilterTransformOp::getResultTilePosition( ArrayRef filterShape = filterType.getShape(); int64_t filterH = filterShape[getFilterHDim()]; int64_t filterW = filterShape[getFilterWDim()]; - int64_t m = getM(); - int64_t r = getR(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); int64_t alpha = m + r - 1; int64_t alphaH = 
filterH != 1 ? alpha : 1; int64_t alphaW = filterW != 1 ? alpha : 1; @@ -3124,8 +3126,9 @@ LogicalResult WinogradInputTransformOp::verify() { ArrayRef inputShape = inputType.getShape(); int64_t inputH = inputShape[getInputHDim()]; int64_t inputW = inputShape[getInputWDim()]; - int m = getM(); - int r = getR(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); int64_t tileSize = m + r - 1; auto outputType = cast(getOutput().getType()); @@ -3194,8 +3197,9 @@ LogicalResult WinogradInputTransformOp::getResultTilePosition( int64_t outputAlphaH = outputShape[getOutputAlphaHDim()]; int64_t outputAlphaW = outputShape[getOutputAlphaWDim()]; - int64_t m = getM(); - int64_t r = getR(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); int64_t alpha = m + r - 1; int64_t alphaH = outputAlphaH != 1 ? alpha : 1; int64_t alphaW = outputAlphaW != 1 ? alpha : 1; @@ -3224,8 +3228,9 @@ WinogradInputTransformOp::getTiledImplementation(OpBuilder &builder, ArrayRef offsets, ArrayRef sizes) { IntegerAttr oneAttr = builder.getI64IntegerAttr(1); - int64_t m = getM(); - int64_t r = getR(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); ShapedType outputType = getOutputOperandType(); ArrayRef outputShape = outputType.getShape(); @@ -3303,8 +3308,9 @@ LogicalResult WinogradOutputTransformOp::verify() { int64_t valueW = valueShape[getValueAlphaWDim()]; int64_t valueTileH = valueShape[getValueTileHDim()]; int64_t valueTileW = valueShape[getValueTileWDim()]; - int m = getM(); - int r = getR(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); bool leftTransform = valueH != 1; bool rightTransform = valueW != 1; @@ -3365,7 +3371,9 @@ LogicalResult WinogradOutputTransformOp::getResultTilePosition( OpBuilder &builder, unsigned resultNumber, ArrayRef offsets, ArrayRef sizes, 
SmallVector &resultOffsets, SmallVector &resultSizes) { - int64_t m = getM(); + WinogradConv2DFmr fmr = getFmr(); + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); Location loc = getLoc(); MLIRContext *context = builder.getContext(); @@ -3623,6 +3631,27 @@ verifyExtendedBatchVariantMatmulSemantic(OpTy batchVariantMatmulOp, namespace mlir { namespace linalg { +std::optional getWinogradConv2DFmr(int64_t m, int64_t r) { + if (m == 2 && r == 3) + return WinogradConv2DFmr::F_2_3; + if (m == 4 && r == 3) + return WinogradConv2DFmr::F_4_3; + if (m == 2 && r == 5) + return WinogradConv2DFmr::F_2_5; + return std::nullopt; +} + +std::pair getFmrFromWinogradConv2DFmr(WinogradConv2DFmr fmr) { + switch (fmr) { + case WinogradConv2DFmr::F_2_3: + return {2, 3}; + case WinogradConv2DFmr::F_4_3: + return {4, 3}; + case WinogradConv2DFmr::F_2_5: + return {2, 5}; + } +} + //===----------------------------------------------------------------------===// // MatMulOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 2b78e31558ea2..8571d641e26d1 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -4250,7 +4250,7 @@ DiagnosedSilenceableFailure transform::WinogradConv2DOp::applyToOne( bool supported = TypeSwitch(target) .Case([&](linalg::Conv2DNhwcFhwcOp op) { maybeTransformed = - winogradConv2D(rewriter, op, getM(), getR()); + winogradConv2D(rewriter, op, getFmr()); return true; }) .Default([&](Operation *op) { return false; }); diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index 19d484a3bb701..513cecef29b61 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ 
b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -22,8 +22,11 @@ #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/Interfaces/TilingInterface.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" +#include "llvm/Support/Debug.h" #include +#define DEBUG_TYPE "linalg-tiling-interface-impl" + using namespace mlir; using namespace mlir::linalg; @@ -148,55 +151,82 @@ struct LinalgOpTilingInterface /// Utility to fetch the offsets and sizes when applied as per the indexing /// map of the linalg op. This helps in fusing the linalg op as a consumer of /// a given slice op. - void - getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b, AffineMap indexingMap, - ArrayRef offsets, - ArrayRef sizes, - SmallVectorImpl &mappedOffsets, - SmallVectorImpl &mappedSizes) const { - unsigned numLoops = linalgOp.getNumLoops(); - auto tilingInterfaceOp = cast(linalgOp.getOperation()); - mappedOffsets.resize(numLoops); - mappedSizes.resize(numLoops); - if (!indexingMap.isPermutation()) { - SmallVector iterationDomain = - tilingInterfaceOp.getIterationDomain(b); - for (const auto &&[index, value] : llvm::enumerate(iterationDomain)) { - mappedOffsets[index] = value.offset; - mappedSizes[index] = value.size; + static LogicalResult + getMappedOffsetAndSize(LinalgOp linalgOp, OpBuilder &b, + ArrayRef indexingMaps, + ArrayRef> allOffsets, + ArrayRef> allSizes, + SmallVectorImpl &mappedOffsetsVec, + SmallVectorImpl &mappedSizesVec) { + DenseMap mappedOffsets, mappedSizes; + + for (auto [indexingMap, offsets, sizes] : + llvm::zip_equal(indexingMaps, allOffsets, allSizes)) { + for (auto [resultExpr, offset, size] : + llvm::zip_equal(indexingMap.getResults(), offsets, sizes)) { + auto dimExpr = dyn_cast(resultExpr); + if (!dimExpr) + continue; + unsigned position = dimExpr.getPosition(); + auto it = mappedOffsets.find(position); + if (it != mappedOffsets.end()) { + OpFoldResult seenOffset = it->second; + OpFoldResult seenSize = mappedSizes.lookup(position); + if 
(seenOffset != offset || seenSize != size) { + LLVM_DEBUG({ + llvm::dbgs() << "inconsistent iteration space mapping from " + "offsets/sizes of operands/results"; + }); + return failure(); + } + } else { + mappedOffsets[position] = offset; + mappedSizes[position] = size; + } } } - for (const auto &&[index, value] : - llvm::enumerate(indexingMap.getResults())) { - unsigned dimPosition = cast(value).getPosition(); - mappedOffsets[dimPosition] = offsets[index]; - mappedSizes[dimPosition] = sizes[index]; + + // Aggregate from the given operand offsets and sizes, or default to + // iteration space values. + SmallVector iterationDomain = + cast(linalgOp.getOperation()).getIterationDomain(b); + mappedOffsetsVec.resize(iterationDomain.size()); + mappedSizesVec.resize(iterationDomain.size()); + for (auto [index, domain] : llvm::enumerate(iterationDomain)) { + auto it = mappedOffsets.find(index); + if (it != mappedOffsets.end()) { + mappedOffsetsVec[index] = it->second; + mappedSizesVec[index] = mappedSizes.lookup(index); + continue; + } + mappedOffsetsVec[index] = domain.offset; + mappedSizesVec[index] = domain.size; } + return success(); } /// Method to return the position of the result tile computed by the tiled /// operation. - LogicalResult getIterationDomainTileFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes, + LogicalResult getIterationDomainTileFromOperandTiles( + Operation *op, OpBuilder &b, ArrayRef operandNumbers, + ArrayRef> allOffsets, + ArrayRef> allSizes, SmallVectorImpl &iterDomainOffsets, SmallVectorImpl &iterDomainSizes) const { auto linalgOp = cast(op); - // Check that the indexing map used for the operand is a projected - // permutation. This could be relaxed with a more general approach that can - // map the offsets and sizes from the operand to iteration space tiles - // (filling in full extent for dimensions not used to access the result). 
- AffineMap indexingMap = - linalgOp.getMatchingIndexingMap(&op->getOpOperand(operandNumber)); - if (!indexingMap.isProjectedPermutation()) { - return op->emitError() - << "unhandled get iter domain position when operand is not " - "accessed using a permuted projection"; + std::optional> iterationSpaceOffsets, + iterationSpaceSizes; + SmallVector indexingMaps = + llvm::map_to_vector(operandNumbers, [&](unsigned operandNumber) { + OpOperand &opOperand = linalgOp->getOpOperand(operandNumber); + return linalgOp.getMatchingIndexingMap(&opOperand); + }); + if (failed(getMappedOffsetAndSize(linalgOp, b, indexingMaps, allOffsets, + allSizes, iterDomainOffsets, + iterDomainSizes))) { + return failure(); } - - getMappedOffsetAndSize(linalgOp, b, indexingMap, offsets, sizes, - iterDomainOffsets, iterDomainSizes); return success(); } @@ -247,8 +277,13 @@ struct LinalgOpTilingInterface "accessed using a permuted projection"); } - getMappedOffsetAndSize(linalgOp, b, indexingMap, offsets, sizes, - iterDomainOffsets, iterDomainSizes); + SmallVector allOffsets = llvm::to_vector(offsets); + SmallVector allSizes = llvm::to_vector(sizes); + auto status = + getMappedOffsetAndSize(linalgOp, b, indexingMap, {allOffsets}, + {allSizes}, iterDomainOffsets, iterDomainSizes); + (void)status; + assert(succeeded(status) && "unexpected error in offset calculation"); return success(); } @@ -279,12 +314,13 @@ struct LinalgOpTilingInterface /// Method to generate the tiled implementation of an operation from the tile /// of the operand. 
- FailureOr getTiledImplementationFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes) const { + FailureOr getTiledImplementationFromOperandTiles( + Operation *op, OpBuilder &b, ArrayRef operandNumbers, + ArrayRef> allOffsets, + ArrayRef> allSizes) const { SmallVector mappedOffsets, mappedSizes; - if (failed(getIterationDomainTileFromOperandTile( - op, b, operandNumber, offsets, sizes, mappedOffsets, + if (failed(getIterationDomainTileFromOperandTiles( + op, b, operandNumbers, allOffsets, allSizes, mappedOffsets, mappedSizes))) { return failure(); } @@ -837,13 +873,20 @@ struct PackOpTiling /// Method to return the position of iteration domain tile computed by the /// tiled operation. In current `tensor.pack` context, the `resultOffsets` and /// `resultSizes` only cover outer dimensions. - LogicalResult getIterationDomainTileFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes, + LogicalResult getIterationDomainTileFromOperandTiles( + Operation *op, OpBuilder &b, ArrayRef operandNumbers, + ArrayRef> allOffsets, + ArrayRef> allSizes, SmallVectorImpl &resultOffsets, SmallVectorImpl &resultSizes) const { - if (operandNumber != 0) + if (operandNumbers.size() != 1 || operandNumbers[0] != 0) { + LLVM_DEBUG( + { llvm::dbgs() << "unsupported operands for consumer fusion"; }); return failure(); + } + + ArrayRef offsets(allOffsets[0]); + ArrayRef sizes(allSizes[0]); auto packOp = cast(op); // It is not trivial to infer dest tile from source tile if `packOp` has @@ -904,11 +947,18 @@ struct PackOpTiling } /// Method to return the tiled implementation of tensor.pack as a consumer. 
- FailureOr getTiledImplementationFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes) const { - if (operandNumber != 0) + FailureOr getTiledImplementationFromOperandTiles( + Operation *op, OpBuilder &b, ArrayRef operandNumbers, + ArrayRef> allOffsets, + ArrayRef> allSizes) const { + if (operandNumbers.size() != 1 || operandNumbers[0] != 0) { + LLVM_DEBUG( + { llvm ::dbgs() << "unhandled operands for consumer fusion"; }); return failure(); + } + + ArrayRef offsets(allOffsets[0]); + ArrayRef sizes(allSizes[0]); auto packOp = cast(op); Location loc = packOp.getLoc(); @@ -923,8 +973,8 @@ struct PackOpTiling tiledOperands.push_back(sourceSlice); SmallVector outerDimOffsets, outerDimSizes; - if (failed(getIterationDomainTileFromOperandTile( - op, b, /*operandNumber=*/0, offsets, sizes, outerDimOffsets, + if (failed(getIterationDomainTileFromOperandTiles( + op, b, operandNumbers, allOffsets, allSizes, outerDimOffsets, outerDimSizes))) return failure(); @@ -1182,12 +1232,21 @@ struct UnPackOpTiling /// Method to return the position of iteration domain tile computed by the /// tiled operation. - LogicalResult getIterationDomainTileFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes, + LogicalResult getIterationDomainTileFromOperandTiles( + Operation *op, OpBuilder &b, ArrayRef operandNumbers, + ArrayRef> allOffsets, + ArrayRef> allSizes, SmallVectorImpl &resultOffsets, SmallVectorImpl &resultSizes) const { + if (operandNumbers.size() != 1) { + LLVM_DEBUG({ llvm::dbgs() << "unable to handle multiple operands"; }); + return failure(); + } auto unPackOp = cast(op); + unsigned operandNumber = operandNumbers[0]; + ArrayRef offsets(allOffsets[0]); + ArrayRef sizes(allSizes[0]); + // If the operand tile is the dest, then no adjustment is needed. 
if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) { resultOffsets = llvm::to_vector(offsets); @@ -1241,10 +1300,18 @@ struct UnPackOpTiling } /// Method to return the tiled implementation of tensor.unpack as a consumer. - FailureOr getTiledImplementationFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes) const { + FailureOr getTiledImplementationFromOperandTiles( + Operation *op, OpBuilder &b, ArrayRef operandNumbers, + ArrayRef> allOffsets, + ArrayRef> allSizes) const { + if (operandNumbers.size() != 1 || operandNumbers[0] != 0) { + LLVM_DEBUG({ llvm::dbgs() << "unhandled operands for consumer fusion"; }); + return failure(); + } auto unPackOp = cast(op); + ArrayRef offsets(allOffsets[0]); + ArrayRef sizes(allSizes[0]); + // tensor.unpack op is fusible (as a consumer) only if inner dims are not // tiled. int64_t numTiles = unPackOp.getInnerDimsPos().size(); @@ -1259,8 +1326,8 @@ struct UnPackOpTiling // Fetch offset/size for creating the slice of the dest operand of // unpack op. SmallVector outputOffsets, outputSizes; - if (failed(getIterationDomainTileFromOperandTile( - op, b, /*operandNumber=*/0, offsets, sizes, outputOffsets, + if (failed(getIterationDomainTileFromOperandTiles( + op, b, operandNumbers, allOffsets, allSizes, outputOffsets, outputSizes))) return failure(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp b/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp index e4221d4748415..4e90defebcf5c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp @@ -38,6 +38,15 @@ namespace { /// /// The following tables define these constant transformation matrices for /// F(2 x 2, 3 x 3), F(4 x 4, 3 x 3), and F(2 x 2, 5 x 5) +/// +/// To add more transformation matrices, we need to add the following +/// items: +/// 1. 
Add the constant transformation matrix to the corresponding +/// G, GT, BT, B, AT, or A array. +/// 2. Add the corresponding TransformMatrix to the GMatrices, GTMatrices, +/// BTMatrices, BMatrices, ATMatrices, or AMatrices map. +/// 3. Add a enum value F_m_r to WinogradConv2DFmr enum. +/// constexpr float G_2x2_3x3[] = { -1, 0, 0, 1./2, -1./2, 1./2, @@ -176,19 +185,6 @@ constexpr float A_2x2_5x5[] = { }; // clang-format on -using TransformMapKeyTy = std::pair; - -/// We use F(m, r) to define the size of minimal filtering algorithms. -/// m is the output dimension and r is the filter dimension. We can get -/// the input dimension, alpha, from the formula, alpha = m + r - 1. -/// -/// For example, when m = 2 and r = 3, we know its input size is 4. -/// The Conv2D will operate on 4x4 input data with 3x3 filter and get -/// 2x2 output result. -constexpr TransformMapKeyTy F_2_3{2, 3}; -constexpr TransformMapKeyTy F_4_3{4, 3}; -constexpr TransformMapKeyTy F_2_5{2, 5}; - /// Structure to keep information of constant transform matrices. struct TransformMatrix { TransformMatrix(const float *table, int64_t rows, int64_t cols, @@ -344,22 +340,22 @@ Value insert2DDataTo6D(OpBuilder &builder, Location loc, Value source, /// %ret = linalg.matmul %ret, GT /// %inserted = insert %ret into filter Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, - Value retValue, int64_t m, int64_t r, + Value retValue, WinogradConv2DFmr fmr, bool leftTransform = true, bool rightTransform = true) { // Map from (m, r) to G transform matrix. 
- static const llvm::SmallDenseMap + static const llvm::SmallDenseMap GMatrices = { - {F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, - {F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, - {F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, + {WinogradConv2DFmr::F_2_3, TransformMatrix(G_2x2_3x3, 4, 3)}, + {WinogradConv2DFmr::F_4_3, TransformMatrix(G_4x4_3x3, 6, 3)}, + {WinogradConv2DFmr::F_2_5, TransformMatrix(G_2x2_5x5, 6, 5)}, }; // Map from (m, r) to GT transform matrix. - static const llvm::SmallDenseMap + static const llvm::SmallDenseMap GTMatrices = { - {F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, - {F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, - {F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, + {WinogradConv2DFmr::F_2_3, TransformMatrix(GT_2x2_3x3, 3, 4)}, + {WinogradConv2DFmr::F_4_3, TransformMatrix(GT_4x4_3x3, 3, 6)}, + {WinogradConv2DFmr::F_2_5, TransformMatrix(GT_2x2_5x5, 5, 6)}, }; auto filterType = cast(filter.getType()); @@ -370,6 +366,8 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, int64_t filterW = filterShape[2]; int64_t filterC = filterShape[3]; + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); if (filterH != r && filterH != 1) return Value(); if (filterW != r && filterW != 1) @@ -387,14 +385,13 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, zeroIdx, filterH, filterW, /*loopNorFIdx=*/0, /*loopCorFIdx=*/3, /*heightIdx=*/1, /*widthIdx=*/2); - TransformMapKeyTy key = {m, r}; int64_t retRows = 1; Value matmulRetValue = extractFilter; Value zero = builder.create( loc, rewriter.getZeroAttr(elementType)); if (leftTransform) { // Get constant transform matrix G. - auto it = GMatrices.find(key); + auto it = GMatrices.find(fmr); if (it == GMatrices.end()) return {}; const TransformMatrix &GMatrix = it->second; @@ -416,7 +413,7 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, if (rightTransform) { // Get constant transform matrix GT. 
- auto it = GTMatrices.find(key); + auto it = GTMatrices.find(fmr); if (it == GTMatrices.end()) return {}; const TransformMatrix >Matrix = it->second; @@ -476,24 +473,26 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, /// %output /// at [0, 0, %h, %w, %n, %c] Value inputTransform(RewriterBase &rewriter, Location loc, Value input, - Value retValue, int64_t m, int64_t r, + Value retValue, WinogradConv2DFmr fmr, bool leftTransform = true, bool rightTransform = true) { // Map from (m, r) to BT transform matrix. - static const llvm::SmallDenseMap + static const llvm::SmallDenseMap BTMatrices = { - {F_2_3, TransformMatrix(BT_2x2_3x3, 4, 4)}, - {F_4_3, TransformMatrix(BT_4x4_3x3, 6, 6)}, - {F_2_5, TransformMatrix(BT_2x2_5x5, 6, 6)}, + {WinogradConv2DFmr::F_2_3, TransformMatrix(BT_2x2_3x3, 4, 4)}, + {WinogradConv2DFmr::F_4_3, TransformMatrix(BT_4x4_3x3, 6, 6)}, + {WinogradConv2DFmr::F_2_5, TransformMatrix(BT_2x2_5x5, 6, 6)}, }; // Map from (m, r) to B transform matrix. 
- static const llvm::SmallDenseMap + static const llvm::SmallDenseMap BMatrices = { - {F_2_3, TransformMatrix(B_2x2_3x3, 4, 4)}, - {F_4_3, TransformMatrix(B_4x4_3x3, 6, 6)}, - {F_2_5, TransformMatrix(B_2x2_5x5, 6, 6)}, + {WinogradConv2DFmr::F_2_3, TransformMatrix(B_2x2_3x3, 4, 4)}, + {WinogradConv2DFmr::F_4_3, TransformMatrix(B_4x4_3x3, 6, 6)}, + {WinogradConv2DFmr::F_2_5, TransformMatrix(B_2x2_5x5, 6, 6)}, }; + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); auto inputType = cast(input.getType()); Type elementType = inputType.getElementType(); auto inputShape = inputType.getShape(); // N, H, W, C @@ -529,7 +528,6 @@ Value inputTransform(RewriterBase &rewriter, Location loc, Value input, widthOffset, alphaH, alphaW, /*loopNorFIdx=*/0, /*loopCorFIdx=*/3, /*heightIdx=*/1, /*widthIdx=*/2); - TransformMapKeyTy key = {m, r}; int64_t retRows = 1; int64_t retCols = 1; Value matmulRetValue = extractInput; @@ -537,7 +535,7 @@ Value inputTransform(RewriterBase &rewriter, Location loc, Value input, loc, rewriter.getZeroAttr(elementType)); if (leftTransform) { // Get constant transform matrix BT. - auto it = BTMatrices.find(key); + auto it = BTMatrices.find(fmr); if (it == BTMatrices.end()) return {}; const TransformMatrix &BTMatrix = it->second; @@ -560,7 +558,7 @@ Value inputTransform(RewriterBase &rewriter, Location loc, Value input, if (rightTransform) { // Get constant transform matrix B. - auto it = BMatrices.find(key); + auto it = BMatrices.find(fmr); if (it == BMatrices.end()) return {}; const TransformMatrix &BMatrix = it->second; @@ -696,24 +694,26 @@ static Value matrixMultiply(RewriterBase &rewriter, Location loc, /// output /// at [%n, (%h x m), (%w x m), %f] Value outputTransform(RewriterBase &rewriter, Location loc, Value value, - Value output, int64_t m, int64_t r, + Value output, WinogradConv2DFmr fmr, bool leftTransform = true, bool rightTransform = true) { // Map from (m, r) to AT transform matrix. 
- static const llvm::SmallDenseMap + static const llvm::SmallDenseMap ATMatrices = { - {F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)}, - {F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)}, - {F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)}, + {WinogradConv2DFmr::F_2_3, TransformMatrix(AT_2x2_3x3, 2, 4)}, + {WinogradConv2DFmr::F_4_3, TransformMatrix(AT_4x4_3x3, 4, 6, 32)}, + {WinogradConv2DFmr::F_2_5, TransformMatrix(AT_2x2_5x5, 2, 6, 16)}, }; // Map from (m, r) to A transform matrix. - static const llvm::SmallDenseMap + static const llvm::SmallDenseMap AMatrices = { - {F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)}, - {F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)}, - {F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)}, + {WinogradConv2DFmr::F_2_3, TransformMatrix(A_2x2_3x3, 4, 2)}, + {WinogradConv2DFmr::F_4_3, TransformMatrix(A_4x4_3x3, 6, 4, 32)}, + {WinogradConv2DFmr::F_2_5, TransformMatrix(A_2x2_5x5, 6, 2, 16)}, }; + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); auto valueType = cast(value.getType()); Type elementType = valueType.getElementType(); auto valueShape = valueType.getShape(); // H, W, TileH, TileW, N, F @@ -743,9 +743,8 @@ Value outputTransform(RewriterBase &rewriter, Location loc, Value value, FIter, 2, 3, /*loopNorFIdx=*/4, /*loopCorFIdx=*/5, /*heightIdx=*/0, /*widthIdx=*/1); - const TransformMapKeyTy key = {m, r}; - const TransformMatrix &AMatrix = AMatrices.at(key); - const TransformMatrix &ATMatrix = ATMatrices.at(key); + const TransformMatrix &AMatrix = AMatrices.at(fmr); + const TransformMatrix &ATMatrix = ATMatrices.at(fmr); int64_t scalarFactor = (rightTransform ? AMatrix.scalarFactor : 1) * (leftTransform ? ATMatrix.scalarFactor : 1); int64_t retCols = rightTransform ? AMatrix.cols : 1; @@ -903,7 +902,7 @@ static bool hasAllOneValues(DenseIntElementsAttr attr) { /// linalg.winograd_*_transform ops. 
static FailureOr winogradConv2DHelper(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp, - int64_t m, int64_t r) { + WinogradConv2DFmr fmr) { if (!convOp.hasPureTensorSemantics()) return rewriter.notifyMatchFailure( convOp, "expected pure tensor semantics for linalg.conv_2d_nhwc_fhwc"); @@ -946,6 +945,8 @@ winogradConv2DHelper(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp, int64_t outputW = outputShape[2]; int64_t outputF = outputShape[3]; + int64_t m, r; + std::tie(m, r) = getFmrFromWinogradConv2DFmr(fmr); // Only support F(m x m, r x r), F(m x 1, r x 1) or F(1 x m, 1 x r). bool isSupportedFilter = false; if (filterH == filterW && filterH == r) @@ -959,17 +960,6 @@ winogradConv2DHelper(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp, return rewriter.notifyMatchFailure( convOp, "only support filter (r x r), (r x 1) or (1 x r)"); - // Currently, we support (m, r) = (2, 3) or (4, 3) or (2, 5). - static const llvm::SmallVector validConfigs = { - F_2_3, F_4_3, F_2_5}; - - TransformMapKeyTy key = {m, r}; - auto it = llvm::find(validConfigs, key); - // If we cannot find the constant transformation matrix, it means we do - // not support this configuration yet. - if (it == validConfigs.end()) - return failure(); - // All the criterias are satisfied. We can do Winograd Conv2D. 
Location loc = convOp.getLoc(); @@ -993,7 +983,7 @@ winogradConv2DHelper(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp, Value retValue = rewriter.create(loc, retType.getShape(), filterElementType); auto transformedFilter = rewriter.create( - loc, retType, filter, retValue, m, r); + loc, retType, filter, retValue, fmr); // --- Create operation for input transform --- @@ -1012,7 +1002,7 @@ winogradConv2DHelper(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp, retValue = rewriter.create(loc, retType.getShape(), inputElementType); auto transformedInput = rewriter.create( - loc, retType, input, retValue, m, r); + loc, retType, input, retValue, fmr); Type outputElementType = outputType.getElementType(); Value matmulRet = matrixMultiply(rewriter, loc, transformedFilter, @@ -1035,7 +1025,7 @@ winogradConv2DHelper(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp convOp, } Value transformedOutput = rewriter.create( - loc, outputType, matmulRet, output, m, r); + loc, outputType, matmulRet, output, fmr); // When output size is not aligned with output tile size, extract the // value from the padded buffer. @@ -1067,8 +1057,8 @@ decomposeWinogradFilterTransformHelper(RewriterBase &rewriter, // For F(1 x m, 1 x r), we only need to do right side transform. bool rightTransform = filterW != 1; Value transformedFilter = - filterTransform(rewriter, loc, filter, op.getOutput(), op.getM(), - op.getR(), leftTransform, rightTransform); + filterTransform(rewriter, loc, filter, op.getOutput(), op.getFmr(), + leftTransform, rightTransform); if (!transformedFilter) return failure(); @@ -1094,8 +1084,8 @@ decomposeWinogradInputTransformHelper(RewriterBase &rewriter, // For F(1 x m, 1 x r), we only need to do right side transform. 
bool rightTransform = outputW != 1; Value transformedInput = - inputTransform(rewriter, loc, op.getInput(), op.getOutput(), op.getM(), - op.getR(), leftTransform, rightTransform); + inputTransform(rewriter, loc, op.getInput(), op.getOutput(), op.getFmr(), + leftTransform, rightTransform); if (!transformedInput) return failure(); @@ -1120,8 +1110,8 @@ decomposeWinogradOutputTransformHelper(RewriterBase &rewriter, // For F(1 x m, 1 x r), we only need to do right side transform. bool rightTransform = valueW != 1; Value transformedOutput = - outputTransform(rewriter, loc, value, op.getOutput(), op.getM(), - op.getR(), leftTransform, rightTransform); + outputTransform(rewriter, loc, value, op.getOutput(), op.getFmr(), + leftTransform, rightTransform); if (!transformedOutput) return failure(); @@ -1171,28 +1161,28 @@ class WinogradConv2DNhwcFhwc final : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; - WinogradConv2DNhwcFhwc(mlir::MLIRContext *context, int64_t m, int64_t r) - : OpRewritePattern(context), m(m), r(r) {} + WinogradConv2DNhwcFhwc(mlir::MLIRContext *context, WinogradConv2DFmr fmr) + : OpRewritePattern(context), fmr(fmr) {} LogicalResult matchAndRewrite(linalg::Conv2DNhwcFhwcOp convOp, PatternRewriter &rewriter) const override { - if (failed(winogradConv2DHelper(rewriter, convOp, m, r))) + if (failed(winogradConv2DHelper(rewriter, convOp, fmr))) return failure(); return success(); } private: - int64_t m; - int64_t r; + WinogradConv2DFmr fmr; }; + } // end anonymous namespace //===----------------------------------------------------------------------===// FailureOr winogradConv2D(RewriterBase &rewriter, - linalg::Conv2DNhwcFhwcOp op, int64_t m, - int64_t r) { - return winogradConv2DHelper(rewriter, op, m, r); + linalg::Conv2DNhwcFhwcOp op, + linalg::WinogradConv2DFmr fmr) { + return winogradConv2DHelper(rewriter, op, fmr); } FailureOr @@ -1213,11 +1203,11 @@ decomposeWinogradOutputTransformOp(RewriterBase &rewriter, return 
decomposeWinogradOutputTransformHelper(rewriter, op); } -void populateWinogradConv2DPatterns(RewritePatternSet &patterns, int64_t m, - int64_t r) { +void populateWinogradConv2DPatterns(RewritePatternSet &patterns, + WinogradConv2DFmr fmr) { MLIRContext *context = patterns.getContext(); // TODO: Support more Conv2D data layout, e.g., conv_2d_nchw_fchw - patterns.insert(context, m, r); + patterns.insert(context, fmr); } void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns) { diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index d56b32193765e..372e83a98ee52 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1567,11 +1567,23 @@ LogicalResult GlobalOp::verify() { // Check that the type of the initial value is compatible with the type of // the global variable. if (auto elementsAttr = llvm::dyn_cast(initValue)) { - Type initType = elementsAttr.getType(); - Type tensorType = getTensorTypeFromMemRefType(memrefType); - if (initType != tensorType) - return emitOpError("initial value expected to be of type ") - << tensorType << ", but was of type " << initType; + // Check the element types match. + auto initElementType = + cast(elementsAttr.getType()).getElementType(); + auto memrefElementType = memrefType.getElementType(); + + if (initElementType != memrefElementType) + return emitOpError("initial value element expected to be of type ") + << memrefElementType << ", but was of type " << initElementType; + + // Check the shapes match, given that memref globals can only produce + // statically shaped memrefs and elements literal type must have a static + // shape we can assume both types are shaped. 
+ auto initShape = elementsAttr.getShapedType().getShape(); + auto memrefShape = memrefType.getShape(); + if (initShape != memrefShape) + return emitOpError("initial value shape expected to be ") + << memrefShape << " but was " << initShape; } } diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index ddcae8481a5b4..995120ad8680e 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -2047,53 +2047,119 @@ getUntiledConsumerFromSlice(RewriterBase &rewriter, /// A utility to fetch an untiled consumer of /// tensor.insert_slice/tensor.parallel_insert_slice. -static FailureOr -getUntiledConsumerFromSlice(RewriterBase &rewriter, Operation *sliceOp, - MutableArrayRef loops) { +static FailureOr> getUntiledConsumerOperandsFromSlices( + RewriterBase &rewriter, ArrayRef sliceOps, + MutableArrayRef loops) { assert(!loops.empty() && "unexpected empty loops"); - if (auto insertSlice = dyn_cast(sliceOp)) { - return getUntiledConsumerFromSlice(rewriter, insertSlice, loops); - } else if (auto parallelInsertSlice = - dyn_cast(sliceOp)) { - return getUntiledConsumerFromSlice(rewriter, parallelInsertSlice, loops); - } else { - return failure(); + assert(!sliceOps.empty() && "unexpected empty list of candidate slices"); + SmallVector fusedOperands; + for (auto sliceOp : sliceOps) { + FailureOr fusedOperand = + TypeSwitch>(sliceOp) + .Case( + [&](auto op) { + return getUntiledConsumerFromSlice(rewriter, op, loops); + }) + .Default([&](Operation *op) { + return rewriter.notifyMatchFailure(op, "unhandled slice type"); + }); + if (failed(fusedOperand)) { + return failure(); + } + if (!fusedOperands.empty() && + fusedOperand.value()->getOwner() != fusedOperands.front()->getOwner()) { + return rewriter.notifyMatchFailure( + fusedOperand.value()->getOwner(), + "all candidate slices must be to the same consumer"); + } + 
fusedOperands.push_back(fusedOperand.value()); } + return fusedOperands; +} + +template +static tensor::InsertSliceOp cloneAsInsertSlice(RewriterBase &rewriter, + InsertSliceOpTy sliceOp); + +template <> +tensor::InsertSliceOp +cloneAsInsertSlice(RewriterBase &rewriter, + tensor::InsertSliceOp insertSliceOp) { + return cast( + rewriter.clone(*insertSliceOp.getOperation())); +} + +template <> +tensor::InsertSliceOp cloneAsInsertSlice( + RewriterBase &rewriter, tensor::ParallelInsertSliceOp insertSliceOp) { + return rewriter.create( + insertSliceOp->getLoc(), insertSliceOp.getSource(), + insertSliceOp.getDest(), insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); +} + +static SmallVector +cloneAsInsertSlices(RewriterBase &rewriter, + ArrayRef candidateSlices) { + assert(!candidateSlices.empty() && + "unexpected empty list of slices to clone"); + SmallVector clonedSlices; + for (auto sliceOp : candidateSlices) { + TypeSwitch(sliceOp) + .Case( + [&](auto op) { + auto clonedOp = cloneAsInsertSlice(rewriter, op); + clonedSlices.push_back(clonedOp); + }) + .Default([&](Operation *op) { + // Assert here assuming this has already been checked. + assert(0 && "unexpected slice type while cloning as insert slice"); + }); + } + return clonedSlices; } /// Implementation of fusing consumer of a single slice by computing the /// slice of the consumer in-place for scf loop. FailureOr -mlir::scf::tileAndFuseConsumerOfSlice( - RewriterBase &rewriter, Operation *candidateSliceOp, +mlir::scf::tileAndFuseConsumerOfSlices( + RewriterBase &rewriter, ArrayRef candidateSlices, MutableArrayRef loops) { + if (candidateSlices.empty()) { + return rewriter.notifyMatchFailure( + rewriter.getUnknownLoc(), + "no candidate slices provided for consumer fusion"); + } // Return if `loops` is empty, return an error for now. Caller is expected // to handle this case. 
if (loops.empty()) { - return candidateSliceOp->emitOpError( + return rewriter.notifyMatchFailure( + candidateSlices.front(), "cannot call tile and fuse consumer with an empty loop nest"); } - if (!isa( - candidateSliceOp)) - return failure(); + + if (!(llvm::all_of(candidateSlices, llvm::IsaPred) || + llvm::all_of(candidateSlices, + llvm::IsaPred))) { + return rewriter.notifyMatchFailure( + candidateSlices.front(), + "candidates slices need to be all `tensor.extract_slice`s or " + "`tensor.parallel_insert_slice`s"); + } // 1. Get the consumer of scf.for for the result yielded by // tensor.insert_slice/parallel_insert_slice. - FailureOr maybeConsumerOpOperand = - getUntiledConsumerFromSlice(rewriter, candidateSliceOp, loops); - if (failed(maybeConsumerOpOperand)) { - return rewriter.notifyMatchFailure(candidateSliceOp, - "could not fetch consumer to fuse"); - } - OpOperand *consumerOpOperand = *maybeConsumerOpOperand; - Operation *consumerOp = consumerOpOperand->getOwner(); - unsigned operandNumber = consumerOpOperand->getOperandNumber(); - unsigned resultNumber = 0; - if (auto producerResult = dyn_cast(consumerOpOperand->get())) { - resultNumber = producerResult.getResultNumber(); - } else { - return rewriter.notifyMatchFailure( - consumerOp, "consumer op's operand doesn't seem to be an OpResult"); + SmallVector consumerOpOperands; + Operation *consumerOp; + { + FailureOr> maybeConsumerOpOperand = + getUntiledConsumerOperandsFromSlices(rewriter, candidateSlices, loops); + if (failed(maybeConsumerOpOperand)) { + return rewriter.notifyMatchFailure(candidateSlices.front(), + "could not fetch consumer to fuse"); + } + std::swap(consumerOpOperands, maybeConsumerOpOperand.value()); + consumerOp = consumerOpOperands.front()->getOwner(); } LoopLikeOpInterface outerMostLoop = loops.front(); @@ -2113,16 +2179,14 @@ mlir::scf::tileAndFuseConsumerOfSlice( if (!dstOp) return rewriter.notifyMatchFailure(consumerOp, "consumer op is not DPS operation"); - SmallVector dpsInits = - 
llvm::map_to_vector(dstOp.getDpsInits(), [](Value v) { return v; }); - if (llvm::is_contained(dpsInits, outerMostLoop->getResult(resultNumber))) { + if (llvm::any_of(consumerOpOperands, [&](OpOperand *opOperand) { + return dstOp.isDpsInit(opOperand); + })) { return rewriter.notifyMatchFailure( consumerOp, "consumer op taking the result of scf.for as init is not supported"); } - SmallVector newInits = dpsInits; - - Location loc = outerMostLoop->getLoc(); + SmallVector newInits = llvm::to_vector(dstOp.getDpsInits()); // 3. Move the whole loop structure right before firstUserOfLoop, the // dominance should be already ensured by `checkAssumptionForLoop`. @@ -2137,43 +2201,52 @@ mlir::scf::tileAndFuseConsumerOfSlice( // tensor.insert_slice. In the scf.for case this is a clone of the // candidateSliceOp whereas in the scf.forall case this is created from the // operands of tensor.parallel_insert_slice. - tensor::InsertSliceOp clonedInsertSliceOp; if (auto sliceOp = - dyn_cast(candidateSliceOp)) { + dyn_cast(candidateSlices.front())) { auto newForallOp = cast(innerMostLoop.getOperation()); rewriter.setInsertionPoint(newForallOp.getTerminator()); - clonedInsertSliceOp = rewriter.create( - loc, sliceOp.getSource(), sliceOp.getDest(), sliceOp.getMixedOffsets(), - sliceOp.getMixedSizes(), sliceOp.getMixedStrides()); } else { - rewriter.setInsertionPoint(candidateSliceOp); - clonedInsertSliceOp = - cast(rewriter.clone(*candidateSliceOp)); + rewriter.setInsertionPoint(candidateSlices.front()); } + // 5.a. Clone all the candidate slices as equivalent insert slice ops. + SmallVector clonedInsertSlices = + cloneAsInsertSlices(rewriter, candidateSlices); - // 5.a. Clone consumer op. + // 5.b. Clone consumer op. 
auto clonedConsumerOp = cast(rewriter.clone(*consumerOp)); + SmallVector operandNumbers = + llvm::map_to_vector(consumerOpOperands, [](OpOperand *opOperand) { + return opOperand->getOperandNumber(); + }); + SmallVector clonedOpFusedOperandsList = + llvm::map_to_vector(operandNumbers, [&](unsigned operandNum) { + return &clonedConsumerOp->getOpOperand(operandNum); + }); - // 5.b. Replace all uses of the loop result with the result of the cloned + // 5.c. Replace all uses of the loop result with the result of the cloned // tensor.insert_slice. - OpOperand &operandToReplace = clonedConsumerOp->getOpOperand(operandNumber); rewriter.modifyOpInPlace(clonedConsumerOp, [&]() { - operandToReplace.set(clonedInsertSliceOp.getResult()); + for (auto [operandToReplace, clonedSliceOp] : + llvm::zip_equal(clonedOpFusedOperandsList, clonedInsertSlices)) { + operandToReplace->set(clonedSliceOp.getResult()); + } }); // 6. Perform tiling of the cloned consumer and replace the operand at // `operandNumber` with the source of the cloned tensor.insert_slice op. - auto ossSliceOp = - cast(clonedInsertSliceOp.getOperation()); FailureOr tileAndFuseResult = - tensor::replaceInsertSliceWithTiledConsumer( - rewriter, ossSliceOp, clonedConsumerOp->getOpOperand(operandNumber)); + tensor::replaceInsertSlicesWithTiledConsumer(rewriter, clonedInsertSlices, + clonedOpFusedOperandsList); if (failed(tileAndFuseResult)) { return failure(); } + auto tiledConsumerOp = cast(tileAndFuseResult->tiledOps[0]); - rewriter.replaceAllUsesWith(tiledConsumerOp->getOperand(operandNumber), - clonedInsertSliceOp.getSource()); + for (auto [operandNum, clonedSliceOp] : + llvm::zip_equal(operandNumbers, clonedInsertSlices)) { + rewriter.replaceAllUsesWith(tiledConsumerOp->getOperand(operandNum), + clonedSliceOp.getSource()); + } // 7. Reconstruct [nested] loop with new inits. YieldTiledValuesFn newYieldValuesFn = @@ -2185,14 +2258,20 @@ mlir::scf::tileAndFuseConsumerOfSlice( // 8. 
Set inner insertPoint right before tiled consumer op. innerRewriter.setInsertionPoint(tiledConsumerOp); - SmallVector offsets = ossSliceOp.getMixedOffsets(); - SmallVector sizes = ossSliceOp.getMixedSizes(); - SmallVector strides = ossSliceOp.getMixedStrides(); + SmallVector> allOffsets, allSizes; + for (auto candidateSliceOp : clonedInsertSlices) { + SmallVector offsets = candidateSliceOp.getMixedOffsets(); + SmallVector sizes = candidateSliceOp.getMixedSizes(); + SmallVector strides = candidateSliceOp.getMixedStrides(); - // 9. Check all insert stride is 1. - if (!llvm::all_of(strides, isOneInteger)) { - return rewriter.notifyMatchFailure( - candidateSliceOp, "containingOp's result yield with stride"); + // 9. Check all insert stride is 1. + if (!llvm::all_of(strides, isOneInteger)) { + return rewriter.notifyMatchFailure( + candidateSliceOp, "containingOp's result yield with stride"); + } + + allOffsets.emplace_back(std::move(offsets)); + allSizes.emplace_back(std::move(sizes)); } // 10. Try to get iter domain position from input position. Use @@ -2202,8 +2281,8 @@ mlir::scf::tileAndFuseConsumerOfSlice( // tiledConsumerOp could lead to some chained unnecessary extra index // computation. SmallVector iterDomainOffsets, iterDomainSizes; - if (failed(clonedConsumerOp.getIterationDomainTileFromOperandTile( - rewriter, operandNumber, offsets, sizes, iterDomainOffsets, + if (failed(clonedConsumerOp.getIterationDomainTileFromOperandTiles( + rewriter, operandNumbers, allOffsets, allSizes, iterDomainOffsets, iterDomainSizes))) { return rewriter.notifyMatchFailure( clonedConsumerOp, @@ -2279,10 +2358,13 @@ mlir::scf::tileAndFuseConsumerOfSlice( // 16. Need to erase the old scf loop and the cloned consumer op. 
rewriter.eraseOp(clonedConsumerOp); + SmallVector tiledAndFusedOpOperands = + llvm::map_to_vector(operandNumbers, [&](unsigned operandNum) { + return &tileAndFuseResult->tiledOps[0]->getOpOperand(operandNum); + }); return scf::SCFFuseConsumerOfSliceResult{ - consumerOpOperand, - &(tileAndFuseResult->tiledOps[0]->getOpOperand(operandNumber)), - tileAndFuseResult->tiledOps}; + std::move(consumerOpOperands), std::move(tiledAndFusedOpOperands), + std::move(tileAndFuseResult->tiledOps)}; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp index 6f33f9b55ceb6..4392a2c0eb839 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp @@ -17,6 +17,9 @@ #include "mlir/Dialect/Tensor/Transforms/Transforms.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Interfaces/TilingInterface.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "tensor-swap-slices" using namespace mlir; @@ -39,21 +42,55 @@ FailureOr tensor::replaceExtractSliceWithTiledProducer( return *tiledResult; } -FailureOr tensor::replaceInsertSliceWithTiledConsumer( - OpBuilder &builder, OffsetSizeAndStrideOpInterface sliceOp, - OpOperand &consumer) { - auto consumerOp = dyn_cast(consumer.getOwner()); +FailureOr tensor::replaceInsertSlicesWithTiledConsumer( + OpBuilder &builder, ArrayRef sliceOps, + ArrayRef consumerOperands) { + if (sliceOps.empty()) { + LLVM_DEBUG( + { llvm::dbgs() << "expected candidate slices list to be non-empty"; }); + return failure(); + } + if (sliceOps.size() != consumerOperands.size()) { + LLVM_DEBUG({ + llvm::dbgs() + << "expected as many operands as the number of slices passed"; + }); + return failure(); + } + auto consumerOp = + 
dyn_cast(consumerOperands.front()->getOwner()); if (!consumerOp) return failure(); + for (auto opOperand : consumerOperands.drop_front()) { + if (opOperand->getOwner() != consumerOp) { + LLVM_DEBUG({ + llvm::dbgs() + << "expected all consumer operands to be from the same operation"; + }); + return failure(); + } + } - // `TilingInterface` currently only supports strides being 1. - if (!llvm::all_of(sliceOp.getMixedStrides(), isOneInteger)) - return failure(); + auto consumerOperandNums = llvm::map_to_vector( + consumerOperands, [](OpOperand *opOperand) -> unsigned { + return opOperand->getOperandNumber(); + }); + SmallVector> allOffsets; + SmallVector> allSizes; + for (auto sliceOp : sliceOps) { + + // `TilingInterface` currently only supports strides being 1. + if (!llvm::all_of(sliceOp.getMixedStrides(), isOneInteger)) + return failure(); + SmallVector offsets = sliceOp.getMixedOffsets(); + SmallVector sizes = sliceOp.getMixedSizes(); + allOffsets.emplace_back(std::move(offsets)); + allSizes.emplace_back(std::move(sizes)); + } FailureOr tiledResult = - consumerOp.getTiledImplementationFromOperandTile( - builder, consumer.getOperandNumber(), sliceOp.getMixedOffsets(), - sliceOp.getMixedSizes()); + consumerOp.getTiledImplementationFromOperandTiles( + builder, consumerOperandNums, allOffsets, allSizes); if (failed(tiledResult)) return failure(); diff --git a/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp b/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp index 7a9f8f4b1b528..12257da878a40 100644 --- a/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp +++ b/mlir/lib/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp @@ -19,9 +19,9 @@ using namespace mlir; #include "mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.cpp.inc" DiagnosedSilenceableFailure -transform::DebugEmitRemarkAtOp::apply(transform::TransformRewriter &rewriter, - transform::TransformResults &results, - transform::TransformState &state) { 
+transform::EmitRemarkAtOp::apply(transform::TransformRewriter &rewriter, + transform::TransformResults &results, + transform::TransformState &state) { if (isa(getAt().getType())) { auto payload = state.getPayloadOps(getAt()); for (Operation *op : payload) @@ -52,9 +52,10 @@ transform::DebugEmitRemarkAtOp::apply(transform::TransformRewriter &rewriter, return DiagnosedSilenceableFailure::success(); } -DiagnosedSilenceableFailure transform::DebugEmitParamAsRemarkOp::apply( - transform::TransformRewriter &rewriter, - transform::TransformResults &results, transform::TransformState &state) { +DiagnosedSilenceableFailure +transform::EmitParamAsRemarkOp::apply(transform::TransformRewriter &rewriter, + transform::TransformResults &results, + transform::TransformState &state) { std::string str; llvm::raw_string_ostream os(str); if (getMessage()) diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 5e0f36064be3b..862ed7bae1fbb 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -5856,18 +5856,7 @@ OpFoldResult ShapeCastOp::fold(FoldAdaptor adaptor) { // shape_cast(transpose(x)) -> shape_cast(x) if (auto transpose = getSource().getDefiningOp()) { - // This folder does - // shape_cast(transpose) -> shape_cast - // But another pattern, ConvertIllegalShapeCastOpsToTransposes, does - // shape_cast -> shape_cast(transpose) - // i.e. the complete opposite. When paired, these 2 patterns can cause - // infinite cycles in pattern rewriting. - // ConvertIllegalShapeCastOpsToTransposes only matches on scalable - // vectors, so by disabling this folder for scalable vectors the - // cycle is avoided. - // TODO: Check if ConvertIllegalShapeCastOpsToTransposes is - // still needed. If it's not, then we can fold here. 
- if (!transpose.getType().isScalable() && isOrderPreserving(transpose)) { + if (isOrderPreserving(transpose)) { setOperand(transpose.getVector()); return getResult(); } diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt index 997782df8c5f3..4cabac185171c 100644 --- a/mlir/lib/IR/CMakeLists.txt +++ b/mlir/lib/IR/CMakeLists.txt @@ -32,7 +32,6 @@ add_mlir_library(MLIRIR PatternMatch.cpp Region.cpp RegionKindInterface.cpp - StateStack.cpp SymbolTable.cpp TensorEncoding.cpp Types.cpp diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt index 488decd52ae64..02b6c694a28fd 100644 --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_library(MLIRSupport FileUtilities.cpp InterfaceSupport.cpp RawOstreamExtras.cpp + StateStack.cpp StorageUniquer.cpp Timing.cpp ToolUtilities.cpp diff --git a/mlir/lib/IR/StateStack.cpp b/mlir/lib/Support/StateStack.cpp similarity index 92% rename from mlir/lib/IR/StateStack.cpp rename to mlir/lib/Support/StateStack.cpp index 22fdcd73c625b..a9bb3ffb2e1b0 100644 --- a/mlir/lib/IR/StateStack.cpp +++ b/mlir/lib/Support/StateStack.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/IR/StateStack.h" +#include "mlir/Support/StateStack.h" namespace mlir { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 9272f6572fda3..23140f22555a5 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -498,7 +498,13 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, allocaInsertPoint = frame.allocaInsertPoint; return WalkResult::interrupt(); }); - if (walkResult.wasInterrupted()) + // In cases with multiple levels of outlining, the tree walk might find an + // alloca insertion point that is inside 
the original function while the + // builder insertion point is inside the outlined function. We need to make + // sure that we do not use it in those cases. + if (walkResult.wasInterrupted() && + allocaInsertPoint.getBlock()->getParent() == + builder.GetInsertBlock()->getParent()) return allocaInsertPoint; // Otherwise, insert to the entry block of the surrounding function. @@ -4378,6 +4384,9 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true, /*SeparateBeginEndCalls=*/true); + bool isTargetDevice = ompBuilder->Config.isTargetDevice(); + bool isOffloadEntry = + isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); LogicalResult result = llvm::TypeSwitch(op) @@ -4467,6 +4476,9 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, if (failed(result)) return failure(); + // Pretend we have IF(false) if we're not doing offload. + if (!isOffloadEntry) + ifCond = builder.getFalse(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; MapInfoData mapData; diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index e8ce528bd185e..baf7a82b1c24a 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -777,7 +777,7 @@ ModuleTranslation::ModuleTranslation(Operation *module, } ModuleTranslation::~ModuleTranslation() { - if (ompBuilder) + if (ompBuilder && !ompBuilder->isFinalized()) ompBuilder->finalize(); } @@ -2331,6 +2331,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext, // beforehand. 
translator.debugTranslation->addModuleFlagsIfNotPresent(); + // Call the OpenMP IR Builder callbacks prior to verifying the module + if (auto *ompBuilder = translator.getOpenMPBuilder()) + ompBuilder->finalize(); + if (!disableVerification && llvm::verifyModule(*translator.llvmModule, &llvm::errs())) return nullptr; diff --git a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp index 29bc49b78f15d..824201d17b5ab 100644 --- a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp +++ b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp @@ -350,8 +350,9 @@ Value CodeGen::genNonInitializerVar(const ast::VariableDecl *varDecl, Value results = builder.create( loc, pdl::RangeType::get(builder.getType()), /*types=*/ArrayAttr()); - return builder.create( - loc, opType.getName(), operands, std::nullopt, ValueRange(), results); + return builder.create(loc, opType.getName(), operands, + ArrayRef(), ValueRange(), + results); } if (ast::RangeType rangeTy = dyn_cast(type)) { diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index ee07081246fc7..b2daabb2a5957 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -171,6 +171,15 @@ ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" DIALECT_NAME transform EXTENSION_NAME transform_pdl_extension) +declare_mlir_dialect_extension_python_bindings( +ADD_TO_PARENT MLIRPythonSources.Dialects +ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/TransformDebugExtensionOps.td + SOURCES + dialects/transform/debug.py + DIALECT_NAME transform + EXTENSION_NAME transform_debug_extension) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/TransformDebugExtensionOps.td b/mlir/python/mlir/dialects/TransformDebugExtensionOps.td new file mode 100644 index 0000000000000..22a85d2366994 --- /dev/null +++ b/mlir/python/mlir/dialects/TransformDebugExtensionOps.td @@ -0,0 +1,19 @@ +//===-- 
TransformDebugExtensionOps.td - Binding entry point *- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Entry point of the generated Python bindings for the Debug extension of the +// Transform dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef PYTHON_BINDINGS_TRANSFORM_DEBUG_EXTENSION_OPS +#define PYTHON_BINDINGS_TRANSFORM_DEBUG_EXTENSION_OPS + +include "mlir/Dialect/Transform/DebugExtension/DebugExtensionOps.td" + +#endif // PYTHON_BINDINGS_TRANSFORM_DEBUG_EXTENSION_OPS diff --git a/mlir/python/mlir/dialects/transform/debug.py b/mlir/python/mlir/dialects/transform/debug.py new file mode 100644 index 0000000000000..f7c04268dc03d --- /dev/null +++ b/mlir/python/mlir/dialects/transform/debug.py @@ -0,0 +1,81 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from typing import Optional + +from ...ir import Attribute, Operation, Value, StringAttr +from .._transform_debug_extension_ops_gen import * +from .._transform_pdl_extension_ops_gen import _Dialect + +try: + from .._ods_common import _cext as _ods_cext +except ImportError as e: + raise RuntimeError("Error loading imports from extension module") from e + +from typing import Union + + +@_ods_cext.register_operation(_Dialect, replace=True) +class EmitParamAsRemarkOp(EmitParamAsRemarkOp): + def __init__( + self, + param: Attribute, + *, + anchor: Optional[Operation] = None, + message: Optional[Union[StringAttr, str]] = None, + loc=None, + ip=None, + ): + if isinstance(message, str): + message = StringAttr.get(message) + + super().__init__( + param, + anchor=anchor, + message=message, + loc=loc, + ip=ip, + ) + + +def emit_param_as_remark( + param: Attribute, + *, + anchor: Optional[Operation] = None, + message: Optional[Union[StringAttr, str]] = None, + loc=None, + ip=None, +): + return EmitParamAsRemarkOp(param, anchor=anchor, message=message, loc=loc, ip=ip) + + +@_ods_cext.register_operation(_Dialect, replace=True) +class EmitRemarkAtOp(EmitRemarkAtOp): + def __init__( + self, + at: Union[Operation, Value], + message: Optional[Union[StringAttr, str]] = None, + *, + loc=None, + ip=None, + ): + if isinstance(message, str): + message = StringAttr.get(message) + + super().__init__( + at, + message, + loc=loc, + ip=ip, + ) + + +def emit_remark_at( + at: Union[Operation, Value], + message: Optional[Union[StringAttr, str]] = None, + *, + loc=None, + ip=None, +): + return EmitRemarkAtOp(at, message, loc=loc, ip=ip) diff --git a/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir b/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir index e4f7ea150c850..5fc29c6442602 100644 --- a/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir +++ b/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir @@ -17,14 +17,28 @@ func.func 
@vector_arm_neon_mixed_types(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi4 // ----- -// CHECK-LABEL: vector_arm_neon_same_types -// CHECK-SAME: %[[A0:.*]]: vector<2x8xi8>, %[[A1:.*]]: vector<2x8xi8>, %[[A2:.*]]: vector<2x2xi32> -// CHECK-DAG: %[[D0:.*]] = vector.shape_cast %[[A0]] : vector<2x8xi8> to vector<16xi8> -// CHECK-DAG: %[[D1:.*]] = vector.shape_cast %[[A1]] : vector<2x8xi8> to vector<16xi8> -// CHECK-DAG: %[[D2:.*]] = vector.shape_cast %[[A2]] : vector<2x2xi32> to vector<4xi32> -// CHECK-DAG: %[[D3:.*]] = arm_neon.intr.smmla %[[D2]], %[[D0]], %[[D1]] : vector<16xi8> to vector<4xi32> -// CHECK-DAG: %[[D4:.*]] = vector.shape_cast %[[D3]] : vector<4xi32> to vector<2x2xi32> -func.func @vector_arm_neon_same_types(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { +// CHECK-LABEL: vector_arm_neon_implicit_extsi +// CHECK-SAME: %[[LHS:.+]]: vector<2x8xi8>, %[[RHS:.+]]: vector<2x8xi8>, %[[ACC:.+]]: vector<2x2xi32> +// CHECK: %[[L:.+]] = vector.shape_cast %[[LHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[R:.+]] = vector.shape_cast %[[RHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[A:.+]] = vector.shape_cast %[[ACC]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[M:.+]] = arm_neon.intr.smmla %[[A]], %[[L]], %[[R]] : vector<16xi8> to vector<4xi32> +// CHECK: %{{.+}} = vector.shape_cast %[[M]] : vector<4xi32> to vector<2x2xi32> +func.func @vector_arm_neon_implicit_extsi(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { + %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs, %rhs, %acc : vector<2x8xi8>, vector<2x8xi8> into vector<2x2xi32> + return %res : vector<2x2xi32> +} + +// ----- + +// CHECK-LABEL: vector_arm_neon_signed_signed +// CHECK-SAME: %[[LHS:.+]]: vector<2x8xi8>, %[[RHS:.+]]: 
vector<2x8xi8>, %[[ACC:.+]]: vector<2x2xi32> +// CHECK: %[[L:.+]] = vector.shape_cast %[[LHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[R:.+]] = vector.shape_cast %[[RHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[A:.+]] = vector.shape_cast %[[ACC]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[M:.+]] = arm_neon.intr.smmla %[[A]], %[[L]], %[[R]] : vector<16xi8> to vector<4xi32> +// CHECK: %{{.+}} = vector.shape_cast %[[M]] : vector<4xi32> to vector<2x2xi32> +func.func @vector_arm_neon_signed_signed(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { %lhs_extsi = arith.extsi %lhs : vector<2x8xi8> to vector<2x8xi32> %rhs_extsi = arith.extsi %rhs : vector<2x8xi8> to vector<2x8xi32> %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<2x8xi32>, vector<2x8xi32> into vector<2x2xi32> @@ -33,11 +47,51 @@ func.func @vector_arm_neon_same_types(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8> // ----- -// CHECK-LABEL: vector_arm_neon_without_extsi -// CHECK-SAME: %[[A0:.*]]: vector<2x8xi32>, %[[A1:.*]]: vector<2x8xi32>, %[[A2:.*]]: vector<2x2xi32> -// CHECK-DAG: %[[D0:.*]] = vector.contract -func.func @vector_arm_neon_without_extsi(%lhs: vector<2x8xi32>, %rhs: vector<2x8xi32>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { - %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs, %rhs, %acc : vector<2x8xi32>, vector<2x8xi32> into vector<2x2xi32> +// CHECK-LABEL: vector_arm_neon_unsigned_signed +// CHECK-SAME: %[[LHS:.+]]: vector<2x8xi8>, %[[RHS:.+]]: vector<2x8xi8>, %[[ACC:.+]]: vector<2x2xi32> +// CHECK: %[[L:.+]] = vector.shape_cast 
%[[LHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[R:.+]] = vector.shape_cast %[[RHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[A:.+]] = vector.shape_cast %[[ACC]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[M:.+]] = arm_neon.intr.usmmla %[[A]], %[[L]], %[[R]] : vector<16xi8> to vector<4xi32> +// CHECK: %{{.+}} = vector.shape_cast %[[M]] : vector<4xi32> to vector<2x2xi32> +func.func @vector_arm_neon_unsigned_signed(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { + %lhs_extsi = arith.extui %lhs : vector<2x8xi8> to vector<2x8xi32> + %rhs_extsi = arith.extsi %rhs : vector<2x8xi8> to vector<2x8xi32> + %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<2x8xi32>, vector<2x8xi32> into vector<2x2xi32> + return %res : vector<2x2xi32> +} + +// ----- + +// CHECK-LABEL: vector_arm_neon_unsigned_unsigned +// CHECK-SAME: %[[LHS:.+]]: vector<2x8xi8>, %[[RHS:.+]]: vector<2x8xi8>, %[[ACC:.+]]: vector<2x2xi32> +// CHECK: %[[L:.+]] = vector.shape_cast %[[LHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[R:.+]] = vector.shape_cast %[[RHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[A:.+]] = vector.shape_cast %[[ACC]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[M:.+]] = arm_neon.intr.ummla %[[A]], %[[L]], %[[R]] : vector<16xi8> to vector<4xi32> +// CHECK: %{{.+}} = vector.shape_cast %[[M]] : vector<4xi32> to vector<2x2xi32> +func.func @vector_arm_neon_unsigned_unsigned(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { + %lhs_extsi = arith.extui %lhs : vector<2x8xi8> to vector<2x8xi32> + %rhs_extsi = arith.extui %rhs : vector<2x8xi8> to vector<2x8xi32> + %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> 
(d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<2x8xi32>, vector<2x8xi32> into vector<2x2xi32> + return %res : vector<2x2xi32> +} + +// ----- + +// CHECK-LABEL: vector_arm_neon_signed_unsigned +// CHECK-SAME: %[[LHS:.+]]: vector<2x8xi8>, %[[RHS:.+]]: vector<2x8xi8>, %[[ACC:.+]]: vector<2x2xi32> +// CHECK: %[[ACC_T:.+]] = vector.transpose %[[ACC]], [1, 0] : vector<2x2xi32> to vector<2x2xi32> +// CHECK: %[[L:.+]] = vector.shape_cast %[[LHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[R:.+]] = vector.shape_cast %[[RHS]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[A:.+]] = vector.shape_cast %[[ACC_T]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[M:.+]] = arm_neon.intr.usmmla %[[A]], %[[R]], %[[L]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[OUT_T:.+]] = vector.shape_cast %[[M]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %{{.+}} = vector.transpose %[[OUT_T]], [1, 0] : vector<2x2xi32> to vector<2x2xi32> +func.func @vector_arm_neon_signed_unsigned(%lhs: vector<2x8xi8>, %rhs: vector<2x8xi8>, %acc : vector<2x2xi32>) -> vector<2x2xi32> { + %lhs_extsi = arith.extsi %lhs : vector<2x8xi8> to vector<2x8xi32> + %rhs_extsi = arith.extui %rhs : vector<2x8xi8> to vector<2x8xi32> + %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<2x8xi32>, vector<2x8xi32> into vector<2x2xi32> return %res : vector<2x2xi32> } diff --git a/mlir/test/Dialect/ArmSME/vector-legalization.mlir b/mlir/test/Dialect/ArmSME/vector-legalization.mlir index d56df9814f173..6cdf576272ebc 100644 --- a/mlir/test/Dialect/ArmSME/vector-legalization.mlir +++ b/mlir/test/Dialect/ArmSME/vector-legalization.mlir @@ -491,51 +491,6 @@ func.func 
@illegal_transpose_no_defining_source_op(%vec: vector<[4]x1xf32>) -> v // ----- -// CHECK-LABEL: @illegal_shape_cast_to_transpose_2d( -// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) -func.func @illegal_shape_cast_to_transpose_2d(%vec: vector<[4]x1xf32>) -> vector<1x[4]xf32> { - // CHECK: vector.transpose %[[VEC]], [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> - %0 = vector.shape_cast %vec : vector<[4]x1xf32> to vector<1x[4]xf32> - return %0 : vector<1x[4]xf32> -} - -// ----- - -// CHECK-LABEL: @illegal_shape_cast_to_transpose_1d( -// CHECK-SAME: %[[VEC:.*]]: vector<[4]x1xf32>) -func.func @illegal_shape_cast_to_transpose_1d(%vec: vector<[4]x1xf32>) -> vector<[4]xf32> { - // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<[4]x1xf32> to vector<1x[4]xf32> - // CHECK: vector.shape_cast %[[TRANSPOSE]] : vector<1x[4]xf32> to vector<[4]xf32> - %0 = vector.shape_cast %vec : vector<[4]x1xf32> to vector<[4]xf32> - return %0 : vector<[4]xf32> -} - -// ----- - -// CHECK-LABEL: @lift_illegal_2d_shape_cast_to_memory -func.func @lift_illegal_2d_shape_cast_to_memory(%a: index, %b: index, %memref: memref) -> vector<1x[4]xf32> { - // CHECK: vector.transfer_read {{.*}} : memref, vector<1x[4]xf32> - // CHECK-NOT: vector.shape_cast - %pad = arith.constant 0.0 : f32 - %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref, vector<[4]x1xf32> - %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<1x[4]xf32> - return %cast : vector<1x[4]xf32> -} - -// ----- - -// CHECK-LABEL: @lift_illegal_1d_shape_cast_to_memory -func.func @lift_illegal_1d_shape_cast_to_memory(%a: index, %b: index, %memref: memref) -> vector<[4]xf32> { - // CHECK: vector.transfer_read {{.*}} : memref, vector<1x[4]xf32> - // CHECK-NOT: vector.shape_cast {{.*}} : vector<[4]x1xf32> to vector<[4]xf32> - %pad = arith.constant 0.0 : f32 - %illegalRead = vector.transfer_read %memref[%a, %b], %pad {in_bounds = [false, true]}: memref, vector<[4]x1xf32> - 
%cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<[4]xf32> - return %cast : vector<[4]xf32> -} - -// ----- - // CHECK-LABEL: @multi_tile_splat func.func @multi_tile_splat() -> vector<[8]x[8]xi32> { @@ -656,3 +611,59 @@ func.func @vector_mask_without_maskable_op(%mask: vector<16x2xi1>, %vec: vector< %0 = vector.mask %mask { vector.yield %vec : vector<16x16xf32> } : vector<16x2xi1> -> vector<16x16xf32> return %0 : vector<16x16xf32> } + +// ----- + +//============================================================================= +// 1D examples - to be moved to the SVE dialect +//============================================================================= + +/// TODO: Handle in_bounds + +// CHECK-LABEL: func.func @xfer_read_scalable_column( +// CHECK-SAME: %[[IDX_0:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[PAD:.*]]: f32, +// CHECK-SAME: %[[SRC:.*]]: memref) -> vector<[4]x1xf32> { +func.func @xfer_read_scalable_column(%a: index, %b: index, %pad: f32, %src: memref) -> (vector<[4]x1xf32>) { + // CHECK: %[[INIT:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32> + // CHECK: %[[STEP:.*]] = arith.constant 1 : index + // CHECK: %[[C4:.*]] = arith.constant 4 : index + // CHECK: %[[LB:.*]] = arith.constant 0 : index + // CHECK: %[[VSCALE:.*]] = vector.vscale + // CHECK: %[[C4_VSCALE:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index + + // + // CHECK: %[[SCF:.*]] = scf.for %[[IND_VAR:.*]] = %[[LB]] to %[[C4_VSCALE]] step %[[STEP]] iter_args(%[[SCF_RES:.*]] = %[[INIT]]) -> (vector<[4]xf32>) { + // CHECK: %[[IDX_0_UPDATED:.*]] = arith.addi %[[IND_VAR]], %[[IDX_0]] : index + // CHECK: %[[VAL_10:.*]] = memref.load %[[SRC]][%[[IDX_0_UPDATED]], %[[IDX_1]]] : memref + // CHECK: %[[RES_UPDATED:.*]] = vector.insert %[[VAL_10]], %[[SCF_RES]] [%[[IND_VAR]]] : f32 into vector<[4]xf32> + // CHECK: scf.yield %[[RES_UPDATED]] : vector<[4]xf32> + // CHECK: } + + // + // CHECK: %[[SC:.*]] = vector.shape_cast %[[SCF]] : 
vector<[4]xf32> to vector<[4]x1xf32> + // CHECK: return %[[SC]] + %read = vector.transfer_read %src[%a, %b], %pad : memref, vector<[4]x1xf32> + return %read : vector<[4]x1xf32> +} + +// ----- + +// CHECK-LABEL: func.func @negative_xfer_read_scalable_column_x2 +func.func @negative_xfer_read_scalable_column_x2(%a: index, %b: index, %pad: f32, %src: memref) -> (vector<[4]x2xf32>) { + // CHECK-NOT: scf.for + // CHECK-NOT: memref.load + %read = vector.transfer_read %src[%a, %b], %pad : memref, vector<[4]x2xf32> + return %read : vector<[4]x2xf32> +} + +// ----- + +// CHECK-LABEL: func.func @negative_xfer_read_scalable_column_scalable_trailing_dim +func.func @negative_xfer_read_scalable_column_scalable_trailing_dim(%a: index, %b: index, %pad: f32, %src: memref) -> (vector<4x[1]xf32>) { + // CHECK-NOT: scf.for + // CHECK-NOT: memref.load + %read = vector.transfer_read %src[%a, %b], %pad : memref, vector<4x[1]xf32> + return %read : vector<4x[1]xf32> +} diff --git a/mlir/test/Dialect/ArmSVE/legalize-transfer-read.mlir b/mlir/test/Dialect/ArmSVE/legalize-transfer-read.mlir new file mode 100644 index 0000000000000..5f923cdafb956 --- /dev/null +++ b/mlir/test/Dialect/ArmSVE/legalize-transfer-read.mlir @@ -0,0 +1,257 @@ +// RUN: mlir-opt --arm-sve-legalize-vector-storage --split-input-file %s | FileCheck %s + + +// Test the `LegalizeTransferRead` pattern +// (mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp) + +// ----- + +// This is the base case, unremarkable in any way, except that it's our main +// motivating example and use case. 
+ +// CHECK-LABEL: @base_case +// CHECK-SAME: %[[I:.+]]: index, %[[J:.+]]: index, %[[M:.+]]: +// CHECK: %[[PAD:.+]] = arith.constant 0 : i8 +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1], [2, 3]] +// CHECK-SAME: : memref into memref +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSE]][%[[I]], %[[J]], %[[C0]]], %[[PAD]] {in_bounds = [true]} +// CHECK-SAME: : memref, vector<[32]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[32]xi8> to vector<[4]x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x8xi8> + +func.func @base_case(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// ----- + +// Test the case where the scalable dimension is not the second-to-last. + +// CHECK-LABEL: @with_3d_vector +// CHECK-SAME: %[[I:.+]]: index, %[[J:.+]]: index, %[[M:.+]]: +// CHECK: %[[PAD:.+]] = arith.constant 0 : i8 +// CHECK: %[[COLLAPSED:.+]] = memref.collapse_shape %[[M]] +// CHECK-SAME{LITERAL}: [[0], [1, 2, 3]] +// CHECK-SAME: : memref into memref +// CHECK-NEXT: %[[T0:.+]] = vector.transfer_read %[[COLLAPSED]][%[[I]], %[[J]]], %[[PAD]] {in_bounds = [true]} +// CHECK-SAME: : memref, vector<[64]xi8> +// CHECK-NEXT: %[[T1:.+]] = vector.shape_cast %[[T0]] : vector<[64]xi8> to vector<[4]x2x8xi8> +// CHECK-NEXT: return %[[T1]] : vector<[4]x2x8xi8> + +func.func @with_3d_vector(%i : index, %j : index, %M : memref) -> vector<[4]x2x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref, vector<[4]x2x8xi8> + + return %A : vector<[4]x2x8xi8> +} + +// ----- + +// Test the case when the vector is already LLVM-legal (fixed). 
+ +// CHECK-LABEL: @negative_vector_legal_fixed +// CHECK-NOT: memref.collapse + +func.func @negative_vector_legal_fixed(%i : index, %j : index, %M : memref) -> vector<8x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true]} : memref, vector<8x8xi8> + + return %A : vector<8x8xi8> +} + +// ----- + +// Test the case when the vector is already LLVM-legal (single-dimension scalable). + +// CHECK-LABEL: @negative_vector_legal_1d_scalable +// CHECK-NOT: memref.collapse + +func.func @negative_vector_legal_1d_scalable(%i : index, %j : index, %M : memref) -> vector<[8]xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true]} : memref, vector<[8]xi8> + + return %A : vector<[8]xi8> +} + +// ----- + +// Test the case when the vector is already LLVM-legal (single trailing +// scalable dimension). + +// CHECK-LABEL: @negative_vector_legal_trailing_scalable_dim +// CHECK-NOT: memref.collapse + +func.func @negative_vector_legal_trailing_scalable_dim(%i : index, %j : index, %M : memref) -> vector<8x[8]xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true]} : memref, vector<8x[8]xi8> + + return %A : vector<8x[8]xi8> +} + +// ----- + +// Test the case of unsupported vector type (more than one scalable dimension) + +// CHECK-LABEL: @negative_vector_type_two_scalable_dims +// CHECK-NOT: memref.collapse + +func.func @negative_vector_type_two_scalable_dims(%i : index, %j : index, %M : memref) -> vector<[8]x[8]x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref, vector<[8]x[8]x8xi8> + + return %A : vector<[8]x[8]x8xi8> +} + +// ----- + +// Test the case of reading from a tensor 
- not supported, since the +// transform reasons about memory layouts. + +// CHECK-LABEL: @negative_tensor_transfer +// CHECK-NOT: memref.collapse + +func.func @negative_tensor_transfer(%i : index, %j : index, %M : tensor) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true]} : tensor, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// ----- + +// Test the case when the transfer is discontiguous because the memref +// is discontiguous. +// There are other ways to make a memref discontiguous. The transformation +// is not concerned with the particular reason a memref is discontiguous, but +// only with the fact. Therefore there are no variations with the memref made +// discontiguous by some other mechanism. + +// CHECK-LABEL: @negative_discontig_mem +// CHECK-NOT: memref.collapse + +#strides = strided<[?, ?, 16, 1]> + +func.func @negative_discontig_mem(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// ----- + +// Test the case when the transformation is not applied because of +// a non-trivial permutation map (broadcast). + +// CHECK-LABEL: @negative_broadcast +// CHECK-NOT: memref.collapse + +#perm = affine_map<(i, j, k, p) -> (k, 0)> + +func.func @negative_broadcast(%i : index, %j : index, %M : memref) -> vector<[4]x8xi8> { + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {permutation_map = #perm, in_bounds = [true, true] } : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// ----- + +// Test the case of a masked read - not supported right now. 
+// (see mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp) + +// CHECK-LABEL: @negative_masked +// CHECK-NOT: memref.collapse + +func.func @negative_masked( + %i : index, %j : index, + %M : memref, %mask : vector<[4]x8xi1>) -> vector<[4]x8xi8> { + + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.mask %mask { + vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true] } : memref, vector<[4]x8xi8> + } : vector<[4]x8xi1> -> vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// ----- + +// Test case with a mask operand - not supported right now. +// (see mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp) + +// CHECK-LABEL: @negative_with_mask +// CHECK-NOT: memref.collapse + +func.func @negative_with_mask( + %i : index, %j : index, + %M : memref, %mask : vector<[4]x8xi1>) -> vector<[4]x8xi8> { + + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad, %mask {in_bounds = [true, true] } : memref, vector<[4]x8xi8> + + return %A : vector<[4]x8xi8> +} + +// ----- + +// Test the case when the dimensions to collapse (excluding the scalable one) +// of the vector and the memref do not match (static non matching dimension). + +// CHECK-LABEL: @negative_non_matching_dim_static +// CHECK-NOT: memref.collapse + +func.func @negative_non_matching_dim_static(%i : index, %j : index, %M : memref) -> vector<[4]x4xi8> { + + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true] } : memref, vector<[4]x4xi8> + + return %A : vector<[4]x4xi8> +} + +// ----- + +// Test the case when the dimensions to collapse (excluding the scalable one) +// of the vector and the memref do not match (dynamic non matching dimension). 
+ +// CHECK-LABEL: @negative_non_matching_dim_dynamic +// CHECK-NOT: memref.collapse + +func.func @negative_non_matching_dim_dynamic(%i : index, %j : index, %M : memref) -> vector<[4]x4xi8> { + + %c0 = arith.constant 0 : index + %pad = arith.constant 123 : i8 + + %A = vector.transfer_read %M[%i, %j, %c0, %c0], %pad {in_bounds = [true, true] } : memref, vector<[4]x4xi8> + + return %A : vector<[4]x4xi8> +} diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir index 318edca73cce1..8be4e1b79c52c 100644 --- a/mlir/test/Dialect/Linalg/hoisting.mlir +++ b/mlir/test/Dialect/Linalg/hoisting.mlir @@ -1,76 +1,210 @@ // RUN: mlir-opt -transform-interpreter -canonicalize --split-input-file --allow-unregistered-dialect %s | FileCheck %s -// CHECK-LABEL: func @hoist_vector_transfer_pairs( -// CHECK-SAME: %[[MEMREF0:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[MEMREF1:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[MEMREF2:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[MEMREF3:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[MEMREF4:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[MEMREF5:[a-zA-Z0-9]*]]: memref, -// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index, -// CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index, -// CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index, -// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index, -// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1 -func.func @hoist_vector_transfer_pairs( - %memref0: memref, %memref1: memref, %memref2: memref, - %memref3: memref, %memref4: memref, %memref5: memref, - %val: index, %lb : index, %ub : index, %step: index, %cmp: i1) { +///---------------------------------------------------------------------------------------- +/// Tests for vector.transfer_read + vector.transfer_write pairs +/// +/// * Nested in double loops +// * Indices depend on induction variables +///---------------------------------------------------------------------------------------- + +// CHECK-LABEL: func @mem_use_outside +// CHECK-SAME: %[[MEM:[a-zA-Z0-9]+]]: memref, +// 
CHECK-SAME: %[[LB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[UB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[STEP:[a-zA-Z0-9]+]]: index) +func.func @mem_use_outside(%mem: memref, %lb : index, %ub : index, %step: index) { + %pad = arith.constant 0.0 : f32 + +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: %[[READ:.*]] = vector.transfer_read %[[MEM]][%[[I]], %[[I]]], %[[PAD]] : memref, vector<1xf32> +// CHECK: %[[SCF:.*]] = scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[VAL_5:.*]] = %[[READ]]) -> (vector<1xf32>) { +// CHECK: %[[USE:.*]] = "val_use"(%[[VAL_5]]) : (vector<1xf32>) -> vector<1xf32> +// CHECK: scf.yield %[[USE]] : vector<1xf32> +// CHECK: } +// CHECK: vector.transfer_write %[[SCF]], %[[MEM]][%[[I]], %[[I]]] : vector<1xf32>, memref +// CHECK: "mem_use"(%[[MEM]]) : (memref) -> () +// CHECK: } + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %read = vector.transfer_read %mem[%i, %i], %pad: memref, vector<1xf32> + %use = "val_use"(%read) : (vector<1xf32>) -> vector<1xf32> + vector.transfer_write %use, %mem[%i, %i] : vector<1xf32>, memref + } + } + "mem_use"(%mem) : (memref) -> () + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @mem_use_inside_outer_loop +// CHECK-SAME: %[[MEM:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[LB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[UB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[STEP:[a-zA-Z0-9]+]]: index) +func.func @mem_use_inside_outer_loop(%mem: memref, %lb : index, %ub : index, %step: index) { + %pad = arith.constant 
0.0 : f32 + +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: %[[READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[I]], %[[I]]], %[[PAD]] : memref, vector<1xf32> +// CHECK: %[[SCF:.*]] = scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[VAL_5:.*]] = %[[READ]]) -> (vector<1xf32>) { +// CHECK: %[[USE:.*]] = "val_use"(%[[VAL_5]]) : (vector<1xf32>) -> vector<1xf32> +// CHECK: scf.yield %[[USE]] : vector<1xf32> +// CHECK: } +// CHECK: vector.transfer_write %[[SCF]], %[[MEM]]{{\[}}%[[I]], %[[I]]] : vector<1xf32>, memref +// CHECK: "mem_use"(%[[MEM]]) : (memref) -> () +// CHECK: } + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %read = vector.transfer_read %mem[%i, %i], %pad: memref, vector<1xf32> + %use = "val_use"(%read) : (vector<1xf32>) -> vector<1xf32> + vector.transfer_write %use, %mem[%i, %i] : vector<1xf32>, memref + } + "mem_use"(%mem) : (memref) -> () + } + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +///---------------------------------------------------------------------------------------- +/// Tests for vector.transfer_read + vector.transfer_write pairs +/// +/// * Nested in double loops +// * Indices are constant +///---------------------------------------------------------------------------------------- + +// CHECK-LABEL: func @negative_mem_use_inside_inner_loop_before_write +// CHECK-SAME: %[[MEM:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[LB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[UB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[STEP:[a-zA-Z0-9]+]]: index) +func.func 
@negative_mem_use_inside_inner_loop_before_write(%mem: memref, %lb : index, %ub : index, %step: index) { %c0 = arith.constant 0 : index - %cst = arith.constant 0.0 : f32 + %pad = arith.constant 0.0 : f32 + +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: %[[READ:.*]] = vector.transfer_read %[[MEM]][%[[C0]], %[[C0]]], %[[PAD]] : memref, vector<1xf32> +// CHECK: %[[USE:.*]] = "val_use"(%[[READ]]) : (vector<1xf32>) -> vector<1xf32> +// CHECK: "mem_use"(%[[MEM]]) : (memref) -> () +// CHECK: vector.transfer_write %[[USE]], %[[MEM]][%[[C0]], %[[C0]]] : vector<1xf32>, memref +// CHECK: } +// CHECK: } + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %read = vector.transfer_read %mem[%c0, %c0], %pad: memref, vector<1xf32> + %use = "val_use"(%read) : (vector<1xf32>) -> vector<1xf32> + "mem_use"(%mem) : (memref) -> () + vector.transfer_write %use, %mem[%c0, %c0] : vector<1xf32>, memref + } + } + return +} -// CHECK: vector.transfer_read %{{.*}} : memref, vector<1xf32> -// CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) { -// CHECK: vector.transfer_read %{{.*}} : memref, vector<2xf32> -// CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) { -// CHECK: vector.transfer_read %{{.*}} : memref, vector<3xf32> -// CHECK: vector.transfer_read %{{.*}} : memref, vector<4xf32> -// CHECK: "some_crippling_use"(%[[MEMREF4]]) : (memref) -> () -// CHECK: vector.transfer_read %{{.*}} : memref, vector<5xf32> -// CHECK: "some_use"(%{{.*}}) : (vector<1xf32>) -> vector<1xf32> -// CHECK: "some_use"(%{{.*}}) : (vector<2xf32>) -> vector<2xf32> -// CHECK: "some_use"(%[[MEMREF2]], %{{.*}}) : (memref, vector<3xf32>) -> vector<3xf32> -// CHECK: "some_use"(%{{.*}}) 
: (vector<4xf32>) -> vector<4xf32> -// CHECK: "some_use"(%{{.*}}) : (vector<5xf32>) -> vector<5xf32> -// CHECK: vector.transfer_write %{{.*}} : vector<3xf32>, memref -// CHECK: vector.transfer_write %{{.*}} : vector<4xf32>, memref -// CHECK: vector.transfer_write %{{.*}} : vector<5xf32>, memref -// CHECK: "some_crippling_use"(%[[MEMREF3]]) : (memref) -> () -// CHECK: scf.yield {{.*}} : vector<1xf32>, vector<2xf32> +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @negative_mem_use_inside_inner_loop_after_write +// CHECK-SAME: %[[MEM:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[LB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[UB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[STEP:[a-zA-Z0-9]+]]: index) +func.func @negative_mem_use_inside_inner_loop_after_write(%mem: memref, %lb : index, %ub : index, %step: index) { + %c0 = arith.constant 0 : index + %pad = arith.constant 0.0 : f32 + +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: %[[READ:.*]] = vector.transfer_read %[[MEM]][%[[C0]], %[[C0]]], %[[PAD]] : memref, vector<1xf32> +// CHECK: %[[USE:.*]] = "val_use"(%[[READ]]) : (vector<1xf32>) -> vector<1xf32> +// CHECK: vector.transfer_write %[[USE]], %[[MEM]][%[[C0]], %[[C0]]] : vector<1xf32>, memref +// CHECK: "mem_use"(%[[MEM]]) : (memref) -> () +// CHECK: } +// CHECK: } + scf.for %i = %lb to %ub step %step { + scf.for %j = %lb to %ub step %step { + %r3 = vector.transfer_read %mem[%c0, %c0], %pad: memref, 
vector<1xf32> + %u3 = "val_use"(%r3) : (vector<1xf32>) -> vector<1xf32> + vector.transfer_write %u3, %mem[%c0, %c0] : vector<1xf32>, memref + "mem_use"(%mem) : (memref) -> () + } + } + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["func.func"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + transform.structured.hoist_redundant_vector_transfers %0 + : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @negative_mem_use_inside_inner_loop_before_read +// CHECK-SAME: %[[MEM:[a-zA-Z0-9]+]]: memref, +// CHECK-SAME: %[[LB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[UB:[a-zA-Z0-9]+]]: index, +// CHECK-SAME: %[[STEP:[a-zA-Z0-9]+]]: index) +func.func @negative_mem_use_inside_inner_loop_before_read(%mem: memref, %lb : index, %ub : index, %step: index) { + %c0 = arith.constant 0 : index + %pad = arith.constant 0.0 : f32 + +// CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +// CHECK: "mem_use"(%[[MEM]]) : (memref) -> () +// CHECK: vector.transfer_read %{{.*}} : memref, vector<1xf32> +// CHECK: "val_use"(%{{.*}}) : (vector<1xf32>) -> vector<1xf32> +// CHECK: vector.transfer_write %{{.*}} : vector<1xf32>, memref // CHECK: } -// CHECK: vector.transfer_write %{{.*}} : vector<2xf32>, memref -// CHECK: "unrelated_use"(%[[MEMREF0]]) : (memref) -> () -// CHECK: scf.yield {{.*}} : vector<1xf32> // CHECK: } -// CHECK: vector.transfer_write %{{.*}} : vector<1xf32>, memref -// CHECK: "unrelated_use"(%[[MEMREF1]]) : (memref) -> () scf.for %i = %lb to %ub step %step { scf.for %j = %lb to %ub step %step { - %r0 = vector.transfer_read %memref1[%c0, %c0], %cst: memref, vector<1xf32> - %r1 = vector.transfer_read %memref0[%i, %i], %cst: memref, vector<2xf32> - %r2 = vector.transfer_read %memref2[%c0, %c0], 
%cst: memref, vector<3xf32> - %r3 = vector.transfer_read %memref3[%c0, %c0], %cst: memref, vector<4xf32> - "some_crippling_use"(%memref4) : (memref) -> () - %r4 = vector.transfer_read %memref4[%c0, %c0], %cst: memref, vector<5xf32> - %r5 = vector.transfer_read %memref5[%c0, %c0], %cst: memref, vector<6xf32> - "some_crippling_use"(%memref5) : (memref) -> () - %u0 = "some_use"(%r0) : (vector<1xf32>) -> vector<1xf32> - %u1 = "some_use"(%r1) : (vector<2xf32>) -> vector<2xf32> - %u2 = "some_use"(%memref2, %r2) : (memref, vector<3xf32>) -> vector<3xf32> - %u3 = "some_use"(%r3) : (vector<4xf32>) -> vector<4xf32> - %u4 = "some_use"(%r4) : (vector<5xf32>) -> vector<5xf32> - %u5 = "some_use"(%r5) : (vector<6xf32>) -> vector<6xf32> - vector.transfer_write %u0, %memref1[%c0, %c0] : vector<1xf32>, memref - vector.transfer_write %u1, %memref0[%i, %i] : vector<2xf32>, memref - vector.transfer_write %u2, %memref2[%c0, %c0] : vector<3xf32>, memref - vector.transfer_write %u3, %memref3[%c0, %c0] : vector<4xf32>, memref - vector.transfer_write %u4, %memref4[%c0, %c0] : vector<5xf32>, memref - vector.transfer_write %u5, %memref5[%c0, %c0] : vector<6xf32>, memref - "some_crippling_use"(%memref3) : (memref) -> () + "mem_use"(%mem) : (memref) -> () + %read = vector.transfer_read %mem[%c0, %c0], %pad: memref, vector<1xf32> + %use = "val_use"(%read) : (vector<1xf32>) -> vector<1xf32> + vector.transfer_write %use, %mem[%c0, %c0] : vector<1xf32>, memref } - "unrelated_use"(%memref0) : (memref) -> () } - "unrelated_use"(%memref1) : (memref) -> () return } @@ -86,6 +220,12 @@ module attributes {transform.with_named_sequence} { // ----- +///---------------------------------------------------------------------------------------- +/// Other tests +/// +/// TODO: Document +///---------------------------------------------------------------------------------------- + // CHECK-LABEL: func @hoist_vector_transfer_pairs_disjoint( // CHECK-SAME: %[[MEMREF0:[a-zA-Z0-9]*]]: memref, // CHECK-SAME: 
%[[MEMREF1:[a-zA-Z0-9]*]]: memref, diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index ca40301f04fa1..cbc863699ba9e 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -1165,7 +1165,7 @@ func.func @mixed_semantics(%a: tensor, %b: tensor, %c: memref< func.func @winograd_filter_transform_height(%arg0: tensor<2x4x3x5xf32>, %arg1: tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> { // expected-error @+1 {{expect filter height either equals to r or 1}} - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor<2x4x3x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor<2x4x3x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> return %0 : tensor<6x6x5x2xf32> } @@ -1173,7 +1173,7 @@ func.func @winograd_filter_transform_height(%arg0: tensor<2x4x3x5xf32>, %arg1: t func.func @winograd_filter_transform_width(%arg0: tensor<2x3x4x5xf32>, %arg1: tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> { // expected-error @+1 {{expect filter width either equals to r or 1}} - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor<2x3x4x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor<2x3x4x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> return %0 : tensor<6x6x5x2xf32> } @@ -1181,7 +1181,7 @@ func.func @winograd_filter_transform_width(%arg0: tensor<2x3x4x5xf32>, %arg1: te func.func @winograd_filter_transform(%arg0: tensor<2x1x1x5xf32>, %arg1: tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> { // expected-error @+1 {{expect either filter height or width equals to r}} - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor<2x1x1x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor<2x1x1x5xf32>) outs(%arg1 : 
tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> return %0 : tensor<6x6x5x2xf32> } @@ -1189,7 +1189,7 @@ func.func @winograd_filter_transform(%arg0: tensor<2x1x1x5xf32>, %arg1: tensor<6 func.func @winograd_filter_dyn(%arg0: tensor, %arg1: tensor<6x5x?x?xf32>) -> tensor<6x5x?x?xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x5x?x?xf32>) -> tensor<6x5x?x?xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x5x?x?xf32>) -> tensor<6x5x?x?xf32> return %0 : tensor<6x5x?x?xf32> } @@ -1197,7 +1197,7 @@ func.func @winograd_filter_dyn(%arg0: tensor, %arg1: tensor<6x5x?x? func.func @winograd_input_transform_height(%arg0: tensor<2x13x14x5xf32>, %arg1: tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x13x14x5xf32>) outs(%arg1 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x13x14x5xf32>) outs(%arg1 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> return %0 : tensor<6x6x3x3x2x5xf32> } @@ -1205,7 +1205,7 @@ func.func @winograd_input_transform_height(%arg0: tensor<2x13x14x5xf32>, %arg1: func.func @winograd_input_transform_width(%arg0: tensor<2x14x13x5xf32>, %arg1: tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x14x13x5xf32>) outs(%arg1 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x14x13x5xf32>) outs(%arg1 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> return %0 : tensor<6x6x3x3x2x5xf32> } @@ -1213,7 +1213,7 @@ func.func @winograd_input_transform_width(%arg0: tensor<2x14x13x5xf32>, %arg1: t func.func 
@winograd_input_transform_output_tileH(%arg0: tensor<2x14x14x5xf32>, %arg1: tensor<6x6x2x3x2x5xf32>) -> tensor<6x6x2x3x2x5xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<6x6x2x3x2x5xf32>) -> tensor<6x6x2x3x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<6x6x2x3x2x5xf32>) -> tensor<6x6x2x3x2x5xf32> return %0 : tensor<6x6x2x3x2x5xf32> } @@ -1221,7 +1221,7 @@ func.func @winograd_input_transform_output_tileH(%arg0: tensor<2x14x14x5xf32>, % func.func @winograd_input_transform_output_tileW(%arg0: tensor<2x14x14x5xf32>, %arg1: tensor<6x6x3x2x2x5xf32>) -> tensor<6x6x3x2x2x5xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<6x6x3x2x2x5xf32>) -> tensor<6x6x3x2x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<6x6x3x2x2x5xf32>) -> tensor<6x6x3x2x2x5xf32> return %0 : tensor<6x6x3x2x2x5xf32> } @@ -1229,7 +1229,7 @@ func.func @winograd_input_transform_output_tileW(%arg0: tensor<2x14x14x5xf32>, % func.func @winograd_input_transform_output_height(%arg0: tensor<2x14x14x5xf32>, %arg1: tensor<5x6x3x3x2x5xf32>) -> tensor<5x6x3x3x2x5xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<5x6x3x3x2x5xf32>) -> tensor<5x6x3x3x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<5x6x3x3x2x5xf32>) -> tensor<5x6x3x3x2x5xf32> return %0 : tensor<5x6x3x3x2x5xf32> } @@ -1237,7 +1237,7 @@ func.func @winograd_input_transform_output_height(%arg0: tensor<2x14x14x5xf32>, func.func @winograd_input_transform_output_width(%arg0: tensor<2x14x14x5xf32>, %arg1: 
tensor<6x5x3x3x2x5xf32>) -> tensor<6x5x3x3x2x5xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<6x5x3x3x2x5xf32>) -> tensor<6x5x3x3x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x14x14x5xf32>) outs(%arg1 : tensor<6x5x3x3x2x5xf32>) -> tensor<6x5x3x3x2x5xf32> return %0 : tensor<6x5x3x3x2x5xf32> } @@ -1245,7 +1245,7 @@ func.func @winograd_input_transform_output_width(%arg0: tensor<2x14x14x5xf32>, % func.func @winograd_input_dyn(%arg0: tensor, %arg1: tensor<6x5x?x?x?x?xf32>) -> tensor<6x5x?x?x?x?xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x5x?x?x?x?xf32>) -> tensor<6x5x?x?x?x?xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x5x?x?x?x?xf32>) -> tensor<6x5x?x?x?x?xf32> return %0 : tensor<6x5x?x?x?x?xf32> } @@ -1253,7 +1253,7 @@ func.func @winograd_input_dyn(%arg0: tensor, %arg1: tensor<6x5x?x?x func.func @winograd_output_transform_input_height(%arg0: tensor<5x6x3x3x2x2xf32>, %arg1: tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> { // expected-error @+1 {{expect input height equals to input tile size}} - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<5x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<5x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> return %0 : tensor<2x12x12x2xf32> } @@ -1261,7 +1261,7 @@ func.func @winograd_output_transform_input_height(%arg0: tensor<5x6x3x3x2x2xf32> func.func @winograd_output_transform_input_width(%arg0: tensor<6x5x3x3x2x2xf32>, %arg1: tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> { // expected-error @+1 {{expect input width equals to input tile size}} - %0 = 
linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x5x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x5x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> return %0 : tensor<2x12x12x2xf32> } @@ -1269,7 +1269,7 @@ func.func @winograd_output_transform_input_width(%arg0: tensor<6x5x3x3x2x2xf32>, func.func @winograd_output_transform_output_height(%arg0: tensor<6x6x3x3x2x2xf32>, %arg1: tensor<2x11x12x2xf32>) -> tensor<2x11x12x2xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x11x12x2xf32>) -> tensor<2x11x12x2xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x11x12x2xf32>) -> tensor<2x11x12x2xf32> return %0 : tensor<2x11x12x2xf32> } @@ -1277,7 +1277,7 @@ func.func @winograd_output_transform_output_height(%arg0: tensor<6x6x3x3x2x2xf32 func.func @winograd_output_transform_output_width(%arg0: tensor<6x6x3x3x2x2xf32>, %arg1: tensor<2x12x11x2xf32>) -> tensor<2x12x11x2xf32> { // expected-error @+1 {{the output shape is not expected}} - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x11x2xf32>) -> tensor<2x12x11x2xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x6x3x3x2x2xf32>) outs(%arg1 : tensor<2x12x11x2xf32>) -> tensor<2x12x11x2xf32> return %0 : tensor<2x12x11x2xf32> } diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index dc556761b09e5..4edbc6eda3eae 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -630,52 +630,52 @@ func.func @softmax(%arg0: tensor<2x16x32xf32>) -> tensor<2x16x32xf32> { func.func @winograd(%arg0: tensor<2x6x6x5xf32>, %arg1: tensor<2x3x3x5xf32>, 
%arg2: tensor<1xf32>, %arg3: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> { %0 = tensor.empty() : tensor<6x6x5x2xf32> - %1 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%0 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %1 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%0 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> %2 = tensor.empty() : tensor<6x6x1x1x2x5xf32> - %3 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x6x6x5xf32>) outs(%2 : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> + %3 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x6x6x5xf32>) outs(%2 : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf32> into tensor<36x2x5xf32> %4 = tensor.empty() : tensor<36x2x2xf32> %5 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%4 : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> %expanded = tensor.expand_shape %5 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> - %6 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x1x1x2x2xf32>) outs(%arg3 : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> + %6 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x6x1x1x2x2xf32>) outs(%arg3 : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> return %6 : tensor<2x4x4x2xf32> } // CHECK-LABEL: func @winograd -// CHECK: linalg.winograd_filter_transform m(4) r(3) -// CHECK: linalg.winograd_input_transform m(4) r(3) -// CHECK: linalg.winograd_output_transform m(4) r(3) +// CHECK: linalg.winograd_filter_transform fmr(F_4_3) +// CHECK: linalg.winograd_input_transform fmr(F_4_3) +// CHECK: linalg.winograd_output_transform fmr(F_4_3) // ----- func.func 
@winograd_filter_dyn(%arg0: tensor, %arg1: tensor<6x6x?x?xf32>) -> tensor<6x6x?x?xf32> { - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?xf32>) -> tensor<6x6x?x?xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?xf32>) -> tensor<6x6x?x?xf32> return %0 : tensor<6x6x?x?xf32> } // CHECK-LABEL: func @winograd_filter_dyn -// CHECK: linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?xf32>) -> tensor<6x6x?x?xf32> +// CHECK: linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?xf32>) -> tensor<6x6x?x?xf32> // ----- func.func @winograd_input_dyn(%arg0: tensor, %arg1: tensor<6x6x?x?x?x?xf32>) -> tensor<6x6x?x?x?x?xf32> { - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?x?x?xf32>) -> tensor<6x6x?x?x?x?xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?x?x?xf32>) -> tensor<6x6x?x?x?x?xf32> return %0 : tensor<6x6x?x?x?x?xf32> } // CHECK-LABEL: func @winograd_input_dyn -// CHECK: linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?x?x?xf32>) -> tensor<6x6x?x?x?x?xf32> +// CHECK: linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor) outs(%arg1 : tensor<6x6x?x?x?x?xf32>) -> tensor<6x6x?x?x?x?xf32> // ----- func.func @winograd_output_dyn(%arg0: tensor<6x6x?x?x?x?xf32>, %arg1: tensor) -> tensor { - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x?x?x?x?xf32>) outs(%arg1 : tensor) -> tensor + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x6x?x?x?x?xf32>) outs(%arg1 : tensor) -> tensor return %0 : tensor } // CHECK-LABEL: func @winograd_output_dyn -// CHECK: linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x?x?x?x?xf32>) outs(%arg1 : tensor) -> tensor +// CHECK: linalg.winograd_output_transform fmr(F_4_3) 
ins(%arg0 : tensor<6x6x?x?x?x?xf32>) outs(%arg1 : tensor) -> tensor // ----- diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir index 572a2ae70e0a4..5bdb5073ee865 100644 --- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir @@ -653,6 +653,7 @@ module { %5 = affine.min #map2(%i)[%d0, %idx] %6 = tensor.extract_slice %o[%4] [%5] [1] : tensor to tensor + // CHECK: linalg.generic // CHECK: %[[T1:.*]] = linalg.generic {{.*}} // CHECK: %[[T2:.*]] = linalg.generic {{.*}} %7 = tensor.extract_slice %1[%4] [%5] [1] : tensor to tensor diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir index cdc4b8a72a276..445ded4bfcafb 100644 --- a/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir @@ -2,15 +2,15 @@ func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> { %0 = tensor.empty() : tensor<6x6x5x2xf32> - %1 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%0 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %1 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%0 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> %2 = tensor.empty() : tensor<6x6x2x2x2x5xf32> - %3 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%2 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> + %3 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%2 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 
3, 4], [5]] : tensor<6x6x2x2x2x5xf32> into tensor<36x8x5xf32> %4 = tensor.empty() : tensor<36x8x2xf32> %5 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x8x5xf32>, tensor<36x5x2xf32>) outs(%4 : tensor<36x8x2xf32>) -> tensor<36x8x2xf32> %expanded = tensor.expand_shape %5 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 2, 2, 2, 2] : tensor<36x8x2xf32> into tensor<6x6x2x2x2x2xf32> - %6 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x2x2x2x2xf32>) outs(%arg2 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> + %6 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x6x2x2x2x2xf32>) outs(%arg2 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> return %6 : tensor<2x8x8x2xf32> } @@ -123,13 +123,13 @@ module attributes {transform.with_named_sequence} { func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<2x9x9x2xf32>) -> tensor<2x9x9x2xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<6x6x5x2xf32> - %1 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%0 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %1 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%0 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> %padded = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 3, 3, 0] { ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): tensor.yield %cst : f32 } : tensor<2x11x11x5xf32> to tensor<2x14x14x5xf32> %2 = tensor.empty() : tensor<6x6x3x3x2x5xf32> - %3 = linalg.winograd_input_transform m(4) r(3) ins(%padded : tensor<2x14x14x5xf32>) outs(%2 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> + %3 = linalg.winograd_input_transform fmr(F_4_3) ins(%padded : tensor<2x14x14x5xf32>) outs(%2 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], 
[5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> %4 = tensor.empty() : tensor<36x18x2xf32> @@ -140,7 +140,7 @@ func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5x ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): tensor.yield %cst : f32 } : tensor<2x9x9x2xf32> to tensor<2x12x12x2xf32> - %7 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> + %7 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> %extracted_slice = tensor.extract_slice %7[0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> return %extracted_slice : tensor<2x9x9x2xf32> } @@ -259,16 +259,16 @@ module attributes {transform.with_named_sequence} { func.func @conv2d_mx1_rx1(%arg0: tensor<2x6x1x5xf32>, %arg1: tensor<2x3x1x5xf32>, %arg2: tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<6x1x5x2xf32> - %1 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x1x5xf32>) outs(%0 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> + %1 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x1x5xf32>) outs(%0 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> %2 = tensor.empty() : tensor<6x1x1x1x2x5xf32> - %3 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x6x1x5xf32>) outs(%2 : tensor<6x1x1x1x2x5xf32>) -> tensor<6x1x1x1x2x5xf32> + %3 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x6x1x5xf32>) outs(%2 : tensor<6x1x1x1x2x5xf32>) -> tensor<6x1x1x1x2x5xf32> %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x1x5x2xf32> into tensor<6x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], [5]] : tensor<6x1x1x1x2x5xf32> into tensor<6x2x5xf32> %4 = tensor.empty() : tensor<6x2x2xf32> %5 = linalg.fill 
ins(%cst : f32) outs(%4 : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> %6 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%5 : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> %expanded = tensor.expand_shape %6 [[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<6x1x1x1x2x2xf32> - %7 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x1x1x1x2x2xf32>) outs(%arg2 : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> + %7 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x1x1x1x2x2xf32>) outs(%arg2 : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> return %7 : tensor<2x4x1x2xf32> } @@ -350,16 +350,16 @@ module attributes {transform.with_named_sequence} { func.func @conv2d_mx1_rx1_2(%arg0: tensor<2x6x2x5xf32>, %arg1: tensor<2x3x1x5xf32>, %arg2: tensor<2x4x2x2xf32>) -> tensor<2x4x2x2xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<6x1x5x2xf32> - %1 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x1x5xf32>) outs(%0 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> + %1 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x1x5xf32>) outs(%0 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> %2 = tensor.empty() : tensor<6x1x1x2x2x5xf32> - %3 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x6x2x5xf32>) outs(%2 : tensor<6x1x1x2x2x5xf32>) -> tensor<6x1x1x2x2x5xf32> + %3 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x6x2x5xf32>) outs(%2 : tensor<6x1x1x2x2x5xf32>) -> tensor<6x1x1x2x2x5xf32> %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x1x5x2xf32> into tensor<6x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], [5]] : tensor<6x1x1x2x2x5xf32> into tensor<6x4x5xf32> %4 = tensor.empty() : tensor<6x4x2xf32> %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<6x4x2xf32>) -> tensor<6x4x2xf32> %6 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<6x4x5xf32>, 
tensor<6x5x2xf32>) outs(%5 : tensor<6x4x2xf32>) -> tensor<6x4x2xf32> %expanded = tensor.expand_shape %6 [[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 2, 2, 2] : tensor<6x4x2xf32> into tensor<6x1x1x2x2x2xf32> - %7 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x1x1x2x2x2xf32>) outs(%arg2 : tensor<2x4x2x2xf32>) -> tensor<2x4x2x2xf32> + %7 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x1x1x2x2x2xf32>) outs(%arg2 : tensor<2x4x2x2xf32>) -> tensor<2x4x2x2xf32> return %7 : tensor<2x4x2x2xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-tile-winograd.mlir b/mlir/test/Dialect/Linalg/transform-tile-winograd.mlir index fc6424fd4c812..beb8d0b125738 100644 --- a/mlir/test/Dialect/Linalg/transform-tile-winograd.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-winograd.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -transform-interpreter --split-input-file | FileCheck %s func.func @tile_winograd_filter(%arg0: tensor<2x3x3x5xf32>, %arg1: tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> { - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor<2x3x3x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor<2x3x3x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> return %0 : tensor<6x6x5x2xf32> } @@ -25,13 +25,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S2:.*]] = scf.for %[[ARG4:.*]] = %[[C0_0]] to %[[C5]] step %[[C1_1]] iter_args(%[[ARG5:.*]] = %[[ARG3]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG2]], 0, 0, %[[ARG4]]] [1, 3, 3, 1] [1, 1, 1, 1] : tensor<2x3x3x5xf32> to tensor<1x3x3x1xf32> // CHECK: %[[EXTRACTED_SLICE_2:.*]] = tensor.extract_slice %[[ARG5]][0, 0, %[[ARG4]], %[[ARG2]]] [6, 6, 1, 1] [1, 1, 1, 1] : tensor<6x6x5x2xf32> to tensor<6x6x1x1xf32> -// CHECK: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<1x3x3x1xf32>) 
outs(%[[EXTRACTED_SLICE_2]] : tensor<6x6x1x1xf32>) -> tensor<6x6x1x1xf32> +// CHECK: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<1x3x3x1xf32>) outs(%[[EXTRACTED_SLICE_2]] : tensor<6x6x1x1xf32>) -> tensor<6x6x1x1xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S3]] into %[[ARG5]][0, 0, %[[ARG4]], %[[ARG2]]] [6, 6, 1, 1] [1, 1, 1, 1] : tensor<6x6x1x1xf32> into tensor<6x6x5x2xf32> // ----- func.func @tile_winograd_filter(%arg0: tensor<2x3x3x5xf32>, %arg1: tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> { - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor<2x3x3x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor<2x3x3x5xf32>) outs(%arg1 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> return %0 : tensor<6x6x5x2xf32> } @@ -58,12 +58,12 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S3:.*]] = affine.min #[[$MAP0]](%[[ARG4]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG2]], 0, 0, %[[ARG4]]] [1, 3, 3, %[[S3]]] [1, 1, 1, 1] : tensor<2x3x3x5xf32> to tensor<1x3x3x?xf32> // CHECK: %[[EXTRACTED_SLICE_3:.*]] = tensor.extract_slice %[[ARG5]][0, 0, %[[ARG4]], %[[ARG2]]] [6, 6, %[[S3]], 1] [1, 1, 1, 1] : tensor<6x6x5x2xf32> to tensor<6x6x?x1xf32> -// CHECK: %[[S4:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<1x3x3x?xf32>) outs(%[[EXTRACTED_SLICE_3]] : tensor<6x6x?x1xf32>) -> tensor<6x6x?x1xf32> +// CHECK: %[[S4:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<1x3x3x?xf32>) outs(%[[EXTRACTED_SLICE_3]] : tensor<6x6x?x1xf32>) -> tensor<6x6x?x1xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S4]] into %[[ARG5]][0, 0, %[[ARG4]], %[[ARG2]]] [6, 6, %[[S3]], 1] [1, 1, 1, 1] : tensor<6x6x?x1xf32> into tensor<6x6x5x2xf32> // ----- func.func @tile_winograd_filter(%arg0: tensor<2x3x1x5xf32>, %arg1: 
tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> { - %0 = linalg.winograd_filter_transform m(4) r(3) ins(%arg0 : tensor<2x3x1x5xf32>) outs(%arg1 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> + %0 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg0 : tensor<2x3x1x5xf32>) outs(%arg1 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> return %0 : tensor<6x1x5x2xf32> } @@ -87,13 +87,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S2:.*]] = scf.for %[[ARG4:.*]] = %[[C0_0]] to %[[C5]] step %[[C1_1]] iter_args(%[[ARG5:.*]] = %[[ARG3]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG2]], 0, 0, %[[ARG4]]] [1, 3, 1, 1] [1, 1, 1, 1] : tensor<2x3x1x5xf32> to tensor<1x3x1x1xf32> // CHECK: %[[EXTRACTED_SLICE_2:.*]] = tensor.extract_slice %[[ARG5]][0, 0, %[[ARG4]], %[[ARG2]]] [6, 1, 1, 1] [1, 1, 1, 1] : tensor<6x1x5x2xf32> to tensor<6x1x1x1xf32> -// CHECK: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<1x3x1x1xf32>) outs(%[[EXTRACTED_SLICE_2]] : tensor<6x1x1x1xf32>) -> tensor<6x1x1x1xf32> +// CHECK: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<1x3x1x1xf32>) outs(%[[EXTRACTED_SLICE_2]] : tensor<6x1x1x1xf32>) -> tensor<6x1x1x1xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S3]] into %[[ARG5]][0, 0, %[[ARG4]], %[[ARG2]]] [6, 1, 1, 1] [1, 1, 1, 1] : tensor<6x1x1x1xf32> into tensor<6x1x5x2xf32> // ----- func.func @tile_winograd_input(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> { - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%arg1 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%arg1 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> return %0 : tensor<6x6x2x2x2x5xf32> } @@ -123,13 +123,13 @@ module attributes {transform.with_named_sequence} { // CHECK: 
%[[S6:.*]] = affine.apply #[[$MAP1]]() // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][0, %[[S3]], %[[S4]], 0] [2, %[[S5]], %[[S6]], 5] [1, 1, 1, 1] : tensor<2x10x10x5xf32> to tensor<2x?x?x5xf32> // CHECK: %[[EXTRACTED_SLICE_5:.*]] = tensor.extract_slice %[[ARG5]][0, 0, %[[ARG2]], %[[ARG4]], 0, 0] [6, 6, 1, 1, 2, 5] [1, 1, 1, 1, 1, 1] : tensor<6x6x2x2x2x5xf32> to tensor<6x6x1x1x2x5xf32> -// CHECK: %[[S7:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<2x?x?x5xf32>) outs(%[[EXTRACTED_SLICE_5]] : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> +// CHECK: %[[S7:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<2x?x?x5xf32>) outs(%[[EXTRACTED_SLICE_5]] : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S7]] into %[[ARG5]][0, 0, %[[ARG2]], %[[ARG4]], 0, 0] [6, 6, 1, 1, 2, 5] [1, 1, 1, 1, 1, 1] : tensor<6x6x1x1x2x5xf32> into tensor<6x6x2x2x2x5xf32> // ----- func.func @tile_winograd_input(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> { - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%arg1 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%arg1 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> return %0 : tensor<6x6x2x2x2x5xf32> } @@ -167,13 +167,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S8:.*]] = affine.apply #[[$MAP1]]() // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG6]], %[[S5]], %[[S6]], %[[ARG8]]] [1, %[[S7]], %[[S8]], 1] [1, 1, 1, 1] : tensor<2x10x10x5xf32> to tensor<1x?x?x1xf32> // CHECK: %[[EXTRACTED_SLICE_10:.*]] = tensor.extract_slice %[[ARG9]][0, 0, %[[ARG2]], %[[ARG4]], %[[ARG6]], %[[ARG8]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<6x6x2x2x2x5xf32> to 
tensor<6x6x1x1x1x1xf32> -// CHECK: %[[S9:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<1x?x?x1xf32>) outs(%[[EXTRACTED_SLICE_10]] : tensor<6x6x1x1x1x1xf32>) -> tensor<6x6x1x1x1x1xf32> +// CHECK: %[[S9:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<1x?x?x1xf32>) outs(%[[EXTRACTED_SLICE_10]] : tensor<6x6x1x1x1x1xf32>) -> tensor<6x6x1x1x1x1xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S9]] into %[[ARG9]][0, 0, %[[ARG2]], %[[ARG4]], %[[ARG6]], %[[ARG8]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<6x6x1x1x1x1xf32> into tensor<6x6x2x2x2x5xf32> // ----- func.func @tile_winograd_input(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> { - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%arg1 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x10x10x5xf32>) outs(%arg1 : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> return %0 : tensor<6x6x2x2x2x5xf32> } @@ -213,13 +213,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S9:.*]] = affine.apply #[[$MAP2]]() // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG6]], %[[S6]], %[[S7]], %[[ARG8]]] [2, %[[S8]], %[[S9]], %[[S5]]] [1, 1, 1, 1] : tensor<2x10x10x5xf32> to tensor<2x?x?x?xf32> // CHECK: %[[EXTRACTED_SLICE_12:.*]] = tensor.extract_slice %[[ARG9]][0, 0, %[[ARG2]], %[[ARG4]], %[[ARG6]], %[[ARG8]]] [6, 6, 2, 2, 2, %[[S5]]] [1, 1, 1, 1, 1, 1] : tensor<6x6x2x2x2x5xf32> to tensor<6x6x2x2x2x?xf32> -// CHECK: %[[S10:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<2x?x?x?xf32>) outs(%[[EXTRACTED_SLICE_12]] : tensor<6x6x2x2x2x?xf32>) -> tensor<6x6x2x2x2x?xf32> +// CHECK: %[[S10:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<2x?x?x?xf32>) outs(%[[EXTRACTED_SLICE_12]] : 
tensor<6x6x2x2x2x?xf32>) -> tensor<6x6x2x2x2x?xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S10]] into %[[ARG9]][0, 0, %[[ARG2]], %[[ARG4]], %[[ARG6]], %[[ARG8]]] [6, 6, 2, 2, 2, %[[S5]]] [1, 1, 1, 1, 1, 1] : tensor<6x6x2x2x2x?xf32> into tensor<6x6x2x2x2x5xf32> // ----- func.func @tile_winograd_input(%arg0: tensor<2x1x10x5xf32>, %arg1: tensor<1x6x1x2x2x5xf32>) -> tensor<1x6x1x2x2x5xf32> { - %0 = linalg.winograd_input_transform m(4) r(3) ins(%arg0 : tensor<2x1x10x5xf32>) outs(%arg1 : tensor<1x6x1x2x2x5xf32>) -> tensor<1x6x1x2x2x5xf32> + %0 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x1x10x5xf32>) outs(%arg1 : tensor<1x6x1x2x2x5xf32>) -> tensor<1x6x1x2x2x5xf32> return %0 : tensor<1x6x1x2x2x5xf32> } @@ -258,13 +258,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S8:.*]] = affine.apply #[[$MAP2]]() // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG6]], %[[S5]], %[[S6]], %[[ARG8]]] [1, 1, %[[S8]], 1] [1, 1, 1, 1] : tensor<2x1x10x5xf32> to tensor<1x1x?x1xf32> // CHECK: %[[EXTRACTED_SLICE_10:.*]] = tensor.extract_slice %[[ARG9]][0, 0, %[[ARG2]], %[[ARG4]], %[[ARG6]], %[[ARG8]]] [1, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<1x6x1x2x2x5xf32> to tensor<1x6x1x1x1x1xf32> -// CHECK: %[[S9:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<1x1x?x1xf32>) outs(%[[EXTRACTED_SLICE_10]] : tensor<1x6x1x1x1x1xf32>) -> tensor<1x6x1x1x1x1xf32> +// CHECK: %[[S9:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<1x1x?x1xf32>) outs(%[[EXTRACTED_SLICE_10]] : tensor<1x6x1x1x1x1xf32>) -> tensor<1x6x1x1x1x1xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S9]] into %[[ARG9]][0, 0, %[[ARG2]], %[[ARG4]], %[[ARG6]], %[[ARG8]]] [1, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<1x6x1x1x1x1xf32> into tensor<1x6x1x2x2x5xf32> // ----- func.func @tile_winograd_output(%arg0 : tensor<6x6x2x2x2x2xf32>, %arg1: tensor<2x8x8x2xf32>) -> 
tensor<2x8x8x2xf32> { - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x2x2x2x2xf32>) outs(%arg1 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x6x2x2x2x2xf32>) outs(%arg1 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> return %0 : tensor<2x8x8x2xf32> } @@ -298,7 +298,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @tile_winograd_output(%arg0 : tensor<6x6x2x2x3x5xf32>, %arg1: tensor<3x8x8x5xf32>) -> tensor<3x8x8x5xf32> { - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x2x2x3x5xf32>) outs(%arg1 : tensor<3x8x8x5xf32>) -> tensor<3x8x8x5xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x6x2x2x3x5xf32>) outs(%arg1 : tensor<3x8x8x5xf32>) -> tensor<3x8x8x5xf32> return %0 : tensor<3x8x8x5xf32> } @@ -346,7 +346,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @tile_winograd_output(%arg0 : tensor<6x1x2x1x3x5xf32>, %arg1: tensor<3x8x1x5xf32>) -> tensor<3x8x1x5xf32> { - %0 = linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x1x2x1x3x5xf32>) outs(%arg1 : tensor<3x8x1x5xf32>) -> tensor<3x8x1x5xf32> + %0 = linalg.winograd_output_transform fmr(F_4_3) ins(%arg0 : tensor<6x1x2x1x3x5xf32>) outs(%arg1 : tensor<3x8x1x5xf32>) -> tensor<3x8x1x5xf32> return %0 : tensor<3x8x1x5xf32> } @@ -385,4 +385,4 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S7:.*]] = affine.apply #[[$MAP2]]() // CHECK: %[[S8:.*]] = affine.apply #[[$MAP2]]() // CHECK: %[[EXTRACTED_SLICE_9:.*]] = tensor.extract_slice %[[ARG12]][%[[ARG6]], %[[S5]], %[[S6]], %[[ARG8]]] [1, %[[S7]], 1, 1] [1, 1, 1, 1] : tensor<3x8x1x5xf32> to tensor<1x?x1x1xf32> -// CHECK: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXTRACTED_SLICE]] : tensor<6x1x1x1x1x1xf32>) outs(%[[EXTRACTED_SLICE_9]] : tensor<1x?x1x1xf32>) -> tensor<1x?x1x1xf32> +// CHECK: %[[S9:.*]] = linalg.winograd_output_transform 
fmr(F_4_3) ins(%[[EXTRACTED_SLICE]] : tensor<6x1x1x1x1x1xf32>) outs(%[[EXTRACTED_SLICE_9]] : tensor<1x?x1x1xf32>) -> tensor<1x?x1x1xf32> diff --git a/mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir b/mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir index 1de861e653005..e0ead54c956fc 100644 --- a/mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir +++ b/mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -transform-interpreter -canonicalize --split-input-file -verify-diagnostics| FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -transform-interpreter -canonicalize --split-input-file -verify-diagnostics| FileCheck %s func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<1xf32>, %arg3: tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> { %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x10x10x5xf32>, tensor<2x3x3x5xf32>) outs(%arg3 : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> @@ -8,16 +8,16 @@ func.func @conv2d(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) + %1 = transform.structured.winograd_conv2d %0 { fmr = 1: i32 } : (!transform.any_op) -> (!transform.any_op) transform.yield } } // CHECK-LABEL: func.func @conv2d -// CHECK: linalg.winograd_filter_transform m(4) r(3) -// CHECK: linalg.winograd_input_transform m(4) r(3) +// CHECK: linalg.winograd_filter_transform fmr(F_4_3) +// CHECK: linalg.winograd_input_transform fmr(F_4_3) // CHECK: linalg.batch_matmul -// CHECK: linalg.winograd_output_transform m(4) 
r(3) +// CHECK: linalg.winograd_output_transform fmr(F_4_3) // ----- @@ -29,19 +29,19 @@ func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) + %1 = transform.structured.winograd_conv2d %0 { fmr = 1: i32 } : (!transform.any_op) -> (!transform.any_op) transform.yield } } // CHECK-LABEL: func.func @conv2d_unaligned -// CHECK: linalg.winograd_filter_transform m(4) r(3) +// CHECK: linalg.winograd_filter_transform fmr(F_4_3) // CHECK: tensor.pad // CHECK-SAME: low[0, 0, 0, 0] high[0, 3, 3, 0] -// CHECK: linalg.winograd_input_transform m(4) r(3) +// CHECK: linalg.winograd_input_transform fmr(F_4_3) // CHECK: tensor.pad // CHECK-SAME: low[0, 0, 0, 0] high[0, 3, 3, 0] -// CHECK: linalg.winograd_output_transform m(4) r(3) +// CHECK: linalg.winograd_output_transform fmr(F_4_3) // ----- @@ -54,7 +54,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @+1 {{this operation is not supported to convert to Winograd Conv2D}} - %1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) + %1 = transform.structured.winograd_conv2d %0 { fmr = 1: i32 } : (!transform.any_op) -> (!transform.any_op) transform.yield } } @@ -70,7 +70,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match 
ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @+1 {{apply Winograd Conv2D failed}} - %1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) + %1 = transform.structured.winograd_conv2d %0 { fmr = 1: i32 } : (!transform.any_op) -> (!transform.any_op) transform.yield } } @@ -86,7 +86,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_fhwc"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @+1 {{apply Winograd Conv2D failed}} - %1 = transform.structured.winograd_conv2d %0 { m = 4, r = 3 } : (!transform.any_op) -> (!transform.any_op) + %1 = transform.structured.winograd_conv2d %0 { fmr = 1: i32 } : (!transform.any_op) -> (!transform.any_op) transform.yield } } diff --git a/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir b/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir index 16d06a7473272..c7b0bd51308ba 100644 --- a/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir +++ b/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir @@ -3,13 +3,13 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg2: tensor<2x9x9x2xf32>) -> tensor<2x9x9x2xf32> { %cst = arith.constant 0.000000e+00 : f32 %2 = tensor.empty() : tensor<6x6x5x2xf32> - %3 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%2 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> + %3 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x3x5xf32>) outs(%2 : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> %padded = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 3, 3, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<2x11x11x5xf32> to tensor<2x14x14x5xf32> %4 = tensor.empty() : tensor<6x6x3x3x2x5xf32> - %5 = 
linalg.winograd_input_transform m(4) r(3) ins(%padded : tensor<2x14x14x5xf32>) outs(%4 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> + %5 = linalg.winograd_input_transform fmr(F_4_3) ins(%padded : tensor<2x14x14x5xf32>) outs(%4 : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> %collapsed = tensor.collapse_shape %3 [[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> %collapsed_0 = tensor.collapse_shape %5 [[0, 1], [2, 3, 4], [5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> %6 = tensor.empty() : tensor<36x18x2xf32> @@ -20,7 +20,7 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<2x9x9x2xf32> to tensor<2x12x12x2xf32> - %9 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> + %9 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> %extracted_slice = tensor.extract_slice %9[0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> return %extracted_slice : tensor<2x9x9x2xf32> } diff --git a/mlir/test/Dialect/Linalg/winograd-conv2d.mlir b/mlir/test/Dialect/Linalg/winograd-conv2d.mlir index 0040d81a2d24e..e80fa6b4af944 100644 --- a/mlir/test/Dialect/Linalg/winograd-conv2d.mlir +++ b/mlir/test/Dialect/Linalg/winograd-conv2d.mlir @@ -9,16 +9,16 @@ func.func @conv2d_4x4_3x3(%arg0: tensor<2x6x6x5xf32>, %arg1: tensor<2x3x3x5xf32> // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x6x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> { // CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x5x2xf32> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) 
r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf32> -// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x6x6x5xf32>) outs(%[[S4]] : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[ARG0]] : tensor<2x6x6x5xf32>) outs(%[[S4]] : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf32> into tensor<36x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<36x2x2xf32> // CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> // CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%[[S7]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> // CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform fmr(F_4_3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> // CHECK-NEXT: return %[[S9]] : tensor<2x4x4x2xf32> // CHECK-NEXT: } @@ -33,16 +33,16 @@ func.func @conv2d_2x2_5x5(%arg0: 
tensor<2x6x6x5xf32>, %arg1: tensor<2x5x5x5xf32> // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x6x5xf32>, %[[ARG1:.*]]: tensor<2x5x5x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32> { // CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x5x2xf32> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(2) r(5) ins(%[[ARG1]] : tensor<2x5x5x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_2_5) ins(%[[ARG1]] : tensor<2x5x5x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf32> -// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(2) r(5) ins(%[[ARG0]] : tensor<2x6x6x5xf32>) outs(%[[S4]] : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform fmr(F_2_5) ins(%[[ARG0]] : tensor<2x6x6x5xf32>) outs(%[[S4]] : tensor<6x6x1x1x2x5xf32>) -> tensor<6x6x1x1x2x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf32> into tensor<36x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<36x2x2xf32> // CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> // CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%[[S7]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> // CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(2) 
r(5) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform fmr(F_2_5) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32> // CHECK-NEXT: return %[[S9]] : tensor<2x2x2x2xf32> // CHECK-NEXT: } @@ -57,16 +57,16 @@ func.func @conv2d_1x4_1x3(%arg0: tensor<2x1x6x5xf32>, %arg1: tensor<2x1x3x5xf32> // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x1x6x5xf32>, %[[ARG1:.*]]: tensor<2x1x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x1x4x2xf32>) -> tensor<2x1x4x2xf32> { // CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<1x6x5x2xf32> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x1x3x5xf32>) outs(%[[S2]] : tensor<1x6x5x2xf32>) -> tensor<1x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[ARG1]] : tensor<2x1x3x5xf32>) outs(%[[S2]] : tensor<1x6x5x2xf32>) -> tensor<1x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<1x6x1x1x2x5xf32> -// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x1x6x5xf32>) outs(%[[S4]] : tensor<1x6x1x1x2x5xf32>) -> tensor<1x6x1x1x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[ARG0]] : tensor<2x1x6x5xf32>) outs(%[[S4]] : tensor<1x6x1x1x2x5xf32>) -> tensor<1x6x1x1x2x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<1x6x5x2xf32> into tensor<6x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<1x6x1x1x2x5xf32> into tensor<6x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<6x2x2xf32> // CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> // CHECK-NEXT: 
%[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S7]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> // CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [1, 6, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<1x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<1x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x1x4x2xf32>) -> tensor<2x1x4x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform fmr(F_4_3) ins(%[[EXPANDED]] : tensor<1x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x1x4x2xf32>) -> tensor<2x1x4x2xf32> // CHECK-NEXT: return %[[S9]] : tensor<2x1x4x2xf32> // CHECK-NEXT: } @@ -81,16 +81,16 @@ func.func @conv2d_4x1_3x1(%arg0: tensor<2x6x1x5xf32>, %arg1: tensor<2x3x1x5xf32> // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x1x5xf32>, %[[ARG1:.*]]: tensor<2x3x1x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> { // CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x1x5x2xf32> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x1x5xf32>) outs(%[[S2]] : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[ARG1]] : tensor<2x3x1x5xf32>) outs(%[[S2]] : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x1x1x1x2x5xf32> -// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x6x1x5xf32>) outs(%[[S4]] : tensor<6x1x1x1x2x5xf32>) -> tensor<6x1x1x1x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[ARG0]] : tensor<2x6x1x5xf32>) outs(%[[S4]] : tensor<6x1x1x1x2x5xf32>) -> tensor<6x1x1x1x2x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 
1], [2], [3]] : tensor<6x1x5x2xf32> into tensor<6x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x1x1x1x2x5xf32> into tensor<6x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<6x2x2xf32> // CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> // CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S7]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> // CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<6x1x1x1x2x2xf32> -// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x1x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform fmr(F_4_3) ins(%[[EXPANDED]] : tensor<6x1x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> // CHECK-NEXT: return %[[S9]] : tensor<2x4x1x2xf32> // CHECK-NEXT: } @@ -105,16 +105,16 @@ func.func @conv2d_aligned(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf3 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> { // CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x5x2xf32> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x6x2x2x2x5xf32> -// CHECK-NEXT: 
%[[S5:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x10x10x5xf32>) outs(%[[S4]] : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[ARG0]] : tensor<2x10x10x5xf32>) outs(%[[S4]] : tensor<6x6x2x2x2x5xf32>) -> tensor<6x6x2x2x2x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x2x2x2x5xf32> into tensor<36x8x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<36x8x2xf32> // CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<36x8x2xf32>) -> tensor<36x8x2xf32> // CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x8x5xf32>, tensor<36x5x2xf32>) outs(%[[S7]] : tensor<36x8x2xf32>) -> tensor<36x8x2xf32> // CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 2, 2, 2, 2] : tensor<36x8x2xf32> into tensor<6x6x2x2x2x2xf32> -// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x2x2x2x2xf32>) outs(%[[ARG3]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform fmr(F_4_3) ins(%[[EXPANDED]] : tensor<6x6x2x2x2x2xf32>) outs(%[[ARG3]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> // CHECK-NEXT: return %[[S9]] : tensor<2x8x8x2xf32> // CHECK-NEXT: } @@ -129,13 +129,13 @@ func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5x // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x11x11x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x9x9x2xf32>) -> tensor<2x9x9x2xf32> { // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[S0:.*]] = tensor.empty() : tensor<6x6x5x2xf32> -// 
CHECK-NEXT: %[[S1:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S0]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> +// CHECK-NEXT: %[[S1:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S0]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[PADDED:.*]] = tensor.pad %[[ARG0]] low[0, 0, 0, 0] high[0, 3, 3, 0] { // CHECK-NEXT: ^bb0 // CHECK-NEXT: tensor.yield %[[CST]] : f32 // CHECK-NEXT: } : tensor<2x11x11x5xf32> to tensor<2x14x14x5xf32> // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x3x3x2x5xf32> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[PADDED]] : tensor<2x14x14x5xf32>) outs(%[[S2]] : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[PADDED]] : tensor<2x14x14x5xf32>) outs(%[[S2]] : tensor<6x6x3x3x2x5xf32>) -> tensor<6x6x3x3x2x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %3 {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<36x18x2xf32> @@ -146,7 +146,7 @@ func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5x // CHECK-NEXT: ^bb0 // CHECK-NEXT: tensor.yield %[[CST]] : f32 // CHECK-NEXT: } : tensor<2x9x9x2xf32> to tensor<2x12x12x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x3x3x2x2xf32>) outs(%[[PADDED_1]] : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.winograd_output_transform fmr(F_4_3) ins(%[[EXPANDED]] : tensor<6x6x3x3x2x2xf32>) outs(%[[PADDED_1]] : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> // CHECK-NEXT: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[S7]][0, 0, 
0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> // CHECK-NEXT: return %[[EXTRACTED_SLICE]] : tensor<2x9x9x2xf32> // CHECK-NEXT: } @@ -162,16 +162,16 @@ func.func @conv2d_type_promotion(%arg0: tensor<2x6x6x5xf16>, %arg1: tensor<2x3x3 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x6x5xf16>, %[[ARG1:.*]]: tensor<2x3x3x5xf16>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> { // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S0:.*]] = tensor.empty() : tensor<6x6x5x2xf16> -// CHECK-NEXT: %[[S1:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf16>) outs(%[[S0]] : tensor<6x6x5x2xf16>) -> tensor<6x6x5x2xf16> +// CHECK-NEXT: %[[S1:.*]] = linalg.winograd_filter_transform fmr(F_4_3) ins(%[[ARG1]] : tensor<2x3x3x5xf16>) outs(%[[S0]] : tensor<6x6x5x2xf16>) -> tensor<6x6x5x2xf16> // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf16> -// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x6x6x5xf16>) outs(%[[S2]] : tensor<6x6x1x1x2x5xf16>) -> tensor<6x6x1x1x2x5xf16> +// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_input_transform fmr(F_4_3) ins(%[[ARG0]] : tensor<2x6x6x5xf16>) outs(%[[S2]] : tensor<6x6x1x1x2x5xf16>) -> tensor<6x6x1x1x2x5xf16> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf16> into tensor<36x5x2xf16> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf16> into tensor<36x2x5xf16> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<36x2x2xf32> // CHECK-NEXT: %[[S5:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S4]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> // CHECK-NEXT: %[[S6:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf16>, tensor<36x5x2xf16>) outs(%[[S5]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> // CHECK-NEXT: %[[EXPANDED:.*]] = 
tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.winograd_output_transform fmr(F_4_3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> // CHECK-NEXT: return %[[S7]] : tensor<2x4x4x2xf32> // CHECK-NEXT: } diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir index f908efb638446..8e394b2ac04c8 100644 --- a/mlir/test/Dialect/MemRef/invalid.mlir +++ b/mlir/test/Dialect/MemRef/invalid.mlir @@ -342,6 +342,16 @@ memref.global "priate" constant @memref5 : memref<2xf32> = uninitialized // ----- +// expected-error @+1 {{op initial value element expected to be of type 'f16', but was of type 'f32'}} +"memref.global"() <{constant, initial_value = dense<1.000000e+00> : tensor<1xf32>, sym_name = "memref6", sym_visibility = "private", type = memref<1xf16>}> : () -> () + +// ----- + +// expected-error @+1 {{op initial value shape expected to be 1, 2 but was 2, 2}} +"memref.global"() <{constant, initial_value = dense<1.000000e+00> : tensor<2x2xf16>, sym_name = "memref7", sym_visibility = "private", type = memref<1x2xf16>}> : () -> () + +// ----- + func.func @nonexistent_global_memref() { // expected-error @+1 {{'gv' does not reference a valid global memref}} %0 = memref.get_global @gv : memref<3xf32> diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir index 13fdf3cf13510..e11de7bec2d0a 100644 --- a/mlir/test/Dialect/MemRef/ops.mlir +++ b/mlir/test/Dialect/MemRef/ops.mlir @@ -174,6 +174,9 @@ memref.global "private" @memref3 : memref<2xf32> = uninitialized // CHECK-LABEL: memref.global "private" constant @memref4 : memref<2xf32> = uninitialized memref.global 
"private" constant @memref4 : memref<2xf32> = uninitialized +// CHECK-LABEL: memref.global "private" constant @memref5 : memref<1xf16, 42 : i32> = dense<1.000000e+00> +"memref.global"() <{constant, initial_value = dense<1.000000e+00> : tensor<1xf16>, sym_name = "memref5", sym_visibility = "private", type = memref<1xf16, 42 : i32>}> : () -> () + // CHECK-LABEL: func @read_global_memref func.func @read_global_memref() { %0 = memref.get_global @memref0 : memref<2xf32> diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir index c84aea6609665..f1e1c5e896c66 100644 --- a/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir +++ b/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir @@ -165,6 +165,25 @@ func.func @shape_cast_of_transpose(%arg : vector<1x4x4x1x1xi8>) -> vector<4x4xi8 // ----- +// In this test, the permutation maps the non-unit dimensions (1 and 2) as follows: +// 1 -> 0 +// 2 -> 4 +// Because 0 < 4, this permutation is order preserving and effectively a shape_cast. 
+// (same as the example above, but one of the dims is scalable) +// CHECK-LABEL: @shape_cast_of_transpose_scalable +// CHECK-SAME: %[[ARG:.*]]: vector<1x[4]x4x1x1xi8>) -> vector<[4]x4xi8> { +// CHECK: %[[SHAPE_CAST:.*]] = vector.shape_cast %[[ARG]] : +// CHECK-SAME: vector<1x[4]x4x1x1xi8> to vector<[4]x4xi8> +// CHECK: return %[[SHAPE_CAST]] : vector<[4]x4xi8> +func.func @shape_cast_of_transpose_scalable(%arg : vector<1x[4]x4x1x1xi8>) -> vector<[4]x4xi8> { + %0 = vector.transpose %arg, [1, 0, 3, 4, 2] + : vector<1x[4]x4x1x1xi8> to vector<[4]x1x1x1x4xi8> + %1 = vector.shape_cast %0 : vector<[4]x1x1x1x4xi8> to vector<[4]x4xi8> + return %1 : vector<[4]x4xi8> +} + +// ----- + // In this test, the mapping of non-unit dimensions (1 and 2) is as follows: // 1 -> 2 // 2 -> 1 @@ -184,36 +203,10 @@ func.func @negative_shape_cast_of_transpose(%arg : vector<1x4x4x1xi8>) -> vector // ----- -// Currently the conversion shape_cast(transpose) -> shape_cast is disabled for -// scalable vectors because of bad interaction with ConvertIllegalShapeCastOpsToTransposes -// CHECK-LABEL: @negative_shape_cast_of_transpose_scalable -// CHECK: vector.transpose -// CHECK: vector.shape_cast -func.func @negative_shape_cast_of_transpose_scalable(%arg : vector<[4]x1xi8>) -> vector<[4]xi8> { - %0 = vector.transpose %arg, [1, 0] : vector<[4]x1xi8> to vector<1x[4]xi8> - %1 = vector.shape_cast %0 : vector<1x[4]xi8> to vector<[4]xi8> - return %1 : vector<[4]xi8> -} - -// ----- - /// +-------------------------------------------------------------------------- /// Tests of FoldTransposeShapeCast: transpose(shape_cast) -> shape_cast /// +-------------------------------------------------------------------------- -// The conversion transpose(shape_cast) -> shape_cast is not disabled for scalable -// vectors. 
-// CHECK-LABEL: @transpose_of_shape_cast_scalable -// CHECK: vector.shape_cast -// CHECK-SAME: vector<[4]xi8> to vector<[4]x1xi8> -func.func @transpose_of_shape_cast_scalable(%arg : vector<[4]xi8>) -> vector<[4]x1xi8> { - %0 = vector.shape_cast %arg : vector<[4]xi8> to vector<1x[4]xi8> - %1 = vector.transpose %0, [1, 0] : vector<1x[4]xi8> to vector<[4]x1xi8> - return %1 : vector<[4]x1xi8> -} - -// ----- - // A transpose that is 'order preserving' can be treated like a shape_cast. // CHECK-LABEL: @transpose_of_shape_cast // CHECK-SAME: %[[ARG:.*]]: vector<2x3x1x1xi8>) -> vector<6x1x1xi8> { @@ -229,11 +222,26 @@ func.func @transpose_of_shape_cast(%arg : vector<2x3x1x1xi8>) -> vector<6x1x1xi // ----- -// Scalable dimensions should be treated as non-unit dimensions. // CHECK-LABEL: @transpose_of_shape_cast_scalable +// CHECK-SAME: %[[ARG:.*]]: vector<[2]x3x1x1xi8>) -> vector<[6]x1x1xi8> { +// CHECK: %[[SHAPE_CAST:.*]] = vector.shape_cast %[[ARG]] : +// CHECK-SAME: vector<[2]x3x1x1xi8> to vector<[6]x1x1xi8> +// CHECK: return %[[SHAPE_CAST]] : vector<[6]x1x1xi8> +func.func @transpose_of_shape_cast_scalable(%arg : vector<[2]x3x1x1xi8>) -> vector<[6]x1x1xi8> { + %0 = vector.shape_cast %arg : vector<[2]x3x1x1xi8> to vector<[6]x1x1xi8> + %1 = vector.transpose %0, [0, 2, 1] + : vector<[6]x1x1xi8> to vector<[6]x1x1xi8> + return %1 : vector<[6]x1x1xi8> +} + +// ----- + +// Scalable 1 dimensions (i.e. [1]) should be treated as non-unit dimensions +// (hence no folding). 
+// CHECK-LABEL: @negative_transpose_of_shape_cast_scalable_unit // CHECK: vector.shape_cast // CHECK: vector.transpose -func.func @transpose_of_shape_cast_scalable_unit(%arg : vector<[1]x4x1xi8>) -> vector<4x[1]xi8> { +func.func @negative_transpose_of_shape_cast_scalable_unit(%arg : vector<[1]x4x1xi8>) -> vector<4x[1]xi8> { %0 = vector.shape_cast %arg : vector<[1]x4x1xi8> to vector<[1]x4xi8> %1 = vector.transpose %0, [1, 0] : vector<[1]x4xi8> to vector<4x[1]xi8> return %1 : vector<4x[1]xi8> diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index ec7cee7b2c641..4935ec8ba8e61 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1975,6 +1975,15 @@ func.func @flat_transpose_scalable(%arg0: vector<[16]xf32>) -> vector<[16]xf32> // ----- +// expected-note @+1 {{prior use here}} +func.func @vector_splat_type_mismatch(%a: f32) { + // expected-error @+1 {{expects different type than prior uses: 'i32' vs 'f32'}} + %0 = vector.splat %a : vector<1xi32> + return +} + +// ----- + //===----------------------------------------------------------------------===// // vector.load //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index c59f7bd001905..0121bcdbbba45 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -149,7 +149,7 @@ func.func @vector_transfer_ops_tensor(%arg0: tensor, } // CHECK-LABEL: @vector_broadcast -func.func @vector_broadcast(%a: f32, %b: vector, %c: vector<16xf32>, %d: vector<1x16xf32>, %e: vector<8x1xf32>) -> vector<8x16xf32> { +func.func @vector_broadcast(%a: f32, %b: vector, %c: vector<16xf32>, %d: vector<1x16xf32>, %e: vector<8x1xf32>, %f: vector<8x1x!llvm.ptr<1>>) { // CHECK: vector.broadcast %{{.*}} : f32 to vector %0 = vector.broadcast %a : f32 to vector // CHECK: vector.broadcast %{{.*}} : vector to vector<4xf32> @@ 
-162,7 +162,9 @@ func.func @vector_broadcast(%a: f32, %b: vector, %c: vector<16xf32>, %d: ve %4 = vector.broadcast %d : vector<1x16xf32> to vector<8x16xf32> // CHECK-NEXT: vector.broadcast %{{.*}} : vector<8x1xf32> to vector<8x16xf32> %5 = vector.broadcast %e : vector<8x1xf32> to vector<8x16xf32> - return %4 : vector<8x16xf32> + // CHECK-NEXT: vector.broadcast %{{.*}} : vector<8x1x!llvm.ptr<1>> to vector<8x16x!llvm.ptr<1>> + %6 = vector.broadcast %f : vector<8x1x!llvm.ptr<1>> to vector<8x16x!llvm.ptr<1>> + return } // CHECK-LABEL: @shuffle0D @@ -959,13 +961,16 @@ func.func @vector_scan(%0: vector<4x8x16x32xf32>) -> vector<4x8x16x32xf32> { } // CHECK-LABEL: func @test_splat_op -// CHECK-SAME: [[S:%arg[0-9]+]]: f32 -func.func @test_splat_op(%s : f32) { - // CHECK: vector.splat [[S]] : vector<8xf32> +// CHECK-SAME: %[[s:.*]]: f32, %[[s2:.*]]: !llvm.ptr<1> +func.func @test_splat_op(%s : f32, %s2 : !llvm.ptr<1>) { + // CHECK: vector.splat %[[s]] : vector<8xf32> %v = vector.splat %s : vector<8xf32> - // CHECK: vector.splat [[S]] : vector<4xf32> + // CHECK: vector.splat %[[s]] : vector<4xf32> %u = "vector.splat"(%s) : (f32) -> vector<4xf32> + + // CHECK: vector.splat %[[s2]] : vector<16x!llvm.ptr<1>> + %w = vector.splat %s2 : vector<16x!llvm.ptr<1>> return } diff --git a/mlir/test/IR/attribute.mlir b/mlir/test/IR/attribute.mlir index 4f280bde1aecc..edb7357e4e04b 100644 --- a/mlir/test/IR/attribute.mlir +++ b/mlir/test/IR/attribute.mlir @@ -454,6 +454,10 @@ func.func @allowed_cases_pass() { %0 = "test.i32_enum_attr"() {attr = 5: i32} : () -> i32 // CHECK: test.i32_enum_attr %1 = "test.i32_enum_attr"() {attr = 10: i32} : () -> i32 + // CHECK: test.i32_enum_attr + %2 = "test.i32_enum_attr"() {attr = 2147483648: i32} : () -> i32 + // CHECK: test.i32_enum_attr + %3 = "test.i32_enum_attr"() {attr = 4294967295: i32} : () -> i32 return } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/transfer-read-scalable-non-trailing.mlir 
b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/transfer-read-scalable-non-trailing.mlir new file mode 100644 index 0000000000000..36fdb60d3e7bf --- /dev/null +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/transfer-read-scalable-non-trailing.mlir @@ -0,0 +1,79 @@ +// REQUIRES: arm-emulator + +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: --arm-sve-legalize-vector-storage --convert-vector-to-scf --convert-scf-to-cf --convert-vector-to-llvm='enable-arm-sve' \ +// DEFINE: --expand-strided-metadata --lower-affine --convert-to-llvm --finalize-memref-to-llvm --reconcile-unrealized-casts \ +// DEFINE: -o %t + +// DEFINE: %{entry_point} = main + +// DEFINE: %{run} = %mcr_aarch64_cmd %t -e %{entry_point} -entry-point-result=void --march=aarch64 --mattr="+sve" \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%native_mlir_arm_runner_utils + +// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s + +// Test the transfer_read with vector type with a non-trailing scalable +// dimension as transformed by the pattern LegalizeTransferRead. + +func.func @transfer_read_scalable_non_trailing(%vs : i32, %M : memref) { + func.call @setArmVLBits(%vs) : (i32) -> () + + // Read an LLVM-illegal vector + %c0 = arith.constant 0 : index + %c0_i8 = arith.constant 0 : i8 + %A = vector.transfer_read %M[%c0, %c0], %c0_i8 {in_bounds = [true, true]} : memref, vector<[4]x8xi8> + + // Print the vector, for verification. + %B = vector.shape_cast %A : vector<[4]x8xi8> to vector<[32]xi8> + func.call @printVec(%B) : (vector<[32]xi8>) -> () + + return +} + +func.func @main() { + + %c0 = arith.constant 0 : index + +// Prepare an 8x8 buffer with test data. The test performs two reads +// of a [4]x8 vector from the buffer. One read, with vector length 128 bits, +// reads the first half the buffer. The other read, with vector length +// 256 bits, reads the entire buffer. 
+ %T = arith.constant dense<[[11, 12, 13, 14, 15, 16, 17, 18], + [21, 22, 23, 24, 25, 26, 27, 28], + [31, 32, 33, 34, 35, 36, 37, 38], + [41, 42, 43, 44, 45, 46, 47, 48], + [51, 52, 53, 54, 55, 56, 57, 58], + [61, 62, 63, 64, 65, 66, 67, 68], + [71, 72, 73, 74, 75, 76, 77, 78], + [81, 82, 83, 84, 85, 86, 87, 88]]> : vector<8x8xi8> + + %M = memref.alloca() : memref<8x8xi8> + vector.transfer_write %T, %M[%c0, %c0] : vector<8x8xi8>, memref<8x8xi8> + %MM = memref.cast %M : memref<8x8xi8> to memref + +// CHECK-LABEL: Result(VL128): +// CHECK:( 11, 12, 13, 14, 15, 16, 17, 18, 21, 22, 23, 24, 25, 26, 27, 28 ) +// CHECK:( 31, 32, 33, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 47, 48 ) + vector.print str "Result(VL128):\n" + %c128 = arith.constant 128 : i32 + func.call @transfer_read_scalable_non_trailing(%c128, %MM) : (i32, memref) -> () + +// CHECK-LABEL: Result(VL256): +// CHECK: ( 11, 12, 13, 14, 15, 16, 17, 18, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 47, 48 ) +// CHECK: ( 51, 52, 53, 54, 55, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 73, 74, 75, 76, 77, 78, 81, 82, 83, 84, 85, 86, 87, 88 ) + vector.print str "Result(VL256):\n" + %c256 = arith.constant 256 : i32 + func.call @transfer_read_scalable_non_trailing(%c256, %MM) : (i32, memref) -> () + + return +} + +func.func private @printVec(%v : vector<[32]xi8>) { + %v0 = vector.scalable.extract %v[0] : vector<[16]xi8> from vector<[32]xi8> + %v1 = vector.scalable.extract %v[16] : vector<[16]xi8> from vector<[32]xi8> + vector.print %v0 : vector<[16]xi8> + vector.print %v1 : vector<[16]xi8> + return +} + +func.func private @setArmVLBits(%bits : i32) diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir index 77e52946b830f..0f69875d596f1 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir +++ 
b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --transform-interpreter --cse --split-input-file %s | FileCheck %s +// RUN: mlir-opt --transform-interpreter --cse --split-input-file --verify-diagnostics %s | FileCheck %s #map = affine_map<(d0) -> (d0)> module { @@ -620,3 +620,294 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +func.func @multi_slice_fusion1(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, %arg3 : index) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %dim0 = tensor.dim %arg0, %c0 : tensor + %dim1 = tensor.dim %arg0, %c1 : tensor + %loop:2 = scf.forall (%iv0) = (%c0) to (%dim0) step (%arg3) shared_outs(%init0 = %arg1, %init1 = %arg2) -> (tensor, tensor) { + %tilesize = affine.min affine_map<(d0)[s0, s1] -> (s1, s0 - d0)>(%iv0)[%dim0, %arg3] + %arg0_slice = tensor.extract_slice %arg0[%iv0, 0] [%tilesize, %dim1] [1, 1] : tensor to tensor + %init0_slice = tensor.extract_slice %init0[%iv0] [%tilesize] [1] : tensor to tensor + %init1_slice = tensor.extract_slice %init1[%iv0] [%tilesize] [1] : tensor to tensor + %generic:2 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0_slice : tensor) outs(%init0_slice, %init1_slice : tensor, tensor) { + ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): + %0 = arith.mulf %b0, %b1 : f32 + %1 = arith.addf %b0, %b2 : f32 + linalg.yield %0, %1 : f32, f32 + } -> (tensor, tensor) + scf.forall.in_parallel { + tensor.parallel_insert_slice %generic#0 into %init0[%iv0] [%tilesize] [1] : tensor into tensor + tensor.parallel_insert_slice %generic#1 into %init1[%iv0] [%tilesize] [1] : tensor into tensor + } + } + %empty = tensor.empty(%dim0) : tensor + %result = linalg.generic { + indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], + 
iterator_types = ["parallel"]} + ins(%loop#0, %loop#1 : tensor, tensor) outs(%empty : tensor) { + ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + return %result : tensor +} +// CHECK-LABEL: func @multi_slice_fusion1( +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[C0:.+]] = arith.constant 0 +// CHECK: %[[DIM0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM0]]) +// CHECK: %[[RESULT:.+]]:3 = scf.forall (%[[IV:.+]]) = +// CHECK-SAME: , %[[INIT:[a-zA-Z0-9]+]] = %[[EMPTY]]) +// CHECK: %[[TILESIZE:.+]] = affine.min +// CHECK-DAG: %[[GENERIC:.+]]:2 = linalg.generic +// CHECK-DAG: %[[INIT_SLICE:.+]] = tensor.extract_slice %[[INIT]][%[[IV]]] [%[[TILESIZE]]] +// CHECK: %[[FUSED:.+]] = linalg.generic +// CHECK-SAME: ins(%[[GENERIC]]#0, %[[GENERIC]]#1 : +// CHECK: tensor.parallel_insert_slice %[[FUSED]] into %[[INIT]][%[[IV]]] [%[[TILESIZE]]] +// CHECK: return %[[RESULT]]#2 + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) { + %loop = transform.structured.match ops{["scf.forall"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield = transform.structured.match ops{["tensor.parallel_insert_slice"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield0, %yield1 = transform.split_handle %yield : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + %a, %b = transform.test.fuse_consumer %yield0, %yield1 in (%loop) + : (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// Check that when the given operand tiles are inconsistent, tiling fails. 
+ +func.func @multi_slice_fusion2(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, %arg3 : index) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %dim0 = tensor.dim %arg0, %c0 : tensor + %dim1 = tensor.dim %arg0, %c1 : tensor + %loop:2 = scf.forall (%iv0) = (%c0) to (%dim0) step (%arg3) shared_outs(%init0 = %arg1, %init1 = %arg2) -> (tensor, tensor) { + %tilesize = affine.min affine_map<(d0)[s0, s1] -> (s1, s0 - d0)>(%iv0)[%dim0, %arg3] + %arg0_slice = tensor.extract_slice %arg0[%iv0, 0] [%tilesize, %dim1] [1, 1] : tensor to tensor + %init0_slice = tensor.extract_slice %init0[%iv0] [%tilesize] [1] : tensor to tensor + %generic0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0_slice : tensor) outs(%init0_slice : tensor) { + ^bb0(%b0 : f32, %b1 : f32): + %0 = arith.mulf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + %init1_slice = tensor.extract_slice %init1[%iv0] [%tilesize] [1] : tensor to tensor + %generic1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0_slice : tensor) outs(%init1_slice: tensor) { + ^bb0(%b0 : f32, %b1 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0: f32 + } -> tensor + scf.forall.in_parallel { + tensor.parallel_insert_slice %generic0 into %init0[%iv0] [%tilesize] [1] : tensor into tensor + tensor.parallel_insert_slice %generic1 into %init1[%iv0] [%tilesize] [1] : tensor into tensor + } + } + %empty = tensor.empty(%dim0) : tensor + %result = linalg.generic { + indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + ins(%loop#0, %loop#1 : tensor, tensor) outs(%empty : tensor) { + ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + return %result 
: tensor +} +// CHECK-LABEL: func @multi_slice_fusion2( +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[C0:.+]] = arith.constant 0 +// CHECK: %[[DIM0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM0]]) +// CHECK: %[[RESULT:.+]]:3 = scf.forall (%[[IV:.+]]) = +// CHECK-SAME: , %[[INIT:[a-zA-Z0-9]+]] = %[[EMPTY]]) +// CHECK: %[[TILESIZE:.+]] = affine.min +// CHECK: %[[GENERIC0:.+]] = linalg.generic +// CHECK: %[[GENERIC1:.+]] = linalg.generic +// CHECK-DAG: %[[INIT_SLICE:.+]] = tensor.extract_slice %[[INIT]][%[[IV]]] [%[[TILESIZE]]] +// CHECK: %[[FUSED:.+]] = linalg.generic +// CHECK-SAME: ins(%[[GENERIC0]], %[[GENERIC1]] : +// CHECK: tensor.parallel_insert_slice %[[FUSED]] into %[[INIT]][%[[IV]]] [%[[TILESIZE]]] +// CHECK: return %[[RESULT]]#2 + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) { + %loop = transform.structured.match ops{["scf.forall"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield = transform.structured.match ops{["tensor.parallel_insert_slice"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield0, %yield1 = transform.split_handle %yield : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + %a, %b = transform.test.fuse_consumer %yield0, %yield1 in (%loop) + : (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +func.func @multi_slice_fusion_with_broadcast(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, + %arg3 : index, %arg4 : index) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %dim0 = tensor.dim %arg0, %c0 : tensor + %dim1 = tensor.dim %arg0, %c1 : tensor + %dim2 = tensor.dim %arg0, %c2 : tensor + %loop:2 = scf.forall (%iv0, %iv1) = (%c0, %c0) to (%dim0, %dim1) step (%arg3, %arg4) + shared_outs(%init0 = %arg1, %init1 = 
%arg2) -> (tensor, tensor) { + %tilesize0 = affine.min affine_map<(d0)[s0, s1] -> (s1, s0 - d0)>(%iv0)[%dim0, %arg3] + %tilesize1 = affine.min affine_map<(d0)[s0, s1] -> (s1, s0 - d0)>(%iv1)[%dim1, %arg4] + %arg0_slice = tensor.extract_slice %arg0[%iv0, %iv1, 0] [%tilesize0, %tilesize1, %dim2] [1, 1, 1] + : tensor to tensor + %init0_slice = tensor.extract_slice %init0[%iv0, %iv1] [%tilesize0, %tilesize1] [1, 1] + : tensor to tensor + %generic0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0_slice : tensor) outs(%init0_slice : tensor) { + ^bb0(%b0 : f32, %b1 : f32): + %0 = arith.mulf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + %init1_slice = tensor.extract_slice %init1[%iv0] [%tilesize0] [1] : tensor to tensor + %generic1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%generic0 : tensor) outs(%init1_slice: tensor) { + ^bb0(%b0 : f32, %b1 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0: f32 + } -> tensor + scf.forall.in_parallel { + tensor.parallel_insert_slice %generic0 into %init0[%iv0, %iv1] [%tilesize0, %tilesize1] [1, 1] + : tensor into tensor + tensor.parallel_insert_slice %generic1 into %init1[%iv0] [%tilesize0] [1] : tensor into tensor + } + } + %empty = tensor.empty(%dim0, %dim1) : tensor + %result = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"]} + ins(%loop#0, %loop#1 : tensor, tensor) outs(%empty : tensor) { + ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + return %result : tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1 : 
!transform.any_op {transform.readonly}) { + %loop = transform.structured.match ops{["scf.forall"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield = transform.structured.match ops{["tensor.parallel_insert_slice"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield0, %yield1 = transform.split_handle %yield : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + %a, %b = transform.test.fuse_consumer %yield0, %yield1 in (%loop) + : (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} +// CHECK-LABEL: func @multi_slice_fusion_with_broadcast( +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 +// CHECK-DAG: %[[DIM0:.+]] = tensor.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]] +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM0]], %[[DIM1]]) +// CHECK: %[[RESULT:.+]]:3 = scf.forall (%[[IV0:[a-zA-Z0-9]+]], %[[IV1:[a-zA-Z0-9]+]]) = +// CHECK-SAME: , %[[INIT:[a-zA-Z0-9]+]] = %[[EMPTY]]) +// CHECK-DAG: %[[TILESIZE0:.+]] = affine.min {{.+}}(%[[IV0]]) +// CHECK-DAG: %[[TILESIZE1:.+]] = affine.min {{.+}}(%[[IV1]]) +// CHECK: %[[GENERIC0:.+]] = linalg.generic +// CHECK: %[[GENERIC1:.+]] = linalg.generic +// CHECK-DAG: %[[INIT_SLICE:.+]] = tensor.extract_slice %[[INIT]][%[[IV0]], %[[IV1]]] [%[[TILESIZE0]], %[[TILESIZE1]]] +// CHECK: %[[FUSED:.+]] = linalg.generic +// CHECK-SAME: ins(%[[GENERIC0]], %[[GENERIC1]] : +// CHECK: tensor.parallel_insert_slice %[[FUSED]] into %[[INIT]][%[[IV0]], %[[IV1]]] [%[[TILESIZE0]], %[[TILESIZE1]]] +// CHECK: return %[[RESULT]]#2 + +// ----- + +func.func @multi_slice_fusion_invalid(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, + %arg3 : index, %arg4 : index) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %dim0 = tensor.dim %arg0, %c0 : tensor + %dim1 = tensor.dim %arg0, 
%c1 : tensor + %dim2 = tensor.dim %arg0, %c2 : tensor + %loop:2 = scf.forall (%iv0, %iv1) = (%c0, %c0) to (%dim0, %dim1) step (%arg3, %arg4) + shared_outs(%init0 = %arg1, %init1 = %arg2) -> (tensor, tensor) { + %tilesize0 = affine.min affine_map<(d0)[s0, s1] -> (s1, s0 - d0)>(%iv0)[%dim0, %arg3] + %tilesize1 = affine.min affine_map<(d0)[s0, s1] -> (s1, s0 - d0)>(%iv1)[%dim1, %arg4] + %arg0_slice = tensor.extract_slice %arg0[%iv0, %iv1, 0] [%tilesize0, %tilesize1, %dim2] [1, 1, 1] + : tensor to tensor + %init0_slice = tensor.extract_slice %init0[%iv0, %iv1] [%tilesize0, %tilesize1] [1, 1] + : tensor to tensor + %generic0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0_slice : tensor) outs(%init0_slice : tensor) { + ^bb0(%b0 : f32, %b1 : f32): + %0 = arith.mulf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + %init1_slice = tensor.extract_slice %init1[%iv0, %iv1] [%tilesize0, %tilesize1] [1, 1] + : tensor to tensor + %generic1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0_slice : tensor) outs(%init1_slice: tensor) { + ^bb0(%b0 : f32, %b1 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0: f32 + } -> tensor + scf.forall.in_parallel { + // expected-error @below {{failed to fuse consumer of slice}} + tensor.parallel_insert_slice %generic0 into %init0[%iv0, %iv1] [%tilesize0, %tilesize1] [1, 1] + : tensor into tensor + tensor.parallel_insert_slice %generic1 into %init1[%iv0, %iv1] [%tilesize0, %tilesize1] [1, 1] + : tensor into tensor + } + } + %empty = tensor.empty(%dim0, %dim1) : tensor + %result = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", 
"parallel"]} + ins(%loop#0, %loop#1 : tensor, tensor) outs(%empty : tensor) { + ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): + %0 = arith.addf %b0, %b1 : f32 + linalg.yield %0 : f32 + } -> tensor + return %result : tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) { + %loop = transform.structured.match ops{["scf.forall"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield = transform.structured.match ops{["tensor.parallel_insert_slice"]} in %arg1 + : (!transform.any_op) -> !transform.any_op + %yield0, %yield1 = transform.split_handle %yield : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + %a, %b = transform.test.fuse_consumer %yield0, %yield1 in (%loop) + : (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.yield + } +} diff --git a/mlir/test/Target/LLVMIR/Import/import-failure.ll b/mlir/test/Target/LLVMIR/Import/import-failure.ll index a05a2b4bd4507..d48be66f2063e 100644 --- a/mlir/test/Target/LLVMIR/Import/import-failure.ll +++ b/mlir/test/Target/LLVMIR/Import/import-failure.ll @@ -258,22 +258,6 @@ end: ; // ----- -; CHECK: -; CHECK-SAME: warning: expected function_entry_count to be attached to a function -; CHECK: warning: unhandled metadata: !0 = !{!"function_entry_count", i64 42} -define void @cond_br(i1 %arg) { -entry: - br i1 %arg, label %bb1, label %bb2, !prof !0 -bb1: - ret void -bb2: - ret void -} - -!0 = !{!"function_entry_count", i64 42} - -; // ----- - ; CHECK: ; CHECK-SAME: warning: dropped instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0) define void @unused_scope() { diff --git a/mlir/test/Target/LLVMIR/nvvm/elect.mlir b/mlir/test/Target/LLVMIR/nvvm/elect.mlir new file mode 100644 index 0000000000000..3c5cac4b650bb --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/elect.mlir @@ -0,0 +1,20 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | 
FileCheck %s + +// CHECK-LABEL: @test_nvvm_elect_sync +llvm.func @test_nvvm_elect_sync() -> i1 { + // CHECK: %[[RES:.*]] = call { i32, i1 } @llvm.nvvm.elect.sync(i32 -1) + // CHECK-NEXT: %[[PRED:.*]] = extractvalue { i32, i1 } %[[RES]], 1 + // CHECK-NEXT: ret i1 %[[PRED]] + %0 = nvvm.elect.sync -> i1 + llvm.return %0 : i1 +} + +// CHECK-LABEL: @test_nvvm_elect_sync_mask +llvm.func @test_nvvm_elect_sync_mask(%mask : i32) -> i1 { + // CHECK: %[[RES:.*]] = call { i32, i1 } @llvm.nvvm.elect.sync(i32 %0) + // CHECK-NEXT: %[[PRED:.*]] = extractvalue { i32, i1 } %[[RES]], 1 + // CHECK-NEXT: ret i1 %[[PRED]] + %0 = nvvm.elect.sync %mask -> i1 + llvm.return %0 : i1 +} + diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 660d0a22dce9c..f86a04186f512 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -265,15 +265,6 @@ llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 { llvm.return %3 : i32 } -// CHECK-LABEL: @nvvm_elect_sync -llvm.func @nvvm_elect_sync() -> i1 { - // CHECK: %[[RES:.*]] = call { i32, i1 } @llvm.nvvm.elect.sync(i32 -1) - // CHECK-NEXT: %[[PRED:.*]] = extractvalue { i32, i1 } %[[RES]], 1 - // CHECK-NEXT: ret i1 %[[PRED]] - %0 = nvvm.elect.sync -> i1 - llvm.return %0 : i1 -} - // CHECK-LABEL: @nvvm_mma_mn8n8k4_row_col_f32_f32 llvm.func @nvvm_mma_mn8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 971bea2068544..e6ea3aaeec656 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -1,15 +1,17 @@ // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s -llvm.func @_QPopenmp_target_data() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = 
"_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(99 : i32) : i32 - llvm.store %3, %1 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(99 : i32) : i32 + llvm.store %3, %1 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] @@ -38,23 +40,25 @@ llvm.func @_QPopenmp_target_data() { // ----- -llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { - %1 = llvm.mlir.constant(1023 : index) : i64 - %2 = llvm.mlir.constant(0 : index) : i64 - %3 = llvm.mlir.constant(1024 : index) : i64 - %4 = llvm.mlir.constant(1 : index) : i64 - %5 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64) - %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%6 : !llvm.ptr) { - %7 = llvm.mlir.constant(99 : i32) : i32 - %8 = llvm.mlir.constant(1 : i64) : i64 - %9 = llvm.mlir.constant(1 : i64) : i64 - %10 = llvm.mlir.constant(0 : i64) : i64 - %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> - llvm.store %7, %11 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func 
@_QPopenmp_target_data_region(%0 : !llvm.ptr) { + %1 = llvm.mlir.constant(1023 : index) : i64 + %2 = llvm.mlir.constant(0 : index) : i64 + %3 = llvm.mlir.constant(1024 : index) : i64 + %4 = llvm.mlir.constant(1 : index) : i64 + %5 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64) + %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%6 : !llvm.ptr) { + %7 = llvm.mlir.constant(99 : i32) : i32 + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.mlir.constant(1 : i64) : i64 + %10 = llvm.mlir.constant(0 : i64) : i64 + %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> + llvm.store %7, %11 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4096] @@ -85,50 +89,52 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { // ----- -llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { - %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr - %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr - %8 = llvm.mlir.constant(5 : i32) : i32 - llvm.store %8, %7 : i32, !llvm.ptr - %9 = llvm.mlir.constant(2 : i32) : i32 - llvm.store %9, %5 : i32, !llvm.ptr - %10 = llvm.load %7 : !llvm.ptr -> i32 - %11 = llvm.mlir.constant(10 : i32) : i32 - %12 = llvm.icmp "slt" %10, %11 : i32 - %13 = llvm.load %5 : !llvm.ptr -> i32 - %14 = llvm.mlir.constant(1023 : index) : i64 - %15 = llvm.mlir.constant(0 : index) : i64 - %16 = llvm.mlir.constant(1024 : index) : i64 - %17 = llvm.mlir.constant(1 
: index) : i64 - %18 = omp.map.bounds lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64) - %map1 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""} - %19 = llvm.mlir.constant(511 : index) : i64 - %20 = llvm.mlir.constant(0 : index) : i64 - %21 = llvm.mlir.constant(512 : index) : i64 - %22 = llvm.mlir.constant(1 : index) : i64 - %23 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64) - %map2 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""} - omp.target_enter_data if(%12) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr) - %24 = llvm.load %7 : !llvm.ptr -> i32 - %25 = llvm.mlir.constant(10 : i32) : i32 - %26 = llvm.icmp "sgt" %24, %25 : i32 - %27 = llvm.load %5 : !llvm.ptr -> i32 - %28 = llvm.mlir.constant(1023 : index) : i64 - %29 = llvm.mlir.constant(0 : index) : i64 - %30 = llvm.mlir.constant(1024 : index) : i64 - %31 = llvm.mlir.constant(1 : index) : i64 - %32 = omp.map.bounds lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64) - %map3 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""} - %33 = llvm.mlir.constant(511 : index) : i64 - %34 = llvm.mlir.constant(0 : index) : i64 - %35 = llvm.mlir.constant(512 : index) : i64 - %36 = llvm.mlir.constant(1 : index) : i64 - %37 = omp.map.bounds lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64) - %map4 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""} - omp.target_exit_data if(%26) device(%27 : i32) map_entries(%map3, 
%map4 : !llvm.ptr, !llvm.ptr) - llvm.return +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr + %8 = llvm.mlir.constant(5 : i32) : i32 + llvm.store %8, %7 : i32, !llvm.ptr + %9 = llvm.mlir.constant(2 : i32) : i32 + llvm.store %9, %5 : i32, !llvm.ptr + %10 = llvm.load %7 : !llvm.ptr -> i32 + %11 = llvm.mlir.constant(10 : i32) : i32 + %12 = llvm.icmp "slt" %10, %11 : i32 + %13 = llvm.load %5 : !llvm.ptr -> i32 + %14 = llvm.mlir.constant(1023 : index) : i64 + %15 = llvm.mlir.constant(0 : index) : i64 + %16 = llvm.mlir.constant(1024 : index) : i64 + %17 = llvm.mlir.constant(1 : index) : i64 + %18 = omp.map.bounds lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64) + %map1 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""} + %19 = llvm.mlir.constant(511 : index) : i64 + %20 = llvm.mlir.constant(0 : index) : i64 + %21 = llvm.mlir.constant(512 : index) : i64 + %22 = llvm.mlir.constant(1 : index) : i64 + %23 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64) + %map2 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""} + omp.target_enter_data if(%12) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr) + %24 = llvm.load %7 : !llvm.ptr -> i32 + %25 = llvm.mlir.constant(10 : i32) : i32 + %26 
= llvm.icmp "sgt" %24, %25 : i32 + %27 = llvm.load %5 : !llvm.ptr -> i32 + %28 = llvm.mlir.constant(1023 : index) : i64 + %29 = llvm.mlir.constant(0 : index) : i64 + %30 = llvm.mlir.constant(1024 : index) : i64 + %31 = llvm.mlir.constant(1 : index) : i64 + %32 = omp.map.bounds lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64) + %map3 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""} + %33 = llvm.mlir.constant(511 : index) : i64 + %34 = llvm.mlir.constant(0 : index) : i64 + %35 = llvm.mlir.constant(512 : index) : i64 + %36 = llvm.mlir.constant(1 : index) : i64 + %37 = omp.map.bounds lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64) + %map4 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""} + omp.target_exit_data if(%26) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) + llvm.return + } } // CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4096, i64 2048] @@ -205,18 +211,20 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { // ----- -llvm.func @_QPopenmp_target_use_dev_ptr() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map1 : !llvm.ptr) use_device_ptr(%map2 -> %arg0 : !llvm.ptr) { - %1 = llvm.mlir.constant(10 : i32) : i32 - %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %1, %2 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func 
@_QPopenmp_target_use_dev_ptr() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map1 : !llvm.ptr) use_device_ptr(%map2 -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %1, %2 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 8] @@ -249,18 +257,20 @@ llvm.func @_QPopenmp_target_use_dev_ptr() { // ----- -llvm.func @_QPopenmp_target_use_dev_addr() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { - %1 = llvm.mlir.constant(10 : i32) : i32 - %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %1, %2 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_addr() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.load %arg0 : 
!llvm.ptr -> !llvm.ptr + llvm.store %1, %2 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 8] @@ -291,17 +301,19 @@ llvm.func @_QPopenmp_target_use_dev_addr() { // ----- -llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { - %1 = llvm.mlir.constant(10 : i32) : i32 - llvm.store %1, %arg0 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(10 : i32) : i32 + llvm.store %1, %arg0 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] @@ -331,23 +343,25 @@ llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { // ----- -llvm.func @_QPopenmp_target_use_dev_addr_nomap() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %1 = llvm.mlir.constant(1 : i64) : i64 - %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = 
""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { - %2 = llvm.mlir.constant(10 : i32) : i32 - %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %2, %3 : i32, !llvm.ptr - %4 = llvm.mlir.constant(20 : i32) : i32 - %5 = llvm.load %b : !llvm.ptr -> !llvm.ptr - llvm.store %4, %5 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_addr_nomap() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %1 = llvm.mlir.constant(1 : i64) : i64 + %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { + %2 = llvm.mlir.constant(10 : i32) : i32 + %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %2, %3 : i32, !llvm.ptr + %4 = llvm.mlir.constant(20 : i32) : i32 + %5 = llvm.load %b : !llvm.ptr -> !llvm.ptr + llvm.store %4, %5 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 8, i64 0] @@ -387,25 +401,27 @@ llvm.func @_QPopenmp_target_use_dev_addr_nomap() { // ----- -llvm.func @_QPopenmp_target_use_dev_both() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %1 = llvm.mlir.constant(1 : i64) : i64 - %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map1 = omp.map.info var_ptr(%b : !llvm.ptr, 
!llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map3 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map, %map1 : !llvm.ptr, !llvm.ptr) use_device_addr(%map3 -> %arg0 : !llvm.ptr) use_device_ptr(%map2 -> %arg1 : !llvm.ptr) { - %2 = llvm.mlir.constant(10 : i32) : i32 - %3 = llvm.load %arg1 : !llvm.ptr -> !llvm.ptr - llvm.store %2, %3 : i32, !llvm.ptr - %4 = llvm.mlir.constant(20 : i32) : i32 - %5 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %4, %5 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_both() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %1 = llvm.mlir.constant(1 : i64) : i64 + %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map1 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map3 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map, %map1 : !llvm.ptr, !llvm.ptr) use_device_addr(%map3 -> %arg0 : !llvm.ptr) use_device_ptr(%map2 -> %arg1 : !llvm.ptr) { + %2 = llvm.mlir.constant(10 : i32) : i32 + %3 = llvm.load %arg1 : !llvm.ptr -> !llvm.ptr + llvm.store %2, %3 : i32, !llvm.ptr + %4 = llvm.mlir.constant(20 : i32) : i32 + %5 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %4, %5 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = 
private unnamed_addr constant [2 x i64] [i64 8, i64 8] @@ -448,19 +464,21 @@ llvm.func @_QPopenmp_target_use_dev_both() { // ----- -llvm.func @_QPopenmp_target_data_update() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(99 : i32) : i32 - llvm.store %3, %1 : i32, !llvm.ptr - omp.terminator - } +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_update() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(99 : i32) : i32 + llvm.store %3, %1 : i32, !llvm.ptr + omp.terminator + } - omp.target_update map_entries(%2 : !llvm.ptr) + omp.target_update map_entries(%2 : !llvm.ptr) - llvm.return + llvm.return + } } // CHECK-LABEL: define void @_QPopenmp_target_data_update @@ -488,26 +506,28 @@ llvm.func @_QPopenmp_target_data_update() { // ----- -omp.declare_mapper @_QQFmy_testmy_mapper : !llvm.struct<"_QFmy_testTmy_type", (i32)> { -^bb0(%arg0: !llvm.ptr): - %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%data"} - %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%2 : [0] : !llvm.ptr) -> !llvm.ptr {name = 
"var", partial_map = true} - omp.declare_mapper.info map_entries(%3, %2 : !llvm.ptr, !llvm.ptr) -} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + omp.declare_mapper @_QQFmy_testmy_mapper : !llvm.struct<"_QFmy_testTmy_type", (i32)> { + ^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(0 : i32) : i32 + %1 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%data"} + %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%2 : [0] : !llvm.ptr) -> !llvm.ptr {name = "var", partial_map = true} + omp.declare_mapper.info map_entries(%3, %2 : !llvm.ptr, !llvm.ptr) + } -llvm.func @_QPopenmp_target_data_mapper() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x !llvm.struct<"_QFmy_testTmy_type", (i32)> {bindc_name = "a"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) mapper(@_QQFmy_testmy_mapper) -> !llvm.ptr {name = "a"} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(10 : i32) : i32 - %4 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> - llvm.store %3, %4 : i32, !llvm.ptr - omp.terminator + llvm.func @_QPopenmp_target_data_mapper() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x !llvm.struct<"_QFmy_testTmy_type", (i32)> {bindc_name = "a"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) mapper(@_QQFmy_testmy_mapper) -> !llvm.ptr {name = "a"} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(10 : i32) : i32 + %4 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", 
(i32)> + llvm.store %3, %4 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] diff --git a/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir b/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir index dba8c553aaca5..f5c620a8942d7 100644 --- a/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir @@ -1,13 +1,15 @@ // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s 2>&1 | FileCheck %s -llvm.func @_QPopenmp_target_data_enter() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_enter() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_enter_data map_entries(%2 : !llvm.ptr) nowait + omp.target_enter_data map_entries(%2 : !llvm.ptr) nowait - llvm.return + llvm.return + } } // CHECK: define void @_QPopenmp_target_data_enter() { @@ -32,14 +34,16 @@ llvm.func @_QPopenmp_target_data_enter() { // ----- -llvm.func @_QPopenmp_target_data_update() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} 
{ + llvm.func @_QPopenmp_target_data_update() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_update map_entries(%2 : !llvm.ptr) nowait + omp.target_update map_entries(%2 : !llvm.ptr) nowait - llvm.return + llvm.return + } } // CHECK: define void @_QPopenmp_target_data_update() { @@ -64,14 +68,16 @@ llvm.func @_QPopenmp_target_data_update() { // ----- -llvm.func @_QPopenmp_target_data_exit() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_exit() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_exit_data map_entries(%2 : !llvm.ptr) nowait + omp.target_exit_data map_entries(%2 : !llvm.ptr) nowait - llvm.return + llvm.return + } } // CHECK: define void @_QPopenmp_target_data_exit() { diff --git a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir index 717a77e61b9a1..53c9b4f559645 100644 --- a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir +++ b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir @@ -3,7 +3,7 @@ // This tests checks that a target op inside a data op // We are only interested in ensuring that the 
-mlir-to-llmvir pass doesn't crash. // CHECK: {{.*}} = add i32 {{.*}}, 1 -module attributes { } { +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget} { %0 = llvm.mlir.constant(99 : index) : i64 diff --git a/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir new file mode 100644 index 0000000000000..1589778e0627f --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir @@ -0,0 +1,62 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s +// This tests the fix for https://github.com/llvm/llvm-project/issues/138102 +// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash + +// CHECK-LABEL: define internal void @_QQmain..omp_par + +omp.private {type = private} @_QFEi_private_i32 : i32 +omp.private {type = firstprivate} @_QFEc_firstprivate_i32 : i32 copy { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): +%0 = llvm.load %arg0 : !llvm.ptr -> i32 +llvm.store %0, %arg1 : i32, !llvm.ptr +omp.yield(%arg1 : !llvm.ptr) +} +llvm.func @_QQmain() { +%0 = llvm.mlir.constant(1 : i64) : i64 +%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr +%2 = llvm.mlir.constant(1 : i64) : i64 +%3 = llvm.alloca %2 x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr +%4 = llvm.mlir.constant(10 : index) : i64 +%5 = llvm.mlir.constant(0 : index) : i64 +%6 = llvm.mlir.constant(10000 : index) : i64 +%7 = llvm.mlir.constant(1 : index) : i64 +%8 = llvm.mlir.constant(1 : i64) : i64 +%9 = llvm.mlir.addressof @_QFECchunksz : !llvm.ptr +%10 = llvm.mlir.constant(1 : i64) : i64 +%11 = llvm.trunc %7 : i64 to i32 +llvm.br ^bb1(%11, %4 : i32, i64) +^bb1(%12: i32, %13: i64): // 2 preds: ^bb0, ^bb2 +%14 = llvm.icmp "sgt" %13, %5 : i64 
+llvm.store %12, %3 : i32, !llvm.ptr +omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) { + %19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} + %20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"} + %21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"} + omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + %22 = llvm.mlir.constant(9999 : i32) : i32 + %23 = llvm.mlir.constant(1 : i32) : i32 + omp.parallel { + %24 = llvm.load %arg2 : !llvm.ptr -> i32 + %25 = llvm.add %24, %22 : i32 + omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) { + omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) { + llvm.store %arg5, %arg4 : i32, !llvm.ptr + omp.yield + } + } + omp.terminator + } + omp.terminator + } + omp.terminator +} +llvm.return +} +llvm.mlir.global internal constant @_QFECchunksz() {addr_space = 0 : i32} : i32 { +%0 = llvm.mlir.constant(10000 : i32) : i32 +llvm.return %0 : i32 +} +llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 { +%0 = llvm.mlir.constant(100000 : i32) : i32 +llvm.return %0 : i32 +} diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 738648b8ccdcf..684d491b532f4 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -211,8 +211,8 @@ static void applyEraseUnnecessaryInputs(func::FuncOp funcOp) { static void applyWinogradConv2D(func::FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); - populateWinogradConv2DPatterns(patterns, /*m=*/4, /*r=*/3); - populateWinogradConv2DPatterns(patterns, /*m=*/2, 
/*r=*/5); + populateWinogradConv2DPatterns(patterns, WinogradConv2DFmr::F_4_3); + populateWinogradConv2DPatterns(patterns, WinogradConv2DFmr::F_2_5); (void)applyPatternsGreedily(funcOp, std::move(patterns)); } diff --git a/mlir/test/lib/Dialect/Test/TestEnumDefs.td b/mlir/test/lib/Dialect/Test/TestEnumDefs.td index 5b785a600aad2..10e424a0f2523 100644 --- a/mlir/test/lib/Dialect/Test/TestEnumDefs.td +++ b/mlir/test/lib/Dialect/Test/TestEnumDefs.td @@ -17,9 +17,13 @@ include "mlir/IR/EnumAttr.td" def I32Case5: I32EnumAttrCase<"case5", 5>; def I32Case10: I32EnumAttrCase<"case10", 10>; +def I32CaseSignedMaxPlusOne + : I32EnumAttrCase<"caseSignedMaxPlusOne", 2147483648>; +def I32CaseUnsignedMax : I32EnumAttrCase<"caseUnsignedMax", 4294967295>; -def SomeI32Enum: I32EnumAttr< - "SomeI32Enum", "", [I32Case5, I32Case10]>; +def SomeI32Enum : I32EnumAttr<"SomeI32Enum", "", + [I32Case5, I32Case10, I32CaseSignedMaxPlusOne, + I32CaseUnsignedMax]>; def I64Case5: I64EnumAttrCase<"case5", 5>; def I64Case10: I64EnumAttrCase<"case10", 10>; diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 9126736d1d175..6b22b171822ae 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -1014,7 +1014,7 @@ struct TestPassthroughInvalidOp : public ConversionPattern { .getResult()); } rewriter.replaceOpWithNewOp(op, TypeRange(), flattened, - std::nullopt); + ArrayRef()); return success(); } }; @@ -1030,7 +1030,7 @@ struct TestDropAndReplaceInvalidOp : public ConversionPattern { matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { rewriter.replaceOpWithNewOp(op, TypeRange(), ValueRange(), - std::nullopt); + ArrayRef()); return success(); } }; diff --git a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp index 9971f0cde4ed2..ee3eb9522db7e 
100644 --- a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp +++ b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp @@ -21,6 +21,9 @@ #include "mlir/IR/Dominance.h" #include "mlir/IR/OpImplementation.h" #include "mlir/Interfaces/TilingInterface.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "test-tiling-interface" #define GET_OP_CLASSES #include "TestTilingInterfaceTransformOps.h.inc" @@ -168,29 +171,30 @@ transform::TestFuseAndYieldOp::apply(TransformRewriter &rewriter, /// Apply fusing of consumer transformation to all payload ops and store both /// the original consumer operation as well as the fused consumer operation. -template static LogicalResult applyFuseConsumer( - RewriterBase &rewriter, Operation *transformOp, Range &&payloadOps, - MutableArrayRef loops, uint32_t numConsumerToFuse, - TransformResults &transformResults) { + RewriterBase &rewriter, Operation *transformOp, + ArrayRef slices, MutableArrayRef loops, + uint32_t numConsumerToFuse, TransformResults &transformResults) { SmallVector originalConsumerOps; SmallVector fusedConsumerOps; - for (Operation *target : payloadOps) { - rewriter.setInsertionPoint(target); + rewriter.setInsertionPoint(slices.front()); - while (numConsumerToFuse--) { - FailureOr fuseConsumerResults = - scf::tileAndFuseConsumerOfSlice(rewriter, target, loops); + while (numConsumerToFuse--) { + FailureOr fuseConsumerResults = + scf::tileAndFuseConsumerOfSlices(rewriter, slices, loops); - if (failed(fuseConsumerResults)) - return failure(); + if (failed(fuseConsumerResults)) + return slices.front()->emitOpError("failed to fuse consumer of slice"); - // Report back the relevant handles to the transform op. - originalConsumerOps.push_back( - fuseConsumerResults->origConsumerOperand->getOwner()); - fusedConsumerOps.push_back( - fuseConsumerResults->tiledAndFusedConsumerOperand->getOwner()); + // Report back the relevant handles to the transform op. 
+ for (OpOperand *origConsumerOperand : + fuseConsumerResults->origConsumerOperands) { + originalConsumerOps.push_back(origConsumerOperand->getOwner()); + } + for (OpOperand *tiledAndFusedConsumerOperand : + fuseConsumerResults->tiledAndFusedConsumerOperands) { + fusedConsumerOps.push_back(tiledAndFusedConsumerOperand->getOwner()); } } @@ -203,6 +207,12 @@ DiagnosedSilenceableFailure transform::TestFuseConsumerOp::apply(TransformRewriter &rewriter, TransformResults &transformResults, TransformState &state) { + SmallVector slices; + for (auto op : getTargets()) { + auto sliceOp = *state.getPayloadOps(op).begin(); + slices.push_back(sliceOp); + } + SmallVector loops; for (auto op : llvm::reverse(getLoops())) { auto loopLikeOp = @@ -212,16 +222,16 @@ transform::TestFuseConsumerOp::apply(TransformRewriter &rewriter, } loops.push_back(loopLikeOp); } - LogicalResult result = applyFuseConsumer( - rewriter, getOperation(), state.getPayloadOps(getTarget()), loops, - getNumConsumerToFuse(), transformResults); + LogicalResult result = + applyFuseConsumer(rewriter, getOperation(), slices, loops, + getNumConsumerToFuse(), transformResults); return failed(result) ? 
DiagnosedSilenceableFailure::definiteFailure() : DiagnosedSilenceableFailure::success(); } void transform::TestFuseConsumerOp::getEffects( SmallVectorImpl &effects) { - consumesHandle(getTargetMutable(), effects); + consumesHandle(getTargetsMutable(), effects); consumesHandle(getLoopsMutable(), effects); producesHandle(getOperation()->getOpResults(), effects); modifiesPayload(effects); diff --git a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.td b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.td index 98f7145c99cb1..3c09082e192ea 100644 --- a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.td +++ b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.td @@ -50,7 +50,8 @@ def TestFuseAndYieldOp : Op, + [AttrSizedOperandSegments, + DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, ReportTrackingListenerFailuresOpTrait]> { let description = [{ @@ -59,14 +60,14 @@ def TestFuseConsumerOp : Op:$targets, Variadic:$loops, DefaultValuedAttr:$num_consumer_to_fuse); let results = (outs TransformHandleTypeInterface:$consumer, TransformHandleTypeInterface:$fused_consumer); let assemblyFormat = [{ - $target `in` `(` $loops `)` + $targets `in` `(` $loops `)` (`num_consumer_to_fuse` `=` $num_consumer_to_fuse^)? 
attr-dict `:` functional-type(operands, results) }]; diff --git a/mlir/test/python/dialects/transform_debug_ext.py b/mlir/test/python/dialects/transform_debug_ext.py new file mode 100644 index 0000000000000..2dfdaed343865 --- /dev/null +++ b/mlir/test/python/dialects/transform_debug_ext.py @@ -0,0 +1,45 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import transform +from mlir.dialects.transform import debug + + +def run(f): + print("\nTEST:", f.__name__) + with Context(), Location.unknown(): + module = Module.create() + with InsertionPoint(module.body): + sequence = transform.SequenceOp( + transform.FailurePropagationMode.Propagate, + [], + transform.AnyOpType.get(), + ) + with InsertionPoint(sequence.body): + f(sequence.bodyTarget) + transform.YieldOp() + print(module) + return f + + +@run +def testDebugEmitParamAsRemark(target): + i0 = IntegerAttr.get(IntegerType.get_signless(32), 0) + i0_param = transform.ParamConstantOp(transform.AnyParamType.get(), i0) + debug.emit_param_as_remark(i0_param) + debug.emit_param_as_remark(i0_param, anchor=target, message="some text") + # CHECK-LABEL: TEST: testDebugEmitParamAsRemark + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op): + # CHECK: %[[PARAM:.*]] = transform.param.constant + # CHECK: transform.debug.emit_param_as_remark %[[PARAM]] + # CHECK: transform.debug.emit_param_as_remark %[[PARAM]] + # CHECK-SAME: "some text" + # CHECK-SAME: at %[[ARG0]] + + +@run +def testDebugEmitRemarkAtOp(target): + debug.emit_remark_at(target, "some text") + # CHECK-LABEL: TEST: testDebugEmitRemarkAtOp + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op): + # CHECK: transform.debug.emit_remark_at %[[ARG0]], "some text" diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp index 9941a203bc5cb..06dc588f90203 100644 --- a/mlir/tools/mlir-tblgen/EnumsGen.cpp +++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp @@ -648,8 +648,10 @@ static void emitSpecializedAttrDef(const Record 
&enumDef, raw_ostream &os) { os << formatv("{0} {1}::getValue() const {{\n", enumName, attrClassName); - os << formatv(" return static_cast<{0}>(::mlir::IntegerAttr::getInt());\n", - enumName); + os << formatv( + " return " + "static_cast<{0}>(::mlir::IntegerAttr::getValue().getZExtValue());\n", + enumName); os << "}\n"; } diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td index 8c88fe6e21e6a..0476fa1f7c27a 100644 --- a/offload/liboffload/API/Program.td +++ b/offload/liboffload/API/Program.td @@ -13,7 +13,9 @@ def : Function { let name = "olCreateProgram"; let desc = "Create a program for the device from the binary image pointed to by `ProgData`."; - let details = []; + let details = [ + "The provided `ProgData` will be copied and need not outlive the returned handle", + ]; let params = [ Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>, Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>, diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index da2101529ffec..c2a35a245e2a7 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -480,6 +480,14 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData, } Error olDestroyProgram_impl(ol_program_handle_t Program) { + auto &Device = Program->Image->getDevice(); + if (auto Err = Device.unloadBinary(Program->Image)) + return Err; + + auto &LoadedImages = Device.LoadedImages; + LoadedImages.erase( + std::find(LoadedImages.begin(), LoadedImages.end(), Program->Image)); + return olDestroy(Program); } diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 73e1e66928fac..bc1a768feafdd 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2023,6 +2023,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return 
Plugin::success(); } + Error unloadBinaryImpl(DeviceImageTy *Image) override { + AMDGPUDeviceImageTy &AMDImage = static_cast(*Image); + + // Unload the executable of the image. + return AMDImage.unloadExecutable(); + } + /// Deinitialize the device and release its resources. Error deinitImpl() override { // Deinitialize the stream and event pools. @@ -2035,19 +2042,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (auto Err = AMDGPUSignalManager.deinit()) return Err; - // Close modules if necessary. - if (!LoadedImages.empty()) { - // Each image has its own module. - for (DeviceImageTy *Image : LoadedImages) { - AMDGPUDeviceImageTy &AMDImage = - static_cast(*Image); - - // Unload the executable of the image. - if (auto Err = AMDImage.unloadExecutable()) - return Err; - } - } - // Invalidate agent reference. Agent = {0}; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 91df800304378..fbc798faec24b 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -752,6 +752,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { virtual Expected loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0; + /// Unload a previously loaded Image from the device + Error unloadBinary(DeviceImageTy *Image); + virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0; + /// Setup the device environment if needed. Notice this setup may not be run /// on some plugins. By default, it will be executed, but plugins can change /// this behavior by overriding the shouldSetupDeviceEnvironment function. @@ -1036,6 +1040,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { BoolEnvar OMPX_TrackAllocationTraces = BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false); + /// Array of images loaded into the device. Images are automatically + /// deallocated by the allocator. 
+ llvm::SmallVector LoadedImages; + private: /// Get and set the stack size and heap size for the device. If not used, the /// plugin can implement the setters as no-op and setting the output @@ -1086,10 +1094,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy { UInt32Envar OMPX_InitialNumStreams; UInt32Envar OMPX_InitialNumEvents; - /// Array of images loaded into the device. Images are automatically - /// deallocated by the allocator. - llvm::SmallVector LoadedImages; - /// The identifier of the device within the plugin. Notice this is not a /// global device id and is not the device id visible to the OpenMP user. const int32_t DeviceId; diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 3e9a62f57095f..ac7031b6e881c 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -821,26 +821,49 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) { return Plugin::success(); } -Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { - for (DeviceImageTy *Image : LoadedImages) - if (auto Err = callGlobalDestructors(Plugin, *Image)) - return Err; +Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) { + if (auto Err = callGlobalDestructors(Plugin, *Image)) + return Err; if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) { GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); - for (auto *Image : LoadedImages) { - DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0}; - GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker", - sizeof(DeviceMemoryPoolTrackingTy), - &ImageDeviceMemoryPoolTracking); - if (auto Err = - GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) { - consumeError(std::move(Err)); - continue; - } - DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking); + DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 
0, ~0U, 0}; + GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker", + sizeof(DeviceMemoryPoolTrackingTy), + &ImageDeviceMemoryPoolTracking); + if (auto Err = + GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) { + consumeError(std::move(Err)); } + DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking); + } + + GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); + auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); + if (!ProfOrErr) + return ProfOrErr.takeError(); + + if (!ProfOrErr->empty()) { + // Dump out profdata + if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) == + uint32_t(DeviceDebugKind::PGODump)) + ProfOrErr->dump(); + + // Write data to profiling file + if (auto Err = ProfOrErr->write()) + return Err; + } + return unloadBinaryImpl(Image); +} + +Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { + for (auto &I : LoadedImages) + if (auto Err = unloadBinary(I)) + return Err; + LoadedImages.clear(); + + if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) { // TODO: Write this by default into a file. printf("\n\n|-----------------------\n" "| Device memory tracker:\n" @@ -856,25 +879,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { DeviceMemoryPoolTracking.AllocationMax); } - for (auto *Image : LoadedImages) { - GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); - auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); - if (!ProfOrErr) - return ProfOrErr.takeError(); - - if (ProfOrErr->empty()) - continue; - - // Dump out profdata - if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) == - uint32_t(DeviceDebugKind::PGODump)) - ProfOrErr->dump(); - - // Write data to profiling file - if (auto Err = ProfOrErr->write()) - return Err; - } - // Delete the memory manager before deinitializing the device. Otherwise, // we may delete device allocations after the device is deinitialized. 
if (MemoryManager) diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 9943f533ef5a8..0e662b038c363 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -358,6 +358,19 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::success(); } + Error unloadBinaryImpl(DeviceImageTy *Image) override { + assert(Context && "Invalid CUDA context"); + + // Each image has its own module. + CUDADeviceImageTy &CUDAImage = static_cast(*Image); + + // Unload the module of the image. + if (auto Err = CUDAImage.unloadModule()) + return Err; + + return Plugin::success(); + } + /// Deinitialize the device and release its resources. Error deinitImpl() override { if (Context) { @@ -372,20 +385,6 @@ struct CUDADeviceTy : public GenericDeviceTy { if (auto Err = CUDAEventManager.deinit()) return Err; - // Close modules if necessary. - if (!LoadedImages.empty()) { - assert(Context && "Invalid CUDA context"); - - // Each image has its own module. - for (DeviceImageTy *Image : LoadedImages) { - CUDADeviceImageTy &CUDAImage = static_cast(*Image); - - // Unload the module of the image. 
- if (auto Err = CUDAImage.unloadModule()) - return Err; - } - } - if (Context) { CUresult Res = cuDevicePrimaryCtxRelease(Device); if (auto Err = diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index ced9208acaedc..a35910aece986 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -147,6 +147,12 @@ struct GenELF64DeviceTy : public GenericDeviceTy { /// Initialize the device, which is a no-op Error initImpl(GenericPluginTy &Plugin) override { return Plugin::success(); } + /// Unload the binary image + /// + /// TODO: This currently does nothing, and should be implemented as part of + /// broader memory handling logic for this plugin + Error unloadBinaryImpl(DeviceImageTy *) override { return Plugin::success(); } + /// Deinitialize the device, which is a no-op Error deinitImpl() override { return Plugin::success(); } diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp index 801cd06c95502..051f88c5a0996 100644 --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -70,10 +70,10 @@ static void bectl(kmp_info_t *th, bget_compact_t compact, /* Buffer allocation size quantum: all buffers allocated are a multiple of this size. This MUST be a power of two. */ -/* On IA-32 architecture with Linux* OS, malloc() does not - ensure 16 byte alignment */ +/* On some architectures, malloc() does not ensure 16 byte alignment, + Solaris/sparc and x86 among them. 
*/ -#if KMP_ARCH_X86 || !KMP_HAVE_QUAD +#if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD #define SizeQuant 8 #define AlignType double @@ -1861,7 +1861,7 @@ typedef struct kmp_mem_desc { // Memory block descriptor void *ptr_align; // Pointer to aligned memory, returned kmp_allocator_t *allocator; // allocator } kmp_mem_desc_t; -static int alignment = sizeof(void *); // align to pointer size by default +constexpr size_t alignment = SizeQuant; // external interfaces are wrappers over internal implementation void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) { diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index fdbf9ff45e354..3ca32ba583fe2 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -570,7 +570,7 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, this_thr->th.th_teams_microtask = NULL; this_thr->th.th_teams_level = 0; - *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; + memset(&this_thr->th.th_teams_size, 0, sizeof(kmp_teams_size_t)); va_end(ap); #if KMP_STATS_ENABLED if (previous_state == stats_state_e::SERIAL_REGION) { diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 0ad14f862bcb9..11fa233c4bd27 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -3242,6 +3242,8 @@ static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { kmp_uint32 gtid = __kmp_entry_gtid(); kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); + if (l == nullptr) + return; // avoid segv if lock already destroyed KMP_I_LOCK_FUNC(l, destroy)(l->lock); kmp_indirect_locktag_t tag = l->type; diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d7bc4922d54f7 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1528,7 +1528,7 @@ kmp_task_t 
*__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, // Calculate shared structure offset including padding after kmp_task_t struct // to align pointers in shared struct shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t; - shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *)); + shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(kmp_uint64)); // Allocate a kmp_taskdata_t block and a kmp_task_t block. KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid, diff --git a/openmp/runtime/test/ompt/misc/lock_double_destroy.cpp b/openmp/runtime/test/ompt/misc/lock_double_destroy.cpp new file mode 100644 index 0000000000000..bbdf348e97e7c --- /dev/null +++ b/openmp/runtime/test/ompt/misc/lock_double_destroy.cpp @@ -0,0 +1,40 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include "omp_testsuite.h" + +// tests that the destructor doesn't segv even though +// ompt_finalize_tool() destroys the lock +struct myLock { + omp_lock_t lock; + myLock() { omp_init_lock(&lock); } + ~myLock() { omp_destroy_lock(&lock); } +}; + +myLock lock; + +int main() { + go_parallel_nthreads(2); + + printf("Before ompt_finalize_tool\n"); + ompt_finalize_tool(); + printf("After ompt_finalize_tool\n"); + + return get_exit_value(); +} + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] +// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1 + +// CHECK: {{^}}[[THREAD_ID]]: ompt_event_init_lock + +// CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin +// CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end + +// CHECK: {{^}}Before ompt_finalize_tool + +// CHECK: {{^}}[[THREAD_ID]]: ompt_event_thread_end: thread_id=[[THREAD_ID]] +// CHECK: 0: ompt_event_runtime_shutdown + +// CHECK: {{^}}After ompt_finalize_tool diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 
02cc04fa4f7b6..6f65cfca32943 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -700,6 +700,7 @@ cc_library( ":diagnostic_defs_gen", ":sema_attr_gen", ":support", + "//llvm:BinaryFormat", "//llvm:Core", "//llvm:FrontendDebug", "//llvm:FrontendDriver", diff --git a/utils/bazel/llvm-project-overlay/llvm/config.bzl b/utils/bazel/llvm-project-overlay/llvm/config.bzl index 7cb4b7e9ffe75..ba9db05c651a7 100644 --- a/utils/bazel/llvm-project-overlay/llvm/config.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/config.bzl @@ -32,6 +32,7 @@ posix_defines = [ "BACKTRACE_HEADER=", r'LTDL_SHLIB_EXT=\".so\"', r'LLVM_PLUGIN_EXT=\".so\"', + "LLVM_ENABLE_LLVM_EXPORT_ANNOTATIONS=1", "LLVM_ENABLE_PLUGINS=1", "LLVM_ENABLE_THREADS=1", "HAVE_DEREGISTER_FRAME=1", diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h index 8a9c74d67b124..31d0dc57a7180 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/llvm-config.h @@ -114,6 +114,9 @@ /* Define if building LLVM with BUILD_SHARED_LIBS */ /* #undef LLVM_BUILD_SHARED_LIBS */ +/* Define if exporting LLVM public interface for shared library */ +/* LLVM_ENABLE_LLVM_EXPORT_ANNOTATIONS defined in Bazel */ + /* Define if building LLVM with LLVM_FORCE_USE_OLD_TOOLCHAIN_LIBS */ /* #undef LLVM_FORCE_USE_OLD_TOOLCHAIN ${LLVM_FORCE_USE_OLD_TOOLCHAIN} */ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index a0b72b9709695..41720f132a9dd 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10168,6 +10168,7 @@ td_library( ]), includes = ["include"], deps = [ + ":LinalgOpsTdFiles", ":SCFTdFiles", ":TransformDialectTdFiles", ], diff --git 
a/utils/bazel/llvm_configs/llvm-config.h.cmake b/utils/bazel/llvm_configs/llvm-config.h.cmake index a0ad517a6ecf4..0d8db0b6b52f1 100644 --- a/utils/bazel/llvm_configs/llvm-config.h.cmake +++ b/utils/bazel/llvm_configs/llvm-config.h.cmake @@ -110,6 +110,9 @@ /* Define if building LLVM with BUILD_SHARED_LIBS */ #cmakedefine LLVM_BUILD_SHARED_LIBS +/* Define if exporting LLVM public interface for shared library */ +#cmakedefine LLVM_ENABLE_LLVM_EXPORT_ANNOTATIONS + /* Define if building LLVM with LLVM_FORCE_USE_OLD_TOOLCHAIN_LIBS */ #cmakedefine LLVM_FORCE_USE_OLD_TOOLCHAIN ${LLVM_FORCE_USE_OLD_TOOLCHAIN}