From c000e2e144efd5d20e9718fb1f6eab5cb289ea5b Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 13:19:04 +0100 Subject: [PATCH 01/13] [CHERI] Introduce RVY Extensions --- llvm/lib/Support/RISCVISAInfo.cpp | 8 +++++++- llvm/lib/Target/RISCV/RISCVFeatures.td | 24 ++++++++++++++++++++++++ llvm/test/MC/RISCV/invalid-attribute.s | 4 ++-- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 5e4af8a8cbc25..197d51460d889 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -42,7 +42,7 @@ struct RISCVSupportedExtension { } // end anonymous namespace -static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnh"; +static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnhy"; static const char *RISCVGImplications[] = { "i", "m", "a", "f", "d", "zicsr", "zifencei" @@ -85,6 +85,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"xtheadvdot", RISCVExtensionVersion{1, 0}}, {"xventanacondops", RISCVExtensionVersion{1, 0}}, + {"y", RISCVExtensionVersion{0, 9}}, + {"zawrs", RISCVExtensionVersion{1, 0}}, {"zba", RISCVExtensionVersion{1, 0}}, @@ -154,6 +156,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zvl64b", RISCVExtensionVersion{1, 0}}, {"zvl65536b", RISCVExtensionVersion{1, 0}}, {"zvl8192b", RISCVExtensionVersion{1, 0}}, + + {"zyhybrid", RISCVExtensionVersion{0, 9}}, }; // NOTE: This table should be sorted alphabetically by extension name. @@ -1004,6 +1008,7 @@ static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; +static const char *ImpliedExtsZYHybrid[] = {"y"}; struct ImpliedExtsEntry { StringLiteral Name; @@ -1069,6 +1074,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zvl64b"}, {ImpliedExtsZvl64b}}, {{"zvl65536b"}, {ImpliedExtsZvl65536b}}, {{"zvl8192b"}, {ImpliedExtsZvl8192b}}, + {{"zyhybrid"}, {ImpliedExtsZYHybrid}}, }; void RISCVISAInfo::updateImplication() { diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index feb285e040a00..e6ec1c4248e3c 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -829,6 +829,30 @@ def HasCheriRVC : Predicate<"Subtarget->enableCheriRVCInstrs()">, AssemblerPredicate<(all_of (not FeatureCheriNoRVC)), "CHERI RVC Instructions">; +// RVY Features // +def FeatureStdExtY : SubtargetFeature<"y", "HasStdExtY", "true", + "'y' (CHERI aware Instructions)", []>; + +def HasStdExtY : Predicate<"Subtarget->hasStdExtY()">, + AssemblerPredicate<(all_of FeatureStdExtY), + "'y' (CHERI aware Instructions)">; + +def HasCheriOrRVY + : Predicate<"Subtarget->hasCheri() || Subtarget->hasStdExtY()">, + AssemblerPredicate<(any_of FeatureCheri, FeatureStdExtY), + "'y' (CHERI aware Instructions) or " + "'xcheri' Implements CHERI extension">; + +def FeatureStdExtZYHybrid + : SubtargetFeature<"zyhybrid", "HasStdExtZYHybrid", "true", + "'zyhybrid' (Backwards compatibility for 'y' with RISCV)", + [FeatureStdExtY]>; + +def HasStdExtZYHybrid + : Predicate<"Subtarget->hasStdExtZYHybrid()">, + AssemblerPredicate<(all_of FeatureStdExtZYHybrid), + "'zyhybrid' (Backwards compatibility for 'y' with RISCV)">; + def FeatureCapMode : SubtargetFeature<"cap-mode", "IsCapMode", "true", "Capability mode">;
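Note on the implication table above: the {{"zyhybrid"}, {ImpliedExtsZYHybrid}} entry expands any ISA string containing 'zyhybrid' so that 'y' is always enabled with it, and the pairing is visible to C code through the usual __riscv_<ext> macros (spellings as in the preprocessor test later in this series). A minimal compile-time sketch:

/* zyhybrid implies y: a hybrid build defines both __riscv_zyhybrid and
 * __riscv_y; a purecap build defines only __riscv_y. */
#if defined(__riscv_zyhybrid) && !defined(__riscv_y)
#error "unreachable: the implication table expands zyhybrid to include y"
#endif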
diff --git a/llvm/test/MC/RISCV/invalid-attribute.s b/llvm/test/MC/RISCV/invalid-attribute.s index 1d732af83cda3..700212e63925c 100644 --- a/llvm/test/MC/RISCV/invalid-attribute.s +++ b/llvm/test/MC/RISCV/invalid-attribute.s @@ -13,8 +13,8 @@ .attribute arch, "foo" # CHECK: [[@LINE-1]]:18: error: invalid arch name 'foo', string must begin with rv32{i,e,g} or rv64{i,e,g} -.attribute arch, "rv32i2p1_y2p0" -# CHECK: [[@LINE-1]]:18: error: invalid arch name 'rv32i2p1_y2p0', invalid standard user-level extension 'y' +.attribute arch, "rv32i2p1_o2p0" +# CHECK: [[@LINE-1]]:18: error: invalid arch name 'rv32i2p1_o2p0', invalid standard user-level extension 'o' .attribute stack_align, "16" # CHECK: [[@LINE-1]]:25: error: expected numeric constant From dd1f94da9fa836d015daf4ca3fe13df46be6f6b3 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 15:48:50 +0200 Subject: [PATCH 02/13] [RISCV] Configure clang for rvy extension --- clang/lib/Basic/Targets/RISCV.cpp | 60 ++++++++++++++----- clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 5 +- .../RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 3 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +- 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index add1f0e4b3849..d2de9a794ea73 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -223,21 +223,44 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, } // Macros for use with the set and get permissions builtins. - Builder.defineMacro("__CHERI_CAP_PERMISSION_GLOBAL__", Twine(1<<0)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_EXECUTE__", - Twine(1<<1)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_LOAD__", Twine(1<<2)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_STORE__", Twine(1<<3)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_LOAD_CAPABILITY__", - Twine(1<<4)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__", - Twine(1<<5)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_STORE_LOCAL__", - Twine(1<<6)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_SEAL__", Twine(1<<7)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_INVOKE__", Twine(1<<8)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_UNSEAL__", Twine(1<<9)); - Builder.defineMacro("__CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__", Twine(1<<10)); + if (ISAInfo->hasExtension("y")) { + Builder.defineMacro("__CHERI_CAP_PERMISSION_WRITE__", Twine(1 << 0)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_LOAD_MUTABLE__", + Twine(1 << 1)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_LOAD_GLOBAL__", + Twine(1 << 2)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_STORE_LOCAL__", + Twine(1 << 3)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_CAPABILITY_GLOBAL__", + Twine(1 << 4)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_CAPABILITY__", Twine(1 << 5)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__", + Twine(1 << 16)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_EXECUTE__", Twine(1 << 17)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_READ__", Twine(1 << 18)); + } else { + Builder.defineMacro("__CHERI_CAP_PERMISSION_GLOBAL__", Twine(1 << 0)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_EXECUTE__", + Twine(1 << 1)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_LOAD__", + Twine(1 << 2)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_STORE__", + Twine(1 << 3)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_LOAD_CAPABILITY__", +
Twine(1 << 4)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__", + Twine(1 << 5)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_STORE_LOCAL__", + Twine(1 << 6)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_SEAL__", + Twine(1 << 7)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_INVOKE__", + Twine(1 << 8)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_PERMIT_UNSEAL__", + Twine(1 << 9)); + Builder.defineMacro("__CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__", + Twine(1 << 10)); + } Builder.defineMacro("__riscv_clen", Twine(getCHERICapabilityWidth())); // TODO: _MIPS_CAP_ALIGN_MASK equivalent? @@ -249,6 +272,11 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, // Defines to allow software to detect a ISAv9 compiler vs. an older v8 one. Builder.defineMacro("__riscv_xcheri_tag_clear"); Builder.defineMacro("__riscv_xcheri_no_relocation"); + // Define macros for compatibility. + if (ISAInfo->hasExtension("y")) + Builder.defineMacro("__riscv_zcheripurecap", "9000"); + if (ISAInfo->hasExtension("zyhybrid")) + Builder.defineMacro("__riscv_zcherihybrid", "9000"); } if (ISAInfo->hasExtension("zve32x")) { @@ -372,7 +400,7 @@ bool RISCVTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, } else { ISAInfo = std::move(*ParseResult); } - if (ISAInfo->hasExtension("xcheri")) { + if (ISAInfo->hasExtension("xcheri") || ISAInfo->hasExtension("y")) { HasCheri = true; CapSize = XLen * 2; }
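The two macro families above use deliberately distinct names because the RVY permission layout is not bit-compatible with the legacy XCheri one: write permission moves from bit 3 to bit 0, execute from bit 1 to bit 17, and system-register access from bit 10 to bit 16. Portable code should therefore select permissions by name rather than assume bit positions. A minimal sketch using only macros defined in this hunk (the helper name is invented):

/* Pick the write/store permission bit for whichever encoding is active. */
static inline unsigned long cap_write_perm(void) {
#if defined(__riscv_y)
  return __CHERI_CAP_PERMISSION_WRITE__;        /* 1 << 0 under RVY */
#else
  return __CHERI_CAP_PERMISSION_PERMIT_STORE__; /* 1 << 3 under XCheri */
#endif
}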
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index 40f72f96b0dce..5d269b8b6a073 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -169,7 +169,7 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, if (Args.hasArg(options::OPT_ffixed_x31)) Features.push_back("+reserve-x31"); - bool IsCheri = ISAInfo->hasExtension("xcheri"); + bool IsCheri = ISAInfo->hasExtension("xcheri") || ISAInfo->hasExtension("y"); // -mrelax is default, unless -mno-relax is specified. // For CHERI it's currently not supported, so forbid enabling it and disable @@ -208,7 +208,8 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, if (!IsCheri) { D.Diag(diag::err_riscv_invalid_abi) << A->getValue() - << "pure capability ABI requires xcheri extension to be specified"; + << "pure capability ABI requires xcheri or y extension to be " + "specified"; return; } Features.push_back("+cap-mode"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 0a587504e6b68..d6bac89a838b8 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -64,7 +64,8 @@ ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits, "target-abi)\n"; TargetABI = ABI_Unknown; } else if ((ABIName.startswith("il32pc") || ABIName.startswith("l64pc")) && - !FeatureBits[RISCV::FeatureCheri]) { + !(FeatureBits[RISCV::FeatureCheri] || + FeatureBits[RISCV::FeatureStdExtY])) { errs() << "Pure-capability ABI can't be used for a target that " "doesn't support the XCheri instruction set extension (ignoring " "target-abi)\n"; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 91e897d34c98d..efe0e2a1deb02 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -112,7 +112,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef FS, if (!FS.empty()) llvm::append_range(Features, llvm::split(FS, ',')); auto ISAInfo = cantFail(llvm::RISCVISAInfo::parseFeatures(XLen, Features)); - if (ISAInfo->hasExtension("xcheri")) { + if (ISAInfo->hasExtension("xcheri") || ISAInfo->hasExtension("y")) { if (TT.isArch64Bit()) CapTypes = "-pf200:128:128:128:64"; else From 5f25f3131733e3078ef2b27d8da213845a29c860 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 16:01:04 +0200 Subject: [PATCH 03/13] [RISCV] Add rvy substitutions to lit config --- llvm/utils/lit/lit/llvm/config.py | 21 +++++++++++++++++++++ llvm/utils/update_cc_test_checks.py | 4 ++++ llvm/utils/update_llc_test_checks.py | 4 ++++ 3 files changed, 29 insertions(+) diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 61c7478df02cd..057188a2d2b15 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -482,6 +482,10 @@ def _add_cheri_tool_substitution(self, tool): riscv64_cheri_args = [triple_opt + '=riscv64-unknown-freebsd', '-mattr=+xcheri'] + extra_args riscv32_cheri_purecap_args = ['-target-abi', 'il32pc64d', '-mattr=+cap-mode'] + riscv32_cheri_args riscv64_cheri_purecap_args = ['-target-abi', 'l64pc128d', '-mattr=+cap-mode'] + riscv64_cheri_args + riscv32y_purecap_args = [triple_opt + '=riscv32-unknown-freebsd', '-mattr=+y,+cap-mode', '-target-abi', 'il32pc64d'] + riscv64y_purecap_args = [triple_opt + '=riscv64-unknown-freebsd', '-mattr=+y,+cap-mode', '-target-abi', 'l64pc128d'] + riscv32y_hybrid_args = [triple_opt + '=riscv32-unknown-freebsd', '-mattr=+y,+zyhybrid'] + riscv64y_hybrid_args = [triple_opt + '=riscv64-unknown-freebsd', '-mattr=+y,+zyhybrid'] default_args = cheri128_args tool_patterns = [ @@ -495,6 +499,10 @@ def _add_cheri_tool_substitution(self, tool): ToolSubst('%riscv64_cheri_' + tool, FindTool(tool), extra_args=riscv64_cheri_args), ToolSubst('%riscv32_cheri_purecap_' + tool, FindTool(tool), extra_args=riscv32_cheri_purecap_args), ToolSubst('%riscv64_cheri_purecap_'
+ tool, FindTool(tool), extra_args=riscv64_cheri_purecap_args), + ToolSubst('%riscv32y_purecap_' + tool, FindTool(tool), extra_args=riscv32y_purecap_args), + ToolSubst('%riscv64y_purecap_' + tool, FindTool(tool), extra_args=riscv64y_purecap_args), + ToolSubst('%riscv32y_hybrid_' + tool, FindTool(tool), extra_args=riscv32y_hybrid_args), + ToolSubst('%riscv64y_hybrid_' + tool, FindTool(tool), extra_args=riscv64y_hybrid_args), ] self.add_tool_substitutions(tool_patterns, [self.config.llvm_tools_dir]) @@ -652,6 +660,15 @@ def use_clang( riscv64_cheri_cc1_args = clang_cc1_args + [ '-triple', 'riscv64-unknown-freebsd', '-target-feature', '+xcheri', '-mllvm', '-verify-machineinstrs'] + riscv32y_purecap_cc1_args = clang_cc1_args + ['-triple', 'riscv32-unknown-freebsd', + '-target-feature', '+y', '-mllvm', '-verify-machineinstrs'] + riscv64y_purecap_cc1_args = clang_cc1_args + ['-triple', 'riscv64-unknown-freebsd', + '-target-feature', '+y', '-mllvm', '-verify-machineinstrs'] + riscv32y_hybrid_cc1_args = clang_cc1_args + ['-triple', 'riscv32-unknown-freebsd', + '-target-feature', '+zyhybrid', '-mllvm', '-verify-machineinstrs'] + riscv64y_hybrid_cc1_args = clang_cc1_args + ['-triple', 'riscv64-unknown-freebsd', + '-target-feature', '+zyhybrid', '-mllvm', '-verify-machineinstrs'] + cheri_cc1_args = cheri128_cc1_args default_cheri_cpu = 'cheri128' cheri_clang_args = ['-target', 'mips64-unknown-freebsd', '-nostdinc', @@ -661,6 +678,10 @@ def use_clang( tool_substitutions = [ # CHERI substitutions (order is important due to repeated substitutions!) + ToolSubst('%riscv32y_purecap_cc1', command=self.config.clang, extra_args=riscv32y_purecap_cc1_args+additional_flags+['-target-abi', 'il32pc64', '-target-feature', '+cap-mode']), + ToolSubst('%riscv64y_purecap_cc1', command=self.config.clang, extra_args=riscv64y_purecap_cc1_args+additional_flags+['-target-abi', 'l64pc128', '-target-feature', '+cap-mode']), + ToolSubst('%riscv32y_hybrid_cc1', command=self.config.clang, extra_args=riscv32y_hybrid_cc1_args+additional_flags), + ToolSubst('%riscv64y_hybrid_cc1', command=self.config.clang, extra_args=riscv64y_hybrid_cc1_args+additional_flags), ToolSubst('%cheri_purecap_cc1', command='%cheri_cc1', extra_args=['-target-abi', 'purecap']+additional_flags), ToolSubst('%cheri128_purecap_cc1', command='%cheri128_cc1', extra_args=['-target-abi', 'purecap']+additional_flags), ToolSubst('%cheri_cc1', command=self.config.clang, extra_args=cheri_cc1_args+additional_flags), diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index 925eb7cd22cf7..e1991aba56a74 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -49,6 +49,10 @@ '%riscv64_cheri_purecap_cc1': ['-cc1', "-triple=riscv64-unknown-freebsd", "-target-feature", "+xcheri", "-target-abi", "l64pc128", '-target-feature', '+cap-mode'], '%riscv32_cheri_purecap_clang': ['-target', 'riscv32-unknown-freebsd', '-march=rv32imafdcxcheri', '-mabi=il32pc64'], '%riscv64_cheri_purecap_clang': ['-target', 'riscv64-unknown-freebsd', '-march=rv64imafdcxcheri', '-mabi=l64pc128'], + '%riscv32y_hybrid_cc1' : ["-cc1", "-triple=riscv32-unknown-freebsd", "-target-feature", "+zyhybrid"], + '%riscv64y_hybrid_cc1' : ["-cc1", "-triple=riscv64-unknown-freebsd", "-target-feature", "+zyhybrid"], + '%riscv32y_purecap_cc1' : ["-cc1", "-triple=riscv32-unknown-freebsd", "-target-feature", "+y", "-target-abi", "il32pc64", "-target-feature", "+cap-mode"], + '%riscv64y_purecap_cc1' : ["-cc1", "-triple=riscv64-unknown-freebsd", 
"-target-feature", "+y", "-target-abi", "l64pc128", "-target-feature", "+cap-mod"], } def get_line2func_list(clang_cmd: Command): diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index 11d711ae71b84..fba726ba449c4 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -106,6 +106,10 @@ def main(): llc_cmd = llc_cmd.replace("%riscv64_cheri_purecap_llc", "llc -mtriple=riscv64-unknown-freebsd -target-abi l64pc128d -mattr=+xcheri,+cap-mode") llc_cmd = llc_cmd.replace("%riscv32_cheri_llc", "llc -mtriple=riscv32-unknown-freebsd -mattr=+xcheri") llc_cmd = llc_cmd.replace("%riscv64_cheri_llc", "llc -mtriple=riscv64-unknown-freebsd -mattr=+xcheri") + llc_cmd = llc_cmd.replace("%riscv32y_purecap_llc", "llc -mtriple=riscv32-unknown-freebsd -target-abi il32pc64d -mattr=+y,+cap-mode") + llc_cmd = llc_cmd.replace("%riscv64y_purecap_llc", "llc -mtriple=riscv64-unknown-freebsd -target-abi l64pc128d -mattr=+y,+cap-mode") + llc_cmd = llc_cmd.replace("%riscv32y_hybrid_llc", "llc -mtriple=riscv32-unknown-freebsd -mattr=+y,+zyhybrid") + llc_cmd = llc_cmd.replace("%riscv64y_hybrid_llc", "llc -mtriple=riscv64-unknown-freebsd -mattr=+y,+zyhybrid") filecheck_cmd = commands[-1] if filecheck_cmd.startswith("%cheri64_FileCheck"): filecheck_cmd = filecheck_cmd.replace("%cheri64_FileCheck", "FileCheck '-D#CAP_SIZE=8'") From 57002c0af6e0362b8b8d258094cfedc99dba88a0 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 16:10:00 +0200 Subject: [PATCH 04/13] [clang][RISCV] Update init_globals to work with rvy --- clang/lib/Headers/cheri_init_globals.h | 31 +++++++++++++------ .../CodeGenCXX/cheri/cheri_init_globals.cpp | 4 +++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/clang/lib/Headers/cheri_init_globals.h b/clang/lib/Headers/cheri_init_globals.h index 0004644b3bc40..341d3486e4207 100644 --- a/clang/lib/Headers/cheri_init_globals.h +++ b/clang/lib/Headers/cheri_init_globals.h @@ -50,12 +50,26 @@ struct capreloc { }; static const __SIZE_TYPE__ function_reloc_flag = (__SIZE_TYPE__)1 << (__SIZE_WIDTH__ - 1); +static const __SIZE_TYPE__ constant_reloc_flag = (__SIZE_TYPE__)1 + << (__SIZE_WIDTH__ - 2); +static const __SIZE_TYPE__ indirect_reloc_flag = (__SIZE_TYPE__)1 + << (__SIZE_WIDTH__ - 3); +static const __SIZE_TYPE__ code_reloc_flag = (__SIZE_TYPE__)1 + << (__SIZE_WIDTH__ - 4); +#if defined(__riscv_y) +static const __SIZE_TYPE__ function_pointer_permissions_mask = + ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_WRITE__); +static const __SIZE_TYPE__ constant_pointer_permissions_mask = + ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_WRITE__ | + __CHERI_CAP_PERMISSION_STORE_LOCAL__ | + __CHERI_CAP_PERMISSION_EXECUTE__); +static const __SIZE_TYPE__ global_pointer_permissions_mask = + ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_EXECUTE__); +#else static const __SIZE_TYPE__ function_pointer_permissions_mask = ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_PERMIT_SEAL__ | __CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__ | __CHERI_CAP_PERMISSION_PERMIT_STORE__); -static const __SIZE_TYPE__ constant_reloc_flag = (__SIZE_TYPE__)1 - << (__SIZE_WIDTH__ - 2); static const __SIZE_TYPE__ constant_pointer_permissions_mask = ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_PERMIT_SEAL__ | __CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__ | @@ -65,16 +79,13 @@ static const __SIZE_TYPE__ constant_pointer_permissions_mask = static const __SIZE_TYPE__ global_pointer_permissions_mask = ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_PERMIT_SEAL__ | 
From dabb7e94c463bbd891613a200d49a3b50ccd2240 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 16:10:00 +0200 Subject: [PATCH 04/13] [clang][RISCV] Update init_globals to work with rvy --- clang/lib/Headers/cheri_init_globals.h | 31 +++++++++++++------ .../CodeGenCXX/cheri/cheri_init_globals.cpp | 4 +++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/clang/lib/Headers/cheri_init_globals.h b/clang/lib/Headers/cheri_init_globals.h index 0004644b3bc40..341d3486e4207 100644 --- a/clang/lib/Headers/cheri_init_globals.h +++ b/clang/lib/Headers/cheri_init_globals.h @@ -50,12 +50,26 @@ struct capreloc { }; static const __SIZE_TYPE__ function_reloc_flag = (__SIZE_TYPE__)1 << (__SIZE_WIDTH__ - 1); +static const __SIZE_TYPE__ constant_reloc_flag = (__SIZE_TYPE__)1 + << (__SIZE_WIDTH__ - 2); +static const __SIZE_TYPE__ indirect_reloc_flag = (__SIZE_TYPE__)1 + << (__SIZE_WIDTH__ - 3); +static const __SIZE_TYPE__ code_reloc_flag = (__SIZE_TYPE__)1 + << (__SIZE_WIDTH__ - 4); +#if defined(__riscv_y) +static const __SIZE_TYPE__ function_pointer_permissions_mask = + ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_WRITE__); +static const __SIZE_TYPE__ constant_pointer_permissions_mask = + ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_WRITE__ | + __CHERI_CAP_PERMISSION_STORE_LOCAL__ | + __CHERI_CAP_PERMISSION_EXECUTE__); +static const __SIZE_TYPE__ global_pointer_permissions_mask = + ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_EXECUTE__); +#else static const __SIZE_TYPE__ function_pointer_permissions_mask = ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_PERMIT_SEAL__ | __CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__ | __CHERI_CAP_PERMISSION_PERMIT_STORE__); -static const __SIZE_TYPE__ constant_reloc_flag = (__SIZE_TYPE__)1 - << (__SIZE_WIDTH__ - 2); static const __SIZE_TYPE__ constant_pointer_permissions_mask = ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_PERMIT_SEAL__ | __CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__ | @@ -65,16 +79,13 @@ static const __SIZE_TYPE__ constant_pointer_permissions_mask = static const __SIZE_TYPE__ global_pointer_permissions_mask = ~(__SIZE_TYPE__)(__CHERI_CAP_PERMISSION_PERMIT_SEAL__ | __CHERI_CAP_PERMISSION_PERMIT_EXECUTE__); -static const __SIZE_TYPE__ indirect_reloc_flag = (__SIZE_TYPE__)1 - << (__SIZE_WIDTH__ - 3); -static const __SIZE_TYPE__ code_reloc_flag = (__SIZE_TYPE__)1 - << (__SIZE_WIDTH__ - 4); +#endif -__attribute__((weak)) extern struct capreloc __start___cap_relocs[]; -__attribute__((weak)) extern struct capreloc __stop___cap_relocs[]; +__attribute__((__weak__)) extern struct capreloc __start___cap_relocs; +__attribute__((__weak__)) extern struct capreloc __stop___cap_relocs; -__attribute__((weak)) extern void *__capability __cap_table_start[]; -__attribute__((weak)) extern void *__capability __cap_table_end[]; +__attribute__((__weak__)) extern void *__capability __cap_table_start; +__attribute__((__weak__)) extern void *__capability __cap_table_end; /* * Sandbox data segments are relocated by moving DDC, since they're compiled as diff --git a/clang/test/CodeGenCXX/cheri/cheri_init_globals.cpp b/clang/test/CodeGenCXX/cheri/cheri_init_globals.cpp index 557342c142b51..9c25663d0bed8 100644 --- a/clang/test/CodeGenCXX/cheri/cheri_init_globals.cpp +++ b/clang/test/CodeGenCXX/cheri/cheri_init_globals.cpp @@ -16,6 +16,10 @@ // RUN: %cheri_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -emit-obj -Werror=undef // RUN: %riscv64_cheri_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -emit-obj -Werror=undef // RUN: %riscv32_cheri_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -emit-obj -Werror=undef +// RUN: %riscv64y_purecap_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -Werror=undef +// RUN: %riscv32y_purecap_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -Werror=undef +// RUN: %riscv64y_hybrid_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -Werror=undef +// RUN: %riscv32y_hybrid_cc1 -xc %s -o /dev/null -Wall -Wextra -Wpedantic -Wsystem-headers -verify -Werror=undef // expected-no-diagnostics #include <cheri_init_globals.h>
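Only the mask definitions differ between the two encodings in this header; the capreloc-processing loop that consumes them is shared. A rough sketch of that use, not the header's actual code (the permissions field name and the capability-derivation step are assumptions for illustration):

/* Sketch: select a permission mask from a relocation's flag bits. */
static void apply_masks_sketch(void) {
  for (struct capreloc *r = &__start___cap_relocs;
       r != &__stop___cap_relocs; r++) {
    __SIZE_TYPE__ mask = /* 'permissions' field assumed for illustration */
        (r->permissions & function_reloc_flag)   ? function_pointer_permissions_mask
        : (r->permissions & constant_reloc_flag) ? constant_pointer_permissions_mask
                                                 : global_pointer_permissions_mask;
    (void)mask; /* ... derive, bound, and permission-mask the capability ... */
  }
}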
From fc394feca80202f94c363fdcd14aa573c139559d Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 17:06:45 +0200 Subject: [PATCH 05/13] [clang][RISCV] Update cheriintrin.h header to work with rvy --- clang/lib/Headers/cheriintrin.h | 12 +++ clang/test/CodeGen/cheri/cheriintrin.c | 115 +++++++++++++++++++++++++ 2 files changed, 127 insertions(+) diff --git a/clang/lib/Headers/cheriintrin.h b/clang/lib/Headers/cheriintrin.h index b40142997107c..075ee767d402c 100644 --- a/clang/lib/Headers/cheriintrin.h +++ b/clang/lib/Headers/cheriintrin.h @@ -70,6 +70,17 @@ typedef long cheri_otype_t; /* Capability permissions: */ typedef enum __attribute__((flag_enum, enum_extensibility(open))) { +#if defined(__riscv_y) + CHERI_PERM_CAP = __CHERI_CAP_PERMISSION_CAPABILITY__, + CHERI_PERM_WRITE = __CHERI_CAP_PERMISSION_WRITE__, + CHERI_PERM_READ = __CHERI_CAP_PERMISSION_READ__, + CHERI_PERM_EXECUTE = __CHERI_CAP_PERMISSION_EXECUTE__, + CHERI_PERM_SYSTEM_REGS = __CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__, + CHERI_PERM_LOAD_MUTABLE = __CHERI_CAP_PERMISSION_LOAD_MUTABLE__, + CHERI_PERM_LOAD_GLOBAL = __CHERI_CAP_PERMISSION_LOAD_GLOBAL__, + CHERI_PERM_STORE_LOCAL = __CHERI_CAP_PERMISSION_STORE_LOCAL__, + CHERI_PERM_CAPABILITY_GLOBAL = __CHERI_CAP_PERMISSION_CAPABILITY_GLOBAL__, +#else CHERI_PERM_GLOBAL = __CHERI_CAP_PERMISSION_GLOBAL__, CHERI_PERM_EXECUTE = __CHERI_CAP_PERMISSION_PERMIT_EXECUTE__, CHERI_PERM_LOAD = __CHERI_CAP_PERMISSION_PERMIT_LOAD__, @@ -81,6 +92,7 @@ typedef enum __attribute__((flag_enum, enum_extensibility(open))) { CHERI_PERM_INVOKE = __CHERI_CAP_PERMISSION_PERMIT_INVOKE__, CHERI_PERM_UNSEAL = __CHERI_CAP_PERMISSION_PERMIT_UNSEAL__, CHERI_PERM_SYSTEM_REGS = __CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__, +#endif /* TODO: architecture-dependent permissions */ } cheri_perms_t; #define cheri_perms_get(x) ((cheri_perms_t)(__builtin_cheri_perms_get(x))) diff --git a/clang/test/CodeGen/cheri/cheriintrin.c b/clang/test/CodeGen/cheri/cheriintrin.c index d97fac9db7c3b..a4c9028837d31 100644 --- a/clang/test/CodeGen/cheri/cheriintrin.c +++ b/clang/test/CodeGen/cheri/cheriintrin.c @@ -1,6 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %cheri_cc1 %s -o - -emit-llvm -O1 -Weverything -Werror -verify -Wno-declaration-after-statement | FileCheck %s // RUN: %cheri_purecap_cc1 %s -o - -emit-llvm -O1 -Weverything -Werror -Wno-declaration-after-statement -verify | FileCheck %s +// RUN: %riscv64y_hybrid_cc1 %s -o - -emit-llvm -O1 -Weverything -Werror -Wno-declaration-after-statement -verify | FileCheck %s --check-prefix=RVY +// RUN: %riscv64y_purecap_cc1 %s -o - -emit-llvm -O1 -Weverything -Werror -Wno-declaration-after-statement -verify | FileCheck %s --check-prefix=RVY // expected-no-diagnostics #include <cheriintrin.h> @@ -88,6 +90,82 @@ void test(void *__capability cap, char *__capability cap2, __SIZE_TYPE__ i); // CHECK-NEXT: tail call void @use_size_t(i64 noundef zeroext [[TMP31]]) #[[ATTR5]] // CHECK-NEXT: ret void // +// RVY-LABEL: @test( +// RVY-NEXT: entry: +// RVY-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cheri.cap.address.get.i64(ptr addrspace(200) [[CAP:%.*]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP0]]) #[[ATTR5:[0-9]+]] +// RVY-NEXT: [[TMP1:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.address.set.i64(ptr addrspace(200) [[CAP]], i64 [[I:%.*]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP1]]) #[[ATTR5]] +// RVY-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.cheri.cap.base.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP2]]) #[[ATTR5]] +// RVY-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.cheri.cap.length.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP3]]) #[[ATTR5]] +// RVY-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP4]]) #[[ATTR5]] +// RVY-NEXT: [[TMP5:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) [[CAP]], i64 [[I]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP5]]) #[[ATTR5]] +// RVY-NEXT: [[TMP6:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.tag.clear(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP6]]) #[[ATTR5]] +// RVY-NEXT: [[TMP7:%.*]] = tail call i1 @llvm.cheri.cap.tag.get(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[TMP7]]) #[[ATTR5]] +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[TMP7]]) #[[ATTR5]] +// RVY-NEXT: [[LNOT:%.*]] = xor i1 [[TMP7]], true +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[LNOT]]) #[[ATTR5]] +// RVY-NEXT: [[TMP8:%.*]] = tail call i1 @llvm.cheri.cap.equal.exact(ptr addrspace(200) [[CAP]], ptr addrspace(200) [[CAP2:%.*]]) +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[TMP8]]) #[[ATTR5]] +// RVY-NEXT: [[TMP9:%.*]] = tail call i1 @llvm.cheri.cap.subset.test(ptr addrspace(200)
[[CAP]], ptr addrspace(200) [[CAP2]]) +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[TMP9]]) #[[ATTR5]] +// RVY-NEXT: [[TMP10:%.*]] = tail call i64 @llvm.cheri.round.representable.length.i64(i64 [[I]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP10]]) #[[ATTR5]] +// RVY-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.cheri.representable.alignment.mask.i64(i64 [[I]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP11]]) #[[ATTR5]] +// RVY-NEXT: [[TMP12:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) [[CAP]], i64 [[I]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP12]]) #[[ATTR5]] +// RVY-NEXT: [[TMP13:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.bounds.set.exact.i64(ptr addrspace(200) [[CAP]], i64 [[I]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP13]]) #[[ATTR5]] +// RVY-NEXT: [[TMP14:%.*]] = tail call i64 @llvm.cheri.cap.type.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP14]]) #[[ATTR5]] +// RVY-NEXT: [[TMP15:%.*]] = tail call i1 @llvm.cheri.cap.sealed.get(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[TMP15]]) #[[ATTR5]] +// RVY-NEXT: [[LNOT1:%.*]] = xor i1 [[TMP15]], true +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[LNOT1]]) #[[ATTR5]] +// RVY-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP14]], -2 +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[CMP]]) #[[ATTR5]] +// RVY-NEXT: [[TMP16:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.seal.entry(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP16]]) #[[ATTR5]] +// RVY-NEXT: [[TMP17:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.seal(ptr addrspace(200) [[CAP]], ptr addrspace(200) [[CAP2]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP17]]) #[[ATTR5]] +// RVY-NEXT: [[TMP18:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.unseal(ptr addrspace(200) [[CAP]], ptr addrspace(200) [[CAP2]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP18]]) #[[ATTR5]] +// RVY-NEXT: [[TMP19:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.build(ptr addrspace(200) [[CAP]], ptr addrspace(200) [[CAP2]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP19]]) #[[ATTR5]] +// RVY-NEXT: [[TMP20:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.conditional.seal(ptr addrspace(200) [[CAP]], ptr addrspace(200) [[CAP2]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP20]]) #[[ATTR5]] +// RVY-NEXT: [[TMP21:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.type.copy(ptr addrspace(200) [[CAP]], ptr addrspace(200) [[CAP2]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP21]]) #[[ATTR5]] +// RVY-NEXT: [[TMP22:%.*]] = tail call i64 @llvm.cheri.cap.perms.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: [[CONV2:%.*]] = and i64 [[TMP22]], 4294967295 +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[CONV2]]) #[[ATTR5]] +// RVY-NEXT: [[TMP23:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.perms.and.i64(ptr addrspace(200) [[CAP]], i64 262144) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP23]]) #[[ATTR5]] +// RVY-NEXT: [[TMP24:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.perms.and.i64(ptr addrspace(200) [[CAP]], i64 -131073) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP24]]) #[[ATTR5]] +// RVY-NEXT: 
[[TMP25:%.*]] = tail call ptr addrspace(200) @llvm.cheri.ddc.get() +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP25]]) #[[ATTR5]] +// RVY-NEXT: [[TMP26:%.*]] = tail call ptr addrspace(200) @llvm.cheri.pcc.get() +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP26]]) #[[ATTR5]] +// RVY-NEXT: [[TMP27:%.*]] = tail call i64 @llvm.cheri.cap.flags.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP27]]) #[[ATTR5]] +// RVY-NEXT: [[TMP28:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.flags.set.i64(ptr addrspace(200) [[CAP]], i64 [[I]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP28]]) #[[ATTR5]] +// RVY-NEXT: [[TMP29:%.*]] = tail call i64 @llvm.cheri.cap.high.get.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP29]]) #[[ATTR5]] +// RVY-NEXT: [[TMP30:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.high.set.i64(ptr addrspace(200) [[CAP]], i64 [[I]]) +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[TMP30]]) #[[ATTR5]] +// RVY-NEXT: [[TMP31:%.*]] = tail call i64 @llvm.cheri.cap.load.tags.i64(ptr addrspace(200) [[CAP]]) +// RVY-NEXT: tail call void @use_size_t(i64 noundef [[TMP31]]) #[[ATTR5]] +// RVY-NEXT: ret void +// void test(void *__capability cap, char *__capability cap2, __SIZE_TYPE__ i) { use_size_t(cheri_address_get(cap)); use_cap(cheri_address_set(cap, i)); @@ -130,6 +208,7 @@ void test(void *__capability cap, char *__capability cap2, __SIZE_TYPE__ i) { use_cap(cheri_seal_conditionally(cap, cap2)); use_cap(cheri_type_copy(cap, cap2)); +#if !defined(__riscv_y) _Static_assert(CHERI_PERM_GLOBAL != 0, "must be defined"); _Static_assert(CHERI_PERM_EXECUTE != 0, "must be defined"); _Static_assert(CHERI_PERM_LOAD != 0, "must be defined"); @@ -141,10 +220,25 @@ void test(void *__capability cap, char *__capability cap2, __SIZE_TYPE__ i) { _Static_assert(CHERI_PERM_INVOKE != 0, "must be defined"); _Static_assert(CHERI_PERM_UNSEAL != 0, "must be defined"); _Static_assert(CHERI_PERM_SYSTEM_REGS != 0, "must be defined"); +#else + _Static_assert(CHERI_PERM_CAP != 0, "must be defined"); + _Static_assert(CHERI_PERM_WRITE != 0, "must be defined"); + _Static_assert(CHERI_PERM_READ != 0, "must be defined"); + _Static_assert(CHERI_PERM_EXECUTE != 0, "must be defined"); + _Static_assert(CHERI_PERM_SYSTEM_REGS != 0, "must be defined"); + _Static_assert(CHERI_PERM_LOAD_MUTABLE != 0, "must be defined"); + _Static_assert(CHERI_PERM_LOAD_GLOBAL != 0, "must be defined"); + _Static_assert(CHERI_PERM_STORE_LOCAL != 0, "must be defined"); + _Static_assert(CHERI_PERM_CAPABILITY_GLOBAL != 0, "must be defined"); +#endif /* Check that CHERI_PERMS_T is defined */ cheri_perms_t cap_perms = cheri_perms_get(cap); use_size_t(cap_perms); +#if !defined(__riscv_y) use_cap(cheri_perms_and(cap, CHERI_PERM_LOAD)); +#else + use_cap(cheri_perms_and(cap, CHERI_PERM_READ)); +#endif use_cap(cheri_perms_clear(cap, CHERI_PERM_EXECUTE)); use_cap(cheri_ddc_get()); @@ -187,6 +281,27 @@ void test_alignment_builtins(void *__capability cap, __SIZE_TYPE__ align); // CHECK-NEXT: tail call void @use_bool(i1 noundef zeroext [[IS_ALIGNED]]) #[[ATTR5]] // CHECK-NEXT: ret void // +// RVY-LABEL: @test_alignment_builtins( +// RVY-NEXT: entry: +// RVY-NEXT: [[MASK:%.*]] = add i64 [[ALIGN:%.*]], -1 +// RVY-NEXT: [[PTRADDR:%.*]] = tail call i64 @llvm.cheri.cap.address.get.i64(ptr addrspace(200) [[CAP:%.*]]) +// RVY-NEXT: [[OVER_BOUNDARY:%.*]] = add i64 [[PTRADDR]], [[MASK]] +// 
RVY-NEXT: [[INVERTED_MASK:%.*]] = sub i64 0, [[ALIGN]] +// RVY-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[OVER_BOUNDARY]], [[INVERTED_MASK]] +// RVY-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[PTRADDR]] +// RVY-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, ptr addrspace(200) [[CAP]], i64 [[DIFF]] +// RVY-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(200) [[ALIGNED_RESULT]], i64 [[ALIGN]]) ] +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[ALIGNED_RESULT]]) #[[ATTR5]] +// RVY-NEXT: [[ALIGNED_INTPTR5:%.*]] = and i64 [[PTRADDR]], [[INVERTED_MASK]] +// RVY-NEXT: [[DIFF6:%.*]] = sub i64 [[ALIGNED_INTPTR5]], [[PTRADDR]] +// RVY-NEXT: [[ALIGNED_RESULT7:%.*]] = getelementptr inbounds i8, ptr addrspace(200) [[CAP]], i64 [[DIFF6]] +// RVY-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(200) [[ALIGNED_RESULT7]], i64 [[ALIGN]]) ] +// RVY-NEXT: tail call void @use_cap(ptr addrspace(200) noundef [[ALIGNED_RESULT7]]) #[[ATTR5]] +// RVY-NEXT: [[SET_BITS:%.*]] = and i64 [[PTRADDR]], [[MASK]] +// RVY-NEXT: [[IS_ALIGNED:%.*]] = icmp eq i64 [[SET_BITS]], 0 +// RVY-NEXT: tail call void @use_bool(i1 noundef zeroext [[IS_ALIGNED]]) #[[ATTR5]] +// RVY-NEXT: ret void +// void test_alignment_builtins(void *__capability cap, __SIZE_TYPE__ align) { use_cap(cheri_align_up(cap, align)); use_cap(cheri_align_down(cap, align)); From dabb7e94c463bbd891613a200d49a3b50ccd2240 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Fri, 15 Aug 2025 10:16:38 +0100 Subject: [PATCH 06/13] [RISCV] Test target features are correctly added --- .../cheri/riscv/rvy-default-ir-features.c | 55 +++++++++++++++++++ .../Preprocessor/cheri-rvy-feature-flags.c | 33 +++++++++++ 2 files changed, 88 insertions(+) create mode 100644 clang/test/CodeGen/cheri/riscv/rvy-default-ir-features.c create mode 100644 clang/test/Preprocessor/cheri-rvy-feature-flags.c diff --git a/clang/test/CodeGen/cheri/riscv/rvy-default-ir-features.c b/clang/test/CodeGen/cheri/riscv/rvy-default-ir-features.c new file mode 100644 index 0000000000000..5a5a06ca001cc --- /dev/null +++ b/clang/test/CodeGen/cheri/riscv/rvy-default-ir-features.c @@ -0,0 +1,55 @@ +// RUN: %clang --target=riscv32 -S -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-NOCHERI '-DFEATURES=+32bit,+a,+c,+m' +// RUN: %clang --target=riscv64 -S -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV64-NOCHERI '-DFEATURES=+64bit,+a,+c,+m' + +// Hybrid/Legacy +// RUN: %clang --target=riscv32 -march=rv32izyhybrid -S -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-ZYHYBRID '-DFEATURES=+32bit,+y,+zyhybrid' +// RUN: %clang --target=riscv64 -march=rv64izyhybrid -S -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV64-ZYHYBRID '-DFEATURES=+64bit,+y,+zyhybrid' +// RUN: %clang --target=riscv32 -march=rv32izyhybrid -S -mxcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-ZYHYBRID,NO-RVC '-DFEATURES=+32bit,+y,+zyhybrid' +// RUN: %clang --target=riscv64 -march=rv64izyhybrid -S -mxcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV64-ZYHYBRID,NO-RVC '-DFEATURES=+64bit,+y,+zyhybrid' +// RUN: %clang --target=riscv32 -march=rv32izyhybrid -S -mno-xcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-ZYHYBRID '-DFEATURES=+32bit,+xcheri-norvc,+y,+zyhybrid' +// RUN: %clang --target=riscv64 -march=rv64izyhybrid -S -mno-xcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK,RV64-ZYHYBRID '-DFEATURES=+64bit,+xcheri-norvc,+y,+zyhybrid' + + +// RVY Purecap +// RUN: %clang --target=riscv32 -march=rv32iy -mabi=il32pc64 -S -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-Y '-DFEATURES=+32bit,+cap-mode,+y' +// RUN: %clang --target=riscv64 -march=rv64iy -mabi=l64pc128 -S -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV64-Y '-DFEATURES=+64bit,+cap-mode,+y' +// RUN: %clang --target=riscv32 -march=rv32iy -mabi=il32pc64 -S -mxcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-Y,NO-RVC '-DFEATURES=+32bit,+cap-mode,+y' +// RUN: %clang --target=riscv64 -march=rv64iy -mabi=l64pc128 -S -mxcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV64-Y,NO-RVC '-DFEATURES=+64bit,+cap-mode,+y' +// RUN: %clang --target=riscv32 -march=rv32iy -mabi=il32pc64 -S -mno-xcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV32-Y '-DFEATURES=+32bit,+cap-mode,+xcheri-norvc,+y' +// RUN: %clang --target=riscv64 -march=rv64iy -mabi=l64pc128 -S -mno-xcheri-rvc -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CHECK,RV64-Y '-DFEATURES=+64bit,+cap-mode,+xcheri-norvc,+y' + +// RV32-NOCHERI: target datalayout = "e-m:e-p:32:32-i64:64-n32-S128" +// RV64-NOCHERI: target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +// RV32-ZYHYBRID: target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128" +// RV64-ZYHYBRID: target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n32:64-S128" +// RV32-Y: target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" +// RV64-Y: target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n32:64-S128-A200-P200-G200" + +// CHECK: "target-features"="[[FEATURES]] +// CHECK-SAME: -save-restore +// NO-RVC-SAME: -xcheri-norvc + +// RV32-NOCHERI: !{i32 1, !"target-abi", !"ilp32"} +// RV64-NOCHERI: !{i32 1, !"target-abi", !"lp64"} +// RV32-ZYHYBRID: !{i32 1, !"target-abi", !"ilp32"} +// RV64-ZYHYBRID: !{i32 1, !"target-abi", !"lp64"} +// RV32-Y: !{i32 1, !"target-abi", !"il32pc64"} +// RV64-Y: !{i32 1, !"target-abi", !"l64pc128"} + +int test(void){ + return 1; +} diff --git a/clang/test/Preprocessor/cheri-rvy-feature-flags.c b/clang/test/Preprocessor/cheri-rvy-feature-flags.c new file mode 100644 index 0000000000000..aa4fad39b57eb --- /dev/null +++ b/clang/test/Preprocessor/cheri-rvy-feature-flags.c @@ -0,0 +1,33 @@ +// RUN: %riscv64y_purecap_cc1 -E -dM -ffreestanding < /dev/null \ +// RUN: | FileCheck %s +// RUN: %riscv32y_purecap_cc1 -E -dM -ffreestanding < /dev/null \ +// RUN: | FileCheck %s +// RUN: %riscv64y_hybrid_cc1 -E -dM -ffreestanding < /dev/null \ +// RUN: | FileCheck --check-prefixes=CHECK,CHECK-HYBRID %s +// RUN: %riscv32y_hybrid_cc1 -E -dM -ffreestanding < /dev/null \ +// RUN: | FileCheck --check-prefixes=CHECK,CHECK-HYBRID %s + +// CHECK: #define __CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__ 65536 +// CHECK: #define __CHERI_CAP_PERMISSION_CAPABILITY_GLOBAL__ 16 +// CHECK: #define __CHERI_CAP_PERMISSION_CAPABILITY__ 32 +// CHECK: #define __CHERI_CAP_PERMISSION_EXECUTE__ 131072 +// CHECK: #define __CHERI_CAP_PERMISSION_LOAD_GLOBAL__ 4 +// CHECK: #define __CHERI_CAP_PERMISSION_LOAD_MUTABLE__ 2 +// CHECK: #define __CHERI_CAP_PERMISSION_READ__ 262144 +// CHECK: #define __CHERI_CAP_PERMISSION_STORE_LOCAL__ 8 +// CHECK: #define __CHERI_CAP_PERMISSION_WRITE__ 1 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_ACCESS_SYSTEM_REGISTERS__ 
1024 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_GLOBAL__ 1 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_EXECUTE__ 2 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_INVOKE__ 256 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_LOAD_CAPABILITY__ 16 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_LOAD__ 4 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_SEAL__ 128 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_STORE_CAPABILITY__ 32 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_STORE_LOCAL__ 64 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_STORE__ 8 +// CHECK-NOT: #define __CHERI_CAP_PERMISSION_PERMIT_UNSEAL__ 512 +// CHECK: #define __riscv_y [[VERSION:[0-9]+]] +// CHECK-HYBRID: #define __riscv_zcherihybrid [[VERSION]] +// CHECK: #define __riscv_zcheripurecap [[VERSION]] +// CHECK-HYBRID: #define __riscv_zyhybrid [[VERSION]] From f0a06656068d033295522b354ebd9e062a4d0672 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Fri, 15 Aug 2025 10:22:14 +0100 Subject: [PATCH 07/13] [lld][RVY] Emit rvy instructions into .plt --- lld/ELF/Arch/RISCV.cpp | 18 +++++++++++++----- lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 2 ++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 93bab4940a710..40e558dfd4460 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -68,6 +68,9 @@ enum Op { CIncOffsetImm = 0x105b, CLC_64 = 0x3003, CLC_128 = 0x200f, + + ADDIY = 0x201B, + CLY = 0x400F, }; enum Reg { @@ -254,9 +257,12 @@ void RISCV::writePltHeader(uint8_t *buf) const { // (c)jr (c)t3 // (if shift == 0): nop uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); - uint32_t ptrload = config->isCheriAbi ? config->is64 ? CLC_128 : CLC_64 - : config->is64 ? LD : LW; - uint32_t ptraddi = config->isCheriAbi ? CIncOffsetImm : ADDI; + uint32_t ptrload = + config->isCheriAbi + ? (config->zRVY ? CLY : (config->is64 ? CLC_128 : CLC_64)) + : (config->is64 ? LD : LW); + uint32_t ptraddi = + config->isCheriAbi ? (config->zRVY ? ADDIY : CIncOffsetImm) : ADDI; // Shift is log2(pltsize / ptrsize), which is 0 for CHERI-128 so skipped uint32_t shift = 2 - config->is64 - config->isCheriAbi; uint32_t ptrsize = config->isCheriAbi ? config->capabilitySize @@ -280,8 +286,10 @@ void RISCV::writePlt(uint8_t *buf, const Symbol &sym, // l[wdc] (c)t3, %pcrel_lo(1b)((c)t3) // (c)jalr (c)t1, (c)t3 // nop - uint32_t ptrload = config->isCheriAbi ? config->is64 ? CLC_128 : CLC_64 - : config->is64 ? LD : LW; + uint32_t ptrload = + config->isCheriAbi + ? (config->zRVY ? CLY : (config->is64 ? CLC_128 : CLC_64)) + : (config->is64 ? 
LD : LW); uint32_t entryva = sym.getGotPltVA(); uint32_t offset = entryva - pltEntryAddr; write32le(buf + 0, utype(AUIPC, X_T3, hi20(offset))); diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index f688a94260bef..d9bee1878d2c7 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -303,6 +303,7 @@ struct Config { // -z captabledebug: add additional symbols $captable_load_ before // each captable clc instruction that indicates which symbol should be loaded bool zCapTableDebug; + bool zRVY; bool zCombreloc; bool zCopyreloc; bool zForceBti; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7c2a3479f2aa8..cdcb77f52217e 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -564,6 +564,7 @@ constexpr const char *knownZFlags[] = { "relro", "retpolineplt", "rodynamic", + "rvy", "separate-code", "separate-loadable-segments", "shstk", @@ -1394,6 +1395,7 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); config->whyExtract = args.getLastArgValue(OPT_why_extract); config->zCapTableDebug = getZFlag(args, "captabledebug", "nocaptabledebug", false); + config->zRVY = hasZOption(args, "rvy"); config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); config->zForceBti = hasZOption(args, "force-bti"); From 77e422d4718a87ce0071fd599081466b5916a5ff Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 21 Aug 2025 11:52:57 +0100 Subject: [PATCH 08/13] [lld][RISCV] Deduce RVY from merged riscv.attributes --- lld/ELF/Arch/RISCV.cpp | 7 ++++--- lld/ELF/Config.h | 3 ++- lld/ELF/Driver.cpp | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 40e558dfd4460..0d1b87eb5e87e 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -259,10 +259,10 @@ void RISCV::writePltHeader(uint8_t *buf) const { uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); uint32_t ptrload = config->isCheriAbi - ? (config->zRVY ? CLY : (config->is64 ? CLC_128 : CLC_64)) + ? (config->isRVY ? CLY : (config->is64 ? CLC_128 : CLC_64)) : (config->is64 ? LD : LW); uint32_t ptraddi = - config->isCheriAbi ? (config->zRVY ? ADDIY : CIncOffsetImm) : ADDI; + config->isCheriAbi ? (config->isRVY ? ADDIY : CIncOffsetImm) : ADDI; // Shift is log2(pltsize / ptrsize), which is 0 for CHERI-128 so skipped uint32_t shift = 2 - config->is64 - config->isCheriAbi; uint32_t ptrsize = config->isCheriAbi ? config->capabilitySize @@ -288,7 +288,7 @@ void RISCV::writePlt(uint8_t *buf, const Symbol &sym, // nop uint32_t ptrload = config->isCheriAbi - ? (config->zRVY ? CLY : (config->is64 ? CLC_128 : CLC_64)) + ? (config->isRVY ? CLY : (config->is64 ? CLC_128 : CLC_64)) : (config->is64 ? 
LD : LW); uint32_t entryva = sym.getGotPltVA(); uint32_t offset = entryva - pltEntryAddr; @@ -1083,6 +1083,7 @@ mergeAttributesSection(const SmallVector<InputSectionBase *, 0> &sections) { std::make_unique<RISCVISAInfo>(xlen, exts))) { merged.strAttr.try_emplace(RISCVAttrs::ARCH, saver().save((*result)->toString())); + config->isRVY = result.get()->hasExtension("y"); } else { errorOrWarn(llvm::toString(result.takeError())); } diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index d9bee1878d2c7..fd9ef2e50a7cd 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -303,7 +303,6 @@ struct Config { // -z captabledebug: add additional symbols $captable_load_ before // each captable clc instruction that indicates which symbol should be loaded bool zCapTableDebug; - bool zRVY; bool zCombreloc; bool zCopyreloc; bool zForceBti; @@ -426,6 +425,8 @@ struct Config { // True if we are creating a pure-capability CheriABI output. bool isCheriAbi = false; + // True if riscv 'y' extension is enabled. + bool isRVY = false; // Mode of MTE to write to the ELF note. Should be one of NT_MEMTAG_ASYNC (for // async), NT_MEMTAG_SYNC (for sync), or NT_MEMTAG_LEVEL_NONE (for none). If // async or sync is enabled, write the ELF note specifying the default MTE diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index cdcb77f52217e..bfc4a5a96a7b7 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1394,8 +1394,8 @@ static void readConfigs(opt::InputArgList &args) { config->warnSymbolOrdering = args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); config->whyExtract = args.getLastArgValue(OPT_why_extract); - config->zCapTableDebug = getZFlag(args, "captabledebug", "nocaptabledebug", false); - config->zRVY = hasZOption(args, "rvy"); + config->zCapTableDebug = + getZFlag(args, "captabledebug", "nocaptabledebug", false); config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); config->zForceBti = hasZOption(args, "force-bti"); From 08d52174d5adaa49272f56273364de9ea6999b69 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 21 Aug 2025 15:11:10 +0100 Subject: [PATCH 09/13] [RISCV] Introduce RVY Instructions --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 124 +++- .../RISCV/Disassembler/RISCVDisassembler.cpp | 27 + .../RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 5 + .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 15 + .../RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 18 + .../RISCV/MCTargetDesc/RISCVInstPrinter.h | 4 + .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 19 +- .../RISCV/RISCVExpandAtomicPseudoInsts.cpp | 91 ++- .../Target/RISCV/RISCVExpandPseudoInsts.cpp | 19 +- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 4 +- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 35 +- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 52 +- llvm/lib/Target/RISCV/RISCVInstrFormatsY.td | 109 +++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 140 +++- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 1 + llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td | 326 ++++----- llvm/lib/Target/RISCV/RISCVInstrInfoY.td | 624 ++++++++++++++++++ llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 26 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 3 +- llvm/lib/Target/RISCV/RISCVSystemOperands.td | 17 +- llvm/lib/Target/RISCV/RISCVSystemOperandsY.td | 90 +++ .../cheri/rv32cxcheri-cap-mode-invalid.s | 2 +- 24 files changed, 1514 insertions(+), 240 deletions(-) create mode 100644
llvm/lib/Target/RISCV/RISCVInstrFormatsY.td create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoY.td create mode 100644 llvm/lib/Target/RISCV/RISCVSystemOperandsY.td diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index cc05503dcb5d4..8f9c8362d4ec8 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -119,7 +119,8 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseDirective(AsmToken DirectiveID) override; bool isCheri() const override { - return getSTI().getFeatureBits()[RISCV::FeatureCheri]; + return getSTI().getFeatureBits()[RISCV::FeatureCheri] || + getSTI().getFeatureBits()[RISCV::FeatureStdExtY]; } unsigned getCheriCapabilitySize() const override { @@ -224,6 +225,7 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseSpecialCapRegister(OperandVector &Operands); ParseStatus parseFPImm(OperandVector &Operands); ParseStatus parseImmediate(OperandVector &Operands); + ParseStatus parseCSetBndImmOperand(OperandVector &Operands); ParseStatus parseRegister(OperandVector &Operands, bool AllowParens = false); ParseStatus parseMemOpBaseReg(OperandVector &Operands); ParseStatus parseZeroOffsetMemOp(OperandVector &Operands); @@ -359,6 +361,7 @@ struct RISCVOperand final : public MCParsedAsmOperand { FPImmediate, SystemRegister, SpecialCapRegister, + CheriSystemRegister, VType, FRM, Fence, @@ -394,6 +397,12 @@ struct RISCVOperand final : public MCParsedAsmOperand { unsigned Encoding; }; + struct CheriSysRegOp { + const char *Data; + unsigned Length; + unsigned Encoding; + }; + struct VTypeOp { unsigned Val; }; @@ -422,6 +431,7 @@ struct RISCVOperand final : public MCParsedAsmOperand { FPImmOp FPImm; struct SysRegOp SysReg; struct SpecialCapRegOp SpecialCapReg; + struct CheriSysRegOp CheriSysReg; struct VTypeOp VType; struct FRMOp FRM; struct FenceOp Fence; @@ -455,6 +465,9 @@ struct RISCVOperand final : public MCParsedAsmOperand { case KindTy::SpecialCapRegister: SpecialCapReg = o.SpecialCapReg; break; + case KindTy::CheriSystemRegister: + CheriSysReg = o.CheriSysReg; + break; case KindTy::VType: VType = o.VType; break; @@ -497,6 +510,9 @@ struct RISCVOperand final : public MCParsedAsmOperand { bool isSpecialCapRegister() const { return Kind == KindTy::SpecialCapRegister; } bool isRlist() const { return Kind == KindTy::Rlist; } bool isSpimm() const { return Kind == KindTy::Spimm; } + bool isCheriCSRSystemRegister() const { + return Kind == KindTy::CheriSystemRegister; + } bool isGPR() const { return Kind == KindTy::Register && @@ -766,6 +782,27 @@ struct RISCVOperand final : public MCParsedAsmOperand { VK == RISCVMCExpr::VK_RISCV_None; } + bool isCSetBndImm() const { + if (!isImm()) + return false; + + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + int64_t Imm; + if (!evaluateConstantImm(getImm(), Imm, VK) || + VK != RISCVMCExpr::VK_RISCV_None) + return false; + + if (Imm > 31) { + if (Imm % 16 != 0) + return false; + + if (Imm / 16 > 31) + return false; + } + + return true; + } + bool isSImm5() const { if (!isImm()) return false; @@ -1035,6 +1072,11 @@ struct RISCVOperand final : public MCParsedAsmOperand { return StringRef(SpecialCapReg.Data, SpecialCapReg.Length); } + StringRef getCheriSysReg() const { + assert(Kind == KindTy::CheriSystemRegister && "Invalid access!"); + return StringRef(CheriSysReg.Data, CheriSysReg.Length); + } + const MCExpr *getImm() const { assert(Kind == KindTy::Immediate && "Invalid 
type access!"); return Imm.Val; @@ -1091,6 +1133,9 @@ struct RISCVOperand final : public MCParsedAsmOperand { case KindTy::SpecialCapRegister: OS << "'; break; + case KindTy::CheriSystemRegister: + OS << "'; + break; case KindTy::VType: OS << " createCheriSysReg(StringRef Str, SMLoc S, + unsigned Encoding) { + auto Op = std::make_unique(KindTy::CheriSystemRegister); + Op->CheriSysReg.Data = Str.data(); + Op->CheriSysReg.Length = Str.size(); + Op->CheriSysReg.Encoding = Encoding; + Op->StartLoc = S; + return Op; + } + static std::unique_ptr createFRMArg(RISCVFPRndMode::RoundingMode FRM, SMLoc S) { auto Op = std::make_unique(KindTy::FRM); @@ -1252,6 +1307,10 @@ struct RISCVOperand final : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createImm(Imm)); } + void addCSetBndImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addFenceArgOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(Fence.Val)); @@ -1267,6 +1326,11 @@ struct RISCVOperand final : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createImm(SpecialCapReg.Encoding)); } + void addCheriCSRSystemRegisterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(CheriSysReg.Encoding)); + } + // Support non-canonical syntax: // "vsetivli rd, uimm, 0xabc" or "vsetvli rd, rs1, 0xabc" // "vsetivli rd, uimm, (0xc << N)" or "vsetvli rd, rs1, (0xc << N)" @@ -1621,6 +1685,11 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "capability register name or an integer " "in the range"); } + case Match_InvalidCheriCSRSystemRegister: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, + "operand must be a valid cheri system register name"); + } case Match_InvalidLoadFPImm: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); return Error(ErrorLoc, "operand must be a valid floating-point constant"); @@ -1674,6 +1743,11 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidRnumArg: { return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10); } + case Match_InvalidCSetBndImm: { + const SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "immediate must be an integer in range [0, 31] " + "or be a multiple of 16 in the range [0, 496]"); + } } llvm_unreachable("Unknown match type detected!"); @@ -1906,6 +1980,12 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { if (CE) { int64_t Imm = CE->getValue(); if (isUInt<12>(Imm)) { + auto CheriSysReg = RISCVCheriSysReg::lookupCheriSysRegByEncoding(Imm); + if (CheriSysReg && STI->hasFeature(RISCV::FeatureCapMode)) { + Operands.push_back( + RISCVOperand::createCheriSysReg(CheriSysReg->Name, S, Imm)); + return MatchOperand_Success; + } auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm); // Accept an immediate representing a named or un-named Sys Reg // if the range is valid, regardless of the required features. 
@@ -1945,6 +2025,13 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { return ParseStatus::Failure; } + auto CheriSysReg = RISCVCheriSysReg::lookupCheriSysRegByName(Identifier); + if (CheriSysReg) { + Operands.push_back(RISCVOperand::createCheriSysReg( + Identifier, S, CheriSysReg->Encoding)); + return MatchOperand_Success; + } + auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier); if (!SysReg) if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier))) @@ -2128,6 +2215,17 @@ ParseStatus RISCVAsmParser::parseImmediate(OperandVector &Operands) { return ParseStatus::Success; } +ParseStatus RISCVAsmParser::parseCSetBndImmOperand(OperandVector &Operands) { + if (getLexer().getKind() == AsmToken::Identifier) { + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister Reg = matchRegisterNameHelper(isRVE(), Name); + if (Reg.isValid()) + return ParseStatus::NoMatch; + } + + return parseImmediate(Operands); +} + ParseStatus RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) { SMLoc S = getLoc(); SMLoc E; @@ -3031,9 +3129,10 @@ bool RISCVAsmParser::parseDirectiveOption() { if (Parser.parseEOL()) return true; - if (!getSTI().hasFeature(RISCV::FeatureCheri)) + if (!(getSTI().hasFeature(RISCV::FeatureCheri) || + getSTI().hasFeature(RISCV::FeatureStdExtY))) return Error(Parser.getTok().getLoc(), - "option requires 'xcheri' extension"); + "option requires 'xcheri' or 'y' extension"); getTargetStreamer().emitDirectiveOptionCapMode(); setFeatureBits(RISCV::FeatureCapMode, "cap-mode"); @@ -3044,9 +3143,10 @@ bool RISCVAsmParser::parseDirectiveOption() { if (Parser.parseEOL()) return true; - if (!getSTI().hasFeature(RISCV::FeatureCheri)) + if (!(getSTI().hasFeature(RISCV::FeatureCheri) || + getSTI().hasFeature(RISCV::FeatureStdExtY))) return Error(Parser.getTok().getLoc(), - "option requires 'xcheri' extension"); + "option requires 'xcheri' or 'y' extension"); getTargetStreamer().emitDirectiveOptionNoCapMode(); clearFeatureBits(RISCV::FeatureCapMode, "cap-mode"); @@ -3655,9 +3755,9 @@ void RISCVAsmParser::emitCapLoadLocalCap(MCInst &Inst, SMLoc IDLoc, // CINCOFFSET cdest, cdest, %pcrel_lo(TmpLabel) MCOperand DestReg = Inst.getOperand(0); const MCExpr *Symbol = Inst.getOperand(1).getExpr(); - emitAuipccInstPair(DestReg, DestReg, Symbol, - RISCVMCExpr::VK_RISCV_PCREL_HI, - RISCV::CIncOffsetImm, IDLoc, Out); + const bool HasRVY = STI->hasFeature(RISCV::FeatureStdExtY); + emitAuipccInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI, + HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm, IDLoc, Out); } void RISCVAsmParser::emitCapLoadGlobalCap(MCInst &Inst, SMLoc IDLoc, @@ -3670,7 +3770,9 @@ void RISCVAsmParser::emitCapLoadGlobalCap(MCInst &Inst, SMLoc IDLoc, // CLC cdest, %pcrel_lo(TmpLabel)(cdest) MCOperand DestReg = Inst.getOperand(0); const MCExpr *Symbol = Inst.getOperand(1).getExpr(); - unsigned SecondOpcode = isRV64() ? RISCV::CLC_128 : RISCV::CLC_64; + const bool HasRVY = STI->hasFeature(RISCV::FeatureStdExtY); + unsigned SecondOpcode = + HasRVY ? RISCV::CLY : (isRV64() ? 
RISCV::CLC_128 : RISCV::CLC_64); emitAuipccInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_GOT_HI, SecondOpcode, IDLoc, Out); } @@ -3701,8 +3803,10 @@ void RISCVAsmParser::emitCapLoadTLSGDCap(MCInst &Inst, SMLoc IDLoc, // CINCOFFSET cdest, cdest, %pcrel_lo(TmpLabel) MCOperand DestReg = Inst.getOperand(0); const MCExpr *Symbol = Inst.getOperand(1).getExpr(); + const bool HasRVY = STI->hasFeature(RISCV::FeatureStdExtY); + const unsigned IncOpc = HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm; emitAuipccInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GD_HI, - RISCV::CIncOffsetImm, IDLoc, Out); + IncOpc, IDLoc, Out); } bool RISCVAsmParser::checkPseudoCIncOffsetTPRel(MCInst &Inst, diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 95d0cc40b9a79..89858cbf3e74a 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -316,6 +316,28 @@ static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeCSetBndImm(MCInst &Inst, uint64_t Imm, + int64_t Address, + const MCDisassembler *Decoder) { + assert(isUInt<6>(Imm) && "Invalid immediate"); + const bool Shift = Imm & (1 << 5); + if (Shift) + Imm = (Imm & ~(1 << 5)) << 4; + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeCheriSysReg(MCInst &Inst, uint64_t Imm, + int64_t Address, + const MCDisassembler *Decoder) { + assert(isUInt<12>(Imm) && "Invalid immediate"); + const auto *CheriSysReg = RISCVCheriSysReg::lookupCheriSysRegByEncoding(Imm); + if (!CheriSysReg) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint32_t Imm, int64_t Address, @@ -576,6 +598,11 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, "RISCV32CapModeOnly_32 table"); TRY_TO_DECODE(!STI.hasFeature(RISCV::Feature64Bit), DecoderTableRISCV32Only_32, "RISCV32Only_32 table"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtY, DecoderTableRVYOnly_32, + "RISCVRVYOnly_32 table"); + TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtY) && + STI.hasFeature(RISCV::FeatureCapMode), + DecoderTableRVYCapModeOnly_32, "RVYCapModeOnly_32 table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureCapMode, DecoderTableCapModeOnly_32, "CapModeOnly_32 table"); TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZdinx) && diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index d6bac89a838b8..3f3af13b72e32 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -37,6 +37,11 @@ namespace RISCVSpecialCapReg { #include "RISCVGenSearchableTables.inc" } // namespace RISCVSpecialCapReg +namespace RISCVCheriSysReg { +#define GET_CheriSysRegsList_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVCheriSysReg + namespace RISCVInsnOpcode { #define GET_RISCVOpcodesList_IMPL #include "RISCVGenSearchableTables.inc" diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 9d2a8ff5a5f3b..de14f8ae4a768 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -378,11 +378,14 @@ struct SysReg { // 
unsigned Number; FeatureBitset FeaturesRequired; bool isRV32Only; + bool isDisabledInCapMode; bool haveRequiredFeatures(const FeatureBitset &ActiveFeatures) const { // Not in 32-bit mode. if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit]) return false; + if (isDisabledInCapMode && ActiveFeatures[RISCV::FeatureCapMode]) + return false; // No required feature associated with the system register. if (FeaturesRequired.none()) return true; @@ -417,6 +420,18 @@ struct SpecialCapReg { #include "RISCVGenSearchableTables.inc" } // end namespace RISCVSpecialCapReg +namespace RISCVCheriSysReg { + +struct CheriSysReg { + const char *Name; + unsigned Encoding; +}; + +#define GET_CheriSysRegsList_DECL +#include "RISCVGenSearchableTables.inc" +#undef GET_CheriSysRegsList_DECL +} // end namespace RISCVCheriSysReg + namespace RISCVInsnOpcode { struct RISCVOpcode { const char *Name; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 95fdd41401cd6..dfe587be41b46 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -141,6 +141,18 @@ void RISCVInstPrinter::printSpecialCapRegister(const MCInst *MI, unsigned OpNo, O << Imm; } +void RISCVInstPrinter::printCheriCSRSystemRegister(const MCInst *MI, + unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + auto CheriSysReg = RISCVCheriSysReg::lookupCheriSysRegByEncoding(Imm); + if (CheriSysReg) + O << CheriSysReg->Name; + else + O << Imm; +} + void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -287,3 +299,9 @@ const char *RISCVInstPrinter::getRegisterName(MCRegister Reg) { return getRegisterName(Reg, ArchRegNames ? RISCV::NoRegAltName : RISCV::ABIRegAltName); } + +void RISCVInstPrinter::printCSetBndImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printOperand(MI, OpNo, STI, O); +} diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index 5124d12a579af..8301a30d94420 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -38,6 +38,8 @@ class RISCVInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); void printSpecialCapRegister(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printCheriCSRSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printFenceArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, @@ -54,6 +56,8 @@ class RISCVInstPrinter : public MCInstPrinter { raw_ostream &O); void printSpimm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printCSetBndImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. 
  std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 279a3bd5de154..2d7748dfd0978 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -89,6 +89,10 @@ class RISCVMCCodeEmitter : public MCCodeEmitter {
                         SmallVectorImpl<MCFixup> &Fixups,
                         const MCSubtargetInfo &STI) const;
 
+  unsigned getCSetBndImmOpValue(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
   unsigned getVMaskReg(const MCInst &MI, unsigned OpNo,
                        SmallVectorImpl<MCFixup> &Fixups,
                        const MCSubtargetInfo &STI) const;
@@ -254,8 +258,9 @@ void RISCVMCCodeEmitter::expandCIncOffsetTPRel(
         0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
   }
 
+  const bool HasRVY = STI.hasFeature(RISCV::FeatureStdExtY);
   // Emit a normal CIncOffset instruction with the given operands.
-  MCInst TmpInst = MCInstBuilder(RISCV::CIncOffset)
+  MCInst TmpInst = MCInstBuilder(HasRVY ? RISCV::ADDY : RISCV::CIncOffset)
                        .addOperand(DestReg)
                        .addOperand(TPReg)
                        .addOperand(SrcReg);
@@ -566,6 +571,18 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
   return 0;
 }
 
+unsigned
+RISCVMCCodeEmitter::getCSetBndImmOpValue(const MCInst &MI, unsigned OpNo,
+                                         SmallVectorImpl<MCFixup> &Fixups,
+                                         const MCSubtargetInfo &STI) const {
+  unsigned Imm = getImmOpValue(MI, OpNo, Fixups, STI);
+  if (Imm > 31) {
+    Imm = Imm >> 4;
+    Imm |= (1 << 5);
+  }
+  return Imm;
+}
+
 unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo,
                                          SmallVectorImpl<MCFixup> &Fixups,
                                          const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 074baaaf83c65..0cbcfd6e50b03 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -111,7 +111,8 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
   // expanded instructions for each pseudo is correct in the Size field of the
   // tablegen definition for the pseudo.
   const auto &Subtarget = MBB.getParent()->getSubtarget<RISCVSubtarget>();
-  MVT CLenVT = Subtarget.hasCheri() ? Subtarget.typeForCapabilities() : MVT();
+  MVT CLenVT =
+      Subtarget.hasCheriOrStdExtY() ? Subtarget.typeForCapabilities() : MVT();
   switch (MBBI->getOpcode()) {
   case RISCV::PseudoAtomicLoadNand32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, MVT::i32,
@@ -506,7 +507,42 @@ static unsigned getSCForRMWCap128(bool PtrIsCap, AtomicOrdering Ordering) {
   }
 }
 
-static unsigned getLRForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT) {
+static unsigned getLRForRMWCapRVY(bool PtrIsCap, AtomicOrdering Ordering) {
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return PtrIsCap ? RISCV::CLR_Y : RISCV::LR_Y;
+  case AtomicOrdering::Acquire:
+    return PtrIsCap ? RISCV::CLR_Y_AQ : RISCV::LR_Y_AQ;
+  case AtomicOrdering::Release:
+    return PtrIsCap ? RISCV::CLR_Y_RL : RISCV::LR_Y_RL;
+  case AtomicOrdering::AcquireRelease:
+    return PtrIsCap ? RISCV::CLR_Y_AQ : RISCV::LR_Y_AQ;
+  case AtomicOrdering::SequentiallyConsistent:
+    return PtrIsCap ? RISCV::CLR_Y_AQ_RL : RISCV::LR_Y_AQ_RL;
+  }
+}
+
+static unsigned getSCForRMWCapRVY(bool PtrIsCap, AtomicOrdering Ordering) {
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return PtrIsCap ? RISCV::CSC_Y : RISCV::SC_Y;
+  case AtomicOrdering::Acquire:
+    return PtrIsCap ? RISCV::CSC_Y_AQ : RISCV::SC_Y_AQ;
+  case AtomicOrdering::Release:
+    return PtrIsCap ? RISCV::CSC_Y : RISCV::SC_Y;
+  case AtomicOrdering::AcquireRelease:
+    return PtrIsCap ? RISCV::CSC_Y_AQ : RISCV::SC_Y_AQ;
+  case AtomicOrdering::SequentiallyConsistent:
+    return PtrIsCap ? RISCV::CSC_Y_AQ_RL : RISCV::SC_Y_AQ_RL;
+  }
+}
+
+static unsigned getLRForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT,
+                            bool HasRVY) {
   if (VT == MVT::i8)
     return getLRForRMW8(PtrIsCap, Ordering);
   if (VT == MVT::i16)
@@ -515,6 +551,8 @@ static unsigned getLRForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT) {
     return getLRForRMW32(PtrIsCap, Ordering);
   if (VT == MVT::i64)
     return getLRForRMW64(PtrIsCap, Ordering);
+  if (VT.isCapability() && HasRVY)
+    return getLRForRMWCapRVY(PtrIsCap, Ordering);
   if (VT == MVT::c64)
     return getLRForRMWCap64(PtrIsCap, Ordering);
   if (VT == MVT::c128)
@@ -522,7 +560,8 @@ static unsigned getLRForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT) {
   llvm_unreachable("Unexpected LR type\n");
 }
 
-static unsigned getSCForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT) {
+static unsigned getSCForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT,
+                            bool HasRVY) {
   if (VT == MVT::i8)
     return getSCForRMW8(PtrIsCap, Ordering);
   if (VT == MVT::i16)
@@ -531,6 +570,8 @@ static unsigned getSCForRMW(bool PtrIsCap, AtomicOrdering Ordering, MVT VT) {
     return getSCForRMW32(PtrIsCap, Ordering);
   if (VT == MVT::i64)
     return getSCForRMW64(PtrIsCap, Ordering);
+  if (VT.isCapability() && HasRVY)
+    return getSCForRMWCapRVY(PtrIsCap, Ordering);
   if (VT == MVT::c64)
     return getSCForRMWCap64(PtrIsCap, Ordering);
   if (VT == MVT::c128)
@@ -550,12 +591,13 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
   Register IncrReg = MI.getOperand(3).getReg();
   AtomicOrdering Ordering =
       static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+  const RISCVSubtarget &ST =
+      ThisMBB->getParent()->getSubtarget<RISCVSubtarget>();
 
   Register ScratchIntReg;
   Register DestIntReg;
   if (VT.isFatPointer()) {
-    MachineFunction *MF = ThisMBB->getParent();
-    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+    const TargetRegisterInfo *TRI = ST.getRegisterInfo();
     IncrReg = TRI->getSubReg(IncrReg, RISCV::sub_cap_addr);
     ScratchIntReg = TRI->getSubReg(ScratchReg, RISCV::sub_cap_addr);
     DestIntReg = TRI->getSubReg(DestReg, RISCV::sub_cap_addr);
@@ -563,13 +605,15 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
     ScratchIntReg = ScratchReg;
     DestIntReg = DestReg;
   }
+  const bool HasRVY = ST.hasFeature(RISCV::FeatureStdExtY);
 
   // .loop:
   //   lr.[w|d] dest, (addr)
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
   //   bnez scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(PtrIsCap, Ordering, VT)), DestReg)
+  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(PtrIsCap, Ordering, VT, HasRVY)),
+          DestReg)
       .addReg(AddrReg);
   switch (BinOp) {
   default:
@@ -582,7 +626,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
     break;
   case AtomicRMWInst::Add:
     if (VT.isFatPointer()) {
-      BuildMI(LoopMBB, DL, TII->get(RISCV::CIncOffset), ScratchReg)
+      BuildMI(LoopMBB, DL, TII->get(HasRVY ? RISCV::ADDY : RISCV::CIncOffset),
+              ScratchReg)
          .addReg(DestReg)
          .addReg(IncrReg);
      break;
@@ -625,10 +670,11 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
     break;
   }
   if (VT.isFatPointer() && BinOp != AtomicRMWInst::Add)
-    BuildMI(LoopMBB, DL, TII->get(RISCV::CSetAddr), ScratchReg)
+    BuildMI(LoopMBB, DL, TII->get(HasRVY ? RISCV::YADDRW : RISCV::CSetAddr),
+            ScratchReg)
         .addReg(DestReg)
         .addReg(ScratchIntReg);
-  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(PtrIsCap, Ordering, VT)),
+  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(PtrIsCap, Ordering, VT, HasRVY)),
          ScratchIntReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
@@ -906,17 +952,19 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
     ScratchIntReg = ScratchReg;
     IncrIntReg = IncrReg;
   }
+  const bool HasRVY = MF->getSubtarget().hasFeature(RISCV::FeatureStdExtY);
 
   //
   // .loophead:
   //   lr.[b|h] dest, (addr)
   //   mv scratch, dest
   //   ifnochangeneeded scratch, incr, .looptail
-  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(PtrIsCap, Ordering, VT)),
-          DestReg)
+  BuildMI(LoopHeadMBB, DL,
+          TII->get(getLRForRMW(PtrIsCap, Ordering, VT, HasRVY)), DestReg)
      .addReg(AddrReg);
   if (VT.isFatPointer())
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::CMove), ScratchReg)
+    BuildMI(LoopHeadMBB, DL, TII->get(HasRVY ? RISCV::YMV : RISCV::CMove),
+            ScratchReg)
        .addReg(DestReg);
   else
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
@@ -957,7 +1005,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
   // .loopifbody:
   //   mv scratch, incr
   if (VT.isFatPointer())
-    BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::CMove), ScratchReg)
+    BuildMI(LoopIfBodyMBB, DL, TII->get(HasRVY ? RISCV::YMV : RISCV::CMove),
+            ScratchReg)
        .addReg(DestReg);
   else
     BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
@@ -967,7 +1016,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
   // .looptail:
   //   sc.[b|h] scratch, scratch, (addr)
   //   bnez scratch, loop
-  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(PtrIsCap, Ordering, VT)),
+  BuildMI(LoopTailMBB, DL,
+          TII->get(getSCForRMW(PtrIsCap, Ordering, VT, HasRVY)),
          ScratchIntReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
@@ -1083,6 +1133,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
   AtomicOrdering Ordering =
       static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+  const bool HasRVY = MF->getSubtarget().hasFeature(RISCV::FeatureStdExtY);
 
   if (!IsMasked) {
     Register DestIntReg;
@@ -1099,8 +1150,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     // .loophead:
     //   lr.[w|d] dest, (addr)
     //   bne dest, cmpval, done
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(PtrIsCap, Ordering, VT)),
-            DestReg)
+    BuildMI(LoopHeadMBB, DL,
+            TII->get(getLRForRMW(PtrIsCap, Ordering, VT, HasRVY)), DestReg)
        .addReg(AddrReg);
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestIntReg, 0)
@@ -1109,8 +1160,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     // .looptail:
     //   sc.[w|d] scratch, newval, (addr)
     //   bnez scratch, loophead
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(PtrIsCap, Ordering, VT)),
-            ScratchReg)
+    BuildMI(LoopTailMBB, DL,
+            TII->get(getSCForRMW(PtrIsCap, Ordering, VT, HasRVY)), ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -1126,7 +1177,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     //   and scratch, dest, mask
     //   bne scratch, cmpval, done
     Register MaskReg = MI.getOperand(5).getReg();
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(false, Ordering, VT)),
+    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(false, Ordering, VT, HasRVY)),
            DestReg)
        .addReg(AddrReg);
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
@@ -1145,7 +1196,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
     //   bnez scratch, loophead
     insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                       MaskReg, ScratchReg);
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(false, Ordering, VT)),
+    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(false, Ordering, VT, HasRVY)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 56716e6e6d5d0..78d1db239773c 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -228,19 +228,21 @@ bool RISCVExpandPseudo::expandAuipccInstPair(
 bool RISCVExpandPseudo::expandCapLoadLocalCap(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     MachineBasicBlock::iterator &NextMBBI) {
+  const auto &STI = MBB.getParent()->getSubtarget<RISCVSubtarget>();
+  const bool HasRVY = STI.hasFeature(RISCV::FeatureStdExtY);
   return expandAuipccInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_PCREL_HI,
-                              RISCV::CIncOffsetImm);
+                              HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm);
 }
 
 bool RISCVExpandPseudo::expandCapLoadGlobalCap(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     MachineBasicBlock::iterator &NextMBBI) {
-  MachineFunction *MF = MBB.getParent();
-
-  const auto &STI = MF->getSubtarget<RISCVSubtarget>();
-  unsigned SecondOpcode = STI.is64Bit() ? RISCV::CLC_128 : RISCV::CLC_64;
+  const auto &STI = MBB.getParent()->getSubtarget<RISCVSubtarget>();
+  const bool HasRVY = STI.hasFeature(RISCV::FeatureStdExtY);
+  unsigned LoadCapOpc =
+      HasRVY ? RISCV::CLY : (STI.is64Bit() ? RISCV::CLC_128 : RISCV::CLC_64);
   return expandAuipccInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_GOT_HI,
-                              SecondOpcode);
+                              LoadCapOpc);
 }
 
 bool RISCVExpandPseudo::expandCapLoadTLSIEAddress(
@@ -257,8 +259,11 @@ bool RISCVExpandPseudo::expandCapLoadTLSGDCap(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     MachineBasicBlock::iterator &NextMBBI) {
+  const auto &STI = MBB.getParent()->getSubtarget<RISCVSubtarget>();
+  const bool HasRVY = STI.hasFeature(RISCV::FeatureStdExtY);
+  const unsigned IncOpc = HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm;
   return expandAuipccInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GD_HI,
-                              RISCV::CIncOffsetImm);
+                              IncOpc);
 }
 
 bool RISCVExpandPseudo::expandCGetAddr(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index c5cdfe69265ff..595aec3fc1f32 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -701,8 +701,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  const bool HasRVY = STI.hasFeature(RISCV::FeatureStdExtY);
   if (RISCVABI::isCheriPureCapABI(STI.getTargetABI()))
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::CSetAddr), SPReg)
+    BuildMI(MBB, MBBI, DL,
+            TII->get(HasRVY ? RISCV::YADDRW : RISCV::CSetAddr), SPReg)
        .addReg(SPReg)
        .addReg(SPAddrDstReg);
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 48728d8d6c6d3..5928422244f57 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2405,12 +2405,16 @@ bool RISCVDAGToDAGISel::SelectRegImmCommon(SDValue Addr, SDValue &Base,
     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
       int64_t Adj = CVal < 0 ? -2048 : 2047;
-      Base =
-          SDValue(CurDAG->getMachineNode(
-                      PtrVT.isFatPointer() ? RISCV::CIncOffsetImm : RISCV::ADDI,
-                      DL, PtrVT, Addr.getOperand(0),
-                      CurDAG->getTargetConstant(Adj, DL, XLenVT)),
-                  0);
+      const bool HasRVY = Subtarget->hasFeature(RISCV::FeatureStdExtY);
+      unsigned Opc;
+      if (PtrVT.isFatPointer())
+        Opc = HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm;
+      else
+        Opc = RISCV::ADDI;
+      Base = SDValue(
+          CurDAG->getMachineNode(Opc, DL, PtrVT, Addr.getOperand(0),
+                                 CurDAG->getTargetConstant(Adj, DL, XLenVT)),
+          0);
       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, XLenVT);
       return true;
     }
@@ -2452,6 +2456,25 @@ bool RISCVDAGToDAGISel::SelectCapRegImm(SDValue Cap, SDValue &Base,
                             Subtarget->typeForCapabilities(), false);
 }
 
+bool RISCVDAGToDAGISel::SelectCSetBndImm(SDValue N, SDValue &Val) {
+  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+  if (!C)
+    return false;
+
+  uint64_t Imm = C->getZExtValue();
+  // The CSetBounds immediate holds 5 bits that can optionally be scaled by 16
+  // (encoded by setting bit 5 and storing the value shifted right by 4).
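+  // For example, 24 is accepted as-is, and 496 is accepted because it equals
+  // 31 * 16; 40 is rejected (above 31 but not a multiple of 16), as is 512
+  // (512 / 16 = 32, which no longer fits in 5 bits).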
+ if (Imm > 31) { + if (Imm % 16 != 0) + return false; + if (Imm / 16 > 31) + return false; + } + + SDLoc dl(N); + Val = CurDAG->getTargetConstant(Imm, dl, MVT::i32); + return true; +} + bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { ShAmt = N; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index b1dd5c825105a..80b5eeaad72ce 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -61,6 +61,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { return SelectAddrRegImm(Addr, Base, Offset, true); } bool SelectCapRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectCSetBndImm(SDValue N, SDValue &Val); bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 62cefffebc417..2c1ed63c1c314 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -146,7 +146,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass); } - if (Subtarget.hasCheri()) { + if (Subtarget.hasCheriOrStdExtY()) { CapType = Subtarget.typeForCapabilities(); NullCapabilityRegister = RISCV::C0; addRegisterClass(CapType, &RISCV::GPCRRegClass); @@ -248,7 +248,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // TODO: add all necessary setOperationAction calls. setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); - if (Subtarget.hasCheri()) + if (Subtarget.hasCheriOrStdExtY()) setOperationAction(ISD::DYNAMIC_STACKALLOC, CapType, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -542,7 +542,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) setOperationAction(ISD::Constant, MVT::i64, Custom); - if (Subtarget.hasCheri()) { + if (Subtarget.hasCheriOrStdExtY()) { MVT CLenVT = Subtarget.typeForCapabilities(); setOperationAction(ISD::BR_CC, CLenVT, Expand); setOperationAction(ISD::SELECT, CLenVT, Custom); @@ -573,7 +573,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Some CHERI intrinsics return i1, which isn't legal, so we have to custom // lower them in the DAG combine phase before the first type legalization // pass. - if (Subtarget.hasCheri()) + if (Subtarget.hasCheriOrStdExtY()) setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); if (Subtarget.hasStdExtZicbop()) { @@ -587,7 +587,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, else setMinCmpXchgSizeInBits(32); - if (Subtarget.hasCheri()) + if (Subtarget.hasCheriOrStdExtY()) SupportsAtomicCapabilityOperations = true; } else if (Subtarget.hasForcedAtomics()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); @@ -5949,8 +5949,10 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, DAG.getMachineNode(RISCV::PseudoCIncOffsetTPRel, DL, Ty, TPReg, MNHi, AddrCIncOffset), 0); + const bool HasRVY = Subtarget.hasFeature(RISCV::FeatureStdExtY); return SDValue( - DAG.getMachineNode(RISCV::CIncOffsetImm, DL, Ty, MNAdd, AddrLo), + DAG.getMachineNode(HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm, DL, Ty, + MNAdd, AddrLo), 0); } @@ -7470,6 +7472,27 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntNo) { default: break; // Don't custom lower most intrinsics. 
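+  // With the Y extension alone (no XCheri, no Zyhybrid), capability flags and
+  // sealing have no architectural state, so the intrinsics handled below fold
+  // away to a constant or to their unmodified capability operand.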
+  case Intrinsic::cheri_cap_flags_get:
+    if (Subtarget.hasStdExtZYHybrid() || Subtarget.hasCheri())
+      break;
+    return DAG.getConstant(0, DL, Subtarget.getXLenVT());
+  case Intrinsic::cheri_cap_flags_set:
+    if (Subtarget.hasStdExtZYHybrid() || Subtarget.hasCheri())
+      break;
+    return Op.getOperand(1);
+  case Intrinsic::cheri_cap_type_copy:
+  case Intrinsic::cheri_cap_unseal:
+  case Intrinsic::cheri_cap_seal:
+  case Intrinsic::cheri_cap_conditional_seal:
+  case Intrinsic::cheri_cap_tag_clear:
+    if (!Subtarget.hasStdExtY() || Subtarget.hasCheri())
+      break;
+    return Op.getOperand(1);
+  // The DDC cannot be read unless Zyhybrid is available.
+  case Intrinsic::cheri_ddc_get:
+    if (Subtarget.hasStdExtZYHybrid() || Subtarget.hasCheri())
+      break;
+    return DAG.getNullCapability(DL);
   case Intrinsic::cheri_cap_from_pointer:
     // Expand CFromPtr since the dedicated instruction has been removed.
     return emitCFromPtrReplacement(DAG, DL, Op.getOperand(1), Op.getOperand(2),
@@ -14285,8 +14308,9 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
     StoreOpcode = RISCV::SW_DDC;
     AddOpcode = RISCV::ADDI;
   } else {
+    const bool HasRVY = MF.getSubtarget().hasFeature(RISCV::FeatureStdExtY);
     StoreOpcode = RISCV::SW_CAP;
-    AddOpcode = RISCV::CIncOffsetImm;
+    AddOpcode = HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm;
   }
 
   Register TmpReg = MI.getOperand(2).getReg();
@@ -15167,8 +15191,8 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
   assert(XLen == 32 || XLen == 64);
   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
-  MVT CLenVT = Subtarget.hasCheri() ? Subtarget.typeForCapabilities()
-                                    : MVT();
+  MVT CLenVT =
+      Subtarget.hasCheriOrStdExtY() ? Subtarget.typeForCapabilities() : MVT();
   bool IsPureCap = RISCVABI::isCheriPureCapABI(ABI);
   MVT PtrVT = IsPureCap ? CLenVT : XLenVT;
   bool IsPureCapVarArgs = !IsFixed && IsPureCap;
@@ -16876,14 +16900,16 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
    case 'r':
      // Don't try to split/combine capabilities in order to use a GPR; give a
      // friendlier error message instead.
-      if (Subtarget.hasCheri() && VT == Subtarget.typeForCapabilities())
+      if (Subtarget.hasCheriOrStdExtY() &&
+          VT == Subtarget.typeForCapabilities())
        break;
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
    case 'C':
-      if (Subtarget.hasCheri() && VT == Subtarget.typeForCapabilities())
+      if (Subtarget.hasCheriOrStdExtY() &&
+          VT == Subtarget.typeForCapabilities())
        return std::make_pair(0U, &RISCV::GPCRRegClass);
      break;
    case 'f':
@@ -16950,7 +16976,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
 
   // Similarly, allow capability register ABI names to be used in constraint.
-  if (Subtarget.hasCheri()) {
+  if (Subtarget.hasCheriOrStdExtY()) {
     Register CRegFromAlias = StringSwitch<Register>(Constraint.lower())
                                  .Case("{cnull}", RISCV::C0)
                                  .Case("{cra}", RISCV::C1)
@@ -17210,7 +17236,7 @@ EVT RISCVTargetLowering::getOptimalMemOpType(
   // capability loads/stores or by making a runtime library call.
   // We can't use capability stores as an optimisation for memset unless zeroing.
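  // (A zeroing memset can simply store the null capability register; there is
  // no cheap way to materialise any other repeated byte pattern at capability
  // width, so those fall back to XLen-sized accesses.)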
   bool IsNonZeroMemset = Op.isMemset() && !Op.isZeroMemset();
-  if (Subtarget.hasCheri() && !IsNonZeroMemset) {
+  if (Subtarget.hasCheriOrStdExtY() && !IsNonZeroMemset) {
     unsigned CapSize = Subtarget.typeForCapabilities().getSizeInBits() / 8;
     if (Op.size() >= CapSize) {
       Align CapAlign(CapSize);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsY.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsY.td
new file mode 100644
index 0000000000000..e7e3fb13ab3b0
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsY.td
@@ -0,0 +1,109 @@
+//===-- RISCVInstrFormatsY.td --------------------------------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V Y extension instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand definitions.
+//===----------------------------------------------------------------------===//
+
+def CheriCSRSystemRegister : AsmOperandClass {
+  let Name = "CheriCSRSystemRegister";
+  let ParserMethod = "parseCSRSystemRegister";
+  let DiagnosticType = "InvalidCheriCSRSystemRegister";
+}
+
+def cheri_csr_sysreg : Operand<XLenVT> {
+  let ParserMatchClass = CheriCSRSystemRegister;
+  let PrintMethod = "printCheriCSRSystemRegister";
+  let DecoderMethod = "decodeCheriSysReg";
+  let OperandType = "OPERAND_UIMM12";
+  let OperandNamespace = "RISCVOp";
+}
+
+def CSetBndImmOperand : AsmOperandClass {
+  let Name = "CSetBndImm";
+  let ParserMethod = "parseCSetBndImmOperand";
+  let RenderMethod = "addCSetBndImmOperands";
+  let DiagnosticType = "InvalidCSetBndImm";
+}
+
+def csetbnd_imm : Operand<XLenVT>,
+                  ComplexPattern<XLenVT, 1, "SelectCSetBndImm"> {
+  let PrintMethod = "printCSetBndImm";
+  let EncoderMethod = "getCSetBndImmOpValue";
+  let ParserMatchClass = CSetBndImmOperand;
+  let DecoderMethod = "DecodeCSetBndImm";
+  let MIOperandInfo = (ops i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Formats
+//===----------------------------------------------------------------------===//
+
+// Like an RVInstR, except rs2 is now an additional function code.
+class RVInstZCheriSrcDst<bits<7> funct7, bits<5> funct5, bits<3> funct3,
+                         RISCVOpcode opcode, dag outs, dag ins,
+                         string opcodestr, string argstr>
+    : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+  bits<5> rs1;
+  bits<5> rd;
+
+  let Inst{31-25} = funct7;
+  let Inst{24-20} = funct5;
+  let Inst{19-15} = rs1;
+  let Inst{14-12} = funct3;
+  let Inst{11-7} = rd;
+  let Inst{6-0} = opcode.Value;
+}
+
+class RVInstCheriSetBoundsImmFmt<bits<6> funct6, bits<3> funct3,
+                                 RISCVOpcode opcode, dag outs, dag ins,
+                                 string opcodestr, string argstr>
+    : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+
+  bits<5> rd;
+  bits<5> rs1;
+  bits<6> imm;
+
+  let Inst{31-26} = funct6;
+  let Inst{25} = imm{5};
+  let Inst{24-20} = imm{4-0};
+  let Inst{19-15} = rs1;
+  let Inst{14-12} = funct3;
+  let Inst{11-7} = rd;
+  let Inst{6-0} = opcode.Value;
+}
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class CheriModeSwitchInstr<bits<7> funct7, RISCVOpcode opcode, string opcodestr>
+    : RVInst<(outs), (ins), opcodestr, "", [], InstFormatR> {
+
+  let Inst{31-25} = funct7;
+  let Inst{24-20} = 0x0;
+  let Inst{19-15} = 0x0;
+  let Inst{14-12} = 0x1;
+  let Inst{11-7} = 0x0;
+  let Inst{6-0} = opcode.Value;
+}
+
+let hasNoSchedulingInfo = 1,
+    hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class CheriCSR_ir<bits<3> funct3, string opcodestr,
+                  RegisterClass rs1Class = GPCR>
+    : RVInstI<funct3, OPC_SYSTEM, (outs GPCR:$rd),
+              (ins cheri_csr_sysreg:$imm12, rs1Class:$rs1), opcodestr,
+              "$rd, $imm12, $rs1">,
+      Sched<[WriteCSR, ReadCSR]>;
+
+let hasNoSchedulingInfo = 1,
+    hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class CheriCSR_ii<bits<3> funct3, string opcodestr>
+    : RVInstI<funct3, OPC_SYSTEM, (outs GPCR:$rd),
+              (ins cheri_csr_sysreg:$imm12, uimm5:$rs1), opcodestr,
+              "$rd, $imm12, $rs1">,
+      Sched<[WriteCSR]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index fa1eb765b919f..5e94579d461b2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCVInstrInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "MCTargetDesc/RISCVMatInt.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
@@ -129,6 +130,10 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   case RISCV::LC_128:
     MemBytes = 16;
     break;
+  case RISCV::CLY:
+  case RISCV::LY:
+    MemBytes = STI.isRV32() ? 8 : 16;
+    break;
   }
 
   if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
@@ -177,8 +182,12 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
     break;
   case RISCV::SC_128:
   case RISCV::CSC_128:
-     MemBytes = 16;
-     break;
+    MemBytes = 16;
+    break;
+  case RISCV::SY:
+  case RISCV::CSY:
+    MemBytes = STI.isRV32() ? 8 : 16;
+    break;
   }
 
   if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
@@ -346,7 +355,9 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
         .setMIFlag(Flag);
     return;
   } else if (RISCV::GPCRRegClass.contains(DstReg, SrcReg)) {
-    BuildMI(MBB, MBBI, DL, get(RISCV::CMove), DstReg)
+    unsigned MoveOpc =
+        STI.hasFeature(RISCV::FeatureStdExtY) ? RISCV::YMV : RISCV::CMove;
+    BuildMI(MBB, MBBI, DL, get(MoveOpc), DstReg)
         .addReg(SrcReg, getKillRegState(KillSrc))
         .setMIFlag(Flag);
     return;
@@ -576,9 +587,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                  : RISCV::CSD;
     IsScalableVector = false;
   } else if (RISCV::GPCRRegClass.hasSubClassEq(RC)) {
-    Opcode = TRI->getRegSizeInBits(RISCV::GPCRRegClass) == 64
-                 ? RISCV::CSC_64
-                 : RISCV::CSC_128;
+    Opcode = ST.hasStdExtY() ? RISCV::CSY
+                             : (ST.isRV64() ?
RISCV::CSC_128 : RISCV::CSC_64); IsScalableVector = false; } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::CFSW; @@ -595,8 +605,8 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, RISCV::SW : RISCV::SD; IsScalableVector = false; } else if (RISCV::GPCRRegClass.hasSubClassEq(RC)) { - Opcode = TRI->getRegSizeInBits(RISCV::GPCRRegClass) == 64 ? RISCV::SC_64 - : RISCV::SC_128; + Opcode = ST.hasStdExtY() ? RISCV::SY + : (ST.isRV64() ? RISCV::SC_128 : RISCV::SC_64); IsScalableVector = false; } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; @@ -689,9 +699,8 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, : RISCV::CLD; IsScalableVector = false; } else if (RISCV::GPCRRegClass.hasSubClassEq(RC)) { - Opcode = TRI->getRegSizeInBits(RISCV::GPCRRegClass) == 64 - ? RISCV::CLC_64 - : RISCV::CLC_128; + Opcode = ST.hasStdExtY() ? RISCV::CLY + : (ST.isRV64() ? RISCV::CLC_128 : RISCV::CLC_64); IsScalableVector = false; } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::CFLW; @@ -708,8 +717,8 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, RISCV::LW : RISCV::LD; IsScalableVector = false; } else if (RISCV::GPCRRegClass.hasSubClassEq(RC)) { - Opcode = TRI->getRegSizeInBits(RISCV::GPCRRegClass) == 64 ? RISCV::LC_64 - : RISCV::LC_128; + Opcode = ST.hasStdExtY() ? RISCV::LY + : (ST.isRV64() ? RISCV::LC_128 : RISCV::LC_64); IsScalableVector = false; } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; @@ -1420,8 +1429,10 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { default: break; case RISCV::CMove: + case RISCV::YMV: return true; case RISCV::CIncOffset: + case RISCV::ADDY: // Creating a NULL-derived capability is fast since it's the same as moving // to another register and zeroing the capability metadata. 
// While incrementing a capability by zero is not quite as fast as a move @@ -1437,6 +1448,7 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { MI.getOperand(2).getReg() == RISCV::X0) || (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::C0); case RISCV::CIncOffsetImm: + case RISCV::ADDIY: return (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) || (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::C0); case RISCV::FSGNJ_D: @@ -1485,6 +1497,103 @@ RISCVInstrInfo::getAsIntImmediate(const MachineOperand &Op, return std::nullopt; // Unknown immediate } +static bool expandPseudoPCCGet(MachineInstr &MI, MachineBasicBlock *MBB, + const RISCVSubtarget &Subtarget) { + DebugLoc DL = MI.getDebugLoc(); + const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); + Register DstReg = MI.getOperand(0).getReg(); + + // ymodeswy + // auipcc $dst, 0 + // ymodeswi + BuildMI(*MBB, MI, DL, TII->get(RISCV::YMODESWY)); + BuildMI(*MBB, MI, DL, TII->get(RISCV::AUIPCC), DstReg).addImm(0); + BuildMI(*MBB, MI, DL, TII->get(RISCV::YMODESWI)); + + MI.eraseFromParent(); + return true; +} + +static bool expandLoadWithExplicitCap(MachineInstr &MI, MachineBasicBlock *MBB, + const RISCVSubtarget &Subtarget, + unsigned LoadOpc) { + DebugLoc DL = MI.getDebugLoc(); + const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); + Register DstReg = MI.getOperand(0).getReg(); + Register AuthCap = MI.getOperand(1).getReg(); + + // ymodeswy + // $dst, 0($cap) + // ymodeswi + BuildMI(*MBB, MI, DL, TII->get(RISCV::YMODESWY)); + BuildMI(*MBB, MI, DL, TII->get(LoadOpc), DstReg).addReg(AuthCap).addImm(0); + BuildMI(*MBB, MI, DL, TII->get(RISCV::YMODESWI)); + + MI.eraseFromParent(); + return true; +} + +static bool expandStoreWithExplicitCap(MachineInstr &MI, MachineBasicBlock *MBB, + const RISCVSubtarget &Subtarget, + unsigned StoreOpc) { + DebugLoc DL = MI.getDebugLoc(); + const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); + Register AuthCap = MI.getOperand(0).getReg(); + Register Value = MI.getOperand(1).getReg(); + + // ymodeswy + // $val, 0($cap) + // ymodeswi + BuildMI(*MBB, MI, DL, TII->get(RISCV::YMODESWY)); + BuildMI(*MBB, MI, DL, TII->get(StoreOpc)) + .addReg(Value) + .addReg(AuthCap) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(RISCV::YMODESWI)); + + MI.eraseFromParent(); + return true; +} + +bool RISCVInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + + switch (MI.getOpcode()) { + default: + break; + case RISCV::PseudoPCCGet: + return expandPseudoPCCGet(MI, MBB, STI); + case RISCV::PseudoLB_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLB); + case RISCV::PseudoLH_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLH); + case RISCV::PseudoLW_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLW); + case RISCV::PseudoLBU_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLBU); + case RISCV::PseudoLHU_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLHU); + case RISCV::PseudoLC_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLY); + case RISCV::PseudoLWU_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLWU); + case RISCV::PseudoLD_CAP: + return expandLoadWithExplicitCap(MI, MBB, STI, RISCV::CLD); + case RISCV::PseudoSB_CAP: + return expandStoreWithExplicitCap(MI, MBB, STI, RISCV::CSB); + case RISCV::PseudoSH_CAP: + return expandStoreWithExplicitCap(MI, MBB, STI, RISCV::CSH); + case RISCV::PseudoSW_CAP: + return 
expandStoreWithExplicitCap(MI, MBB, STI, RISCV::CSW);
+  case RISCV::PseudoSD_CAP:
+    return expandStoreWithExplicitCap(MI, MBB, STI, RISCV::CSD);
+  case RISCV::PseudoSC_CAP:
+    return expandStoreWithExplicitCap(MI, MBB, STI, RISCV::CSY);
+  }
+
+  return false;
+}
+
 bool RISCVInstrInfo::isSetBoundsInstr(const MachineInstr &I,
                                       const MachineOperand *&Base,
                                       const MachineOperand *&Size) const {
@@ -1494,6 +1603,9 @@ bool RISCVInstrInfo::isSetBoundsInstr(const MachineInstr &I,
   case RISCV::CSetBounds:
   case RISCV::CSetBoundsExact:
   case RISCV::CSetBoundsImm:
+  case RISCV::YBNDSW:
+  case RISCV::YBNDSRW:
+  case RISCV::YBNDSIW:
     Base = &I.getOperand(1);
     Size = &I.getOperand(2);
     return true;
@@ -1513,6 +1625,8 @@ bool RISCVInstrInfo::isPtrAddInstr(const MachineInstr &I,
     return false;
   case RISCV::CIncOffsetImm:
   case RISCV::CIncOffset:
+  case RISCV::ADDY:
+  case RISCV::ADDIY:
     Base = &I.getOperand(1);
     Increment = &I.getOperand(2);
     return true;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 83c24605d1f61..2cf7c0767df88 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -149,6 +149,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   getAsIntImmediate(const MachineOperand &Op,
                     const MachineRegisterInfo &MRI) const override;
 
+  bool expandPostRAPseudo(MachineInstr &MI) const override;
+
   std::optional<DestSourcePair>
   isCopyInstrImpl(const MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 316dbf76e9b08..6058ec7129c26 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2013,3 +2013,4 @@ include "RISCVInstrInfoXCV.td"
 //===----------------------------------------------------------------------===//
 
 include "RISCVInstrInfoXCheri.td"
+include "RISCVInstrInfoY.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td
index 8b0bfd05d2778..34320f920fd61 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td
@@ -164,10 +164,11 @@ class LR_C_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
 }
 
 multiclass LR_C_r_aq_rl<string clenstr, bits<3> funct3, string opcodestr> {
-  def _ # clenstr : LR_C_r<0, 0, funct3, opcodestr>;
-  def _AQ_ # clenstr : LR_C_r<1, 0, funct3, opcodestr # ".aq">;
-  def _RL_ # clenstr : LR_C_r<0, 1, funct3, opcodestr # ".rl">;
-  def _AQ_RL_ # clenstr : LR_C_r<1, 1, funct3, opcodestr # ".aqrl">;
+  defvar optUnderScore = !if(!eq(clenstr, ""), "", "_");
+  def optUnderScore # clenstr : LR_C_r<0, 0, funct3, opcodestr>;
+  def _AQ # optUnderScore # clenstr : LR_C_r<1, 0, funct3, opcodestr # ".aq">;
+  def _RL # optUnderScore # clenstr : LR_C_r<0, 1, funct3, opcodestr # ".rl">;
+  def _AQ_RL # optUnderScore # clenstr : LR_C_r<1, 1, funct3, opcodestr # ".aqrl">;
 }
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
@@ -179,12 +180,13 @@ class AMO_C_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr,
 
 multiclass AMO_C_rr_aq_rl<string clenstr, bits<5> funct5, bits<3> funct3,
                           string opcodestr, RegisterClass rdClass> {
-  def _ # clenstr : AMO_C_rr<funct5, 0, 0, funct3, opcodestr, rdClass>;
-  def _AQ_ # clenstr : AMO_C_rr<funct5, 1, 0, funct3, opcodestr # ".aq", rdClass>;
-  def _RL_ # clenstr : AMO_C_rr<funct5, 0, 1, funct3, opcodestr # ".rl", rdClass>;
-  def _AQ_RL_ # clenstr : AMO_C_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl", rdClass>;
+  defvar optUnderScore = !if(!eq(clenstr, ""), "", "_");
+  def optUnderScore # clenstr : AMO_C_rr<funct5, 0, 0, funct3, opcodestr, rdClass>;
+  def _AQ # optUnderScore # clenstr : AMO_C_rr<funct5, 1, 0, funct3, opcodestr # ".aq", rdClass>;
+  def _RL # optUnderScore # clenstr : AMO_C_rr<funct5, 0, 1, funct3, opcodestr # ".rl", rdClass>;
+  def _AQ_RL # optUnderScore # clenstr : AMO_C_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl", rdClass>;
 }
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
@@ -269,10 +271,11 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0,
 
 multiclass CLR_C_r_aq_rl<string clenstr, bits<3> funct3, string opcodestr,
                          string Namespace = "CapModeOnly_"> {
-  defm _ # clenstr : CLR_C_r<0, 0, funct3, opcodestr, Namespace>;
-  defm _AQ_ # clenstr : CLR_C_r<1, 0, funct3, opcodestr # ".aq", Namespace>;
-  defm _RL_ # clenstr : CLR_C_r<0, 1, funct3, opcodestr # ".rl", Namespace>;
-  defm _AQ_RL_ # clenstr : CLR_C_r<1, 1, funct3, opcodestr # ".aqrl", Namespace>;
+  defvar optUnderScore = !if(!eq(clenstr, ""), "", "_");
+  defm optUnderScore # clenstr : CLR_C_r<0, 0, funct3, opcodestr, Namespace>;
+  defm _AQ # optUnderScore # clenstr : CLR_C_r<1, 0, funct3, opcodestr # ".aq", Namespace>;
+  defm _RL # optUnderScore # clenstr : CLR_C_r<0, 1, funct3, opcodestr # ".rl", Namespace>;
+  defm _AQ_RL # optUnderScore # clenstr : CLR_C_r<1, 1, funct3, opcodestr # ".aqrl", Namespace>;
 }
 
 multiclass CAMO_C_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
@@ -291,14 +294,15 @@ multiclass CAMO_C_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
 multiclass CAMO_C_rr_aq_rl<string clenstr, bits<5> funct5, bits<3> funct3,
                            string opcodestr, RegisterClass rdClass,
                            string Namespace = "CapModeOnly_"> {
-  defm _ # clenstr : CAMO_C_rr<funct5, 0, 0, funct3, opcodestr, rdClass, Namespace>;
-  defm _AQ_ # clenstr : CAMO_C_rr<funct5, 1, 0, funct3, opcodestr # ".aq", rdClass, Namespace>;
-  defm _RL_ # clenstr : CAMO_C_rr<funct5, 0, 1, funct3, opcodestr # ".rl", rdClass, Namespace>;
-  defm _AQ_RL_ # clenstr : CAMO_C_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl", rdClass, Namespace>;
+  defvar optUnderScore = !if(!eq(clenstr, ""), "", "_");
+  defm optUnderScore # clenstr : CAMO_C_rr<funct5, 0, 0, funct3, opcodestr, rdClass, Namespace>;
+  defm _AQ # optUnderScore # clenstr : CAMO_C_rr<funct5, 1, 0, funct3, opcodestr # ".aq", rdClass, Namespace>;
+  defm _RL # optUnderScore # clenstr : CAMO_C_rr<funct5, 0, 1, funct3, opcodestr # ".rl", rdClass, Namespace>;
+  defm _AQ_RL # optUnderScore # clenstr : CAMO_C_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl", rdClass, Namespace>;
 }
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
@@ -348,7 +352,7 @@ def CGetHigh : Cheri_r<0x17, "cgethigh">;
 def : MnemonicAlias<"gchi", "cgethigh">;
 // For backwards compatibility we still accept cgetaddr from assembly.
 let mayStore = false, mayLoad = false, Size = 4, isCodeGenOnly = false,
-hasSideEffects = false in
+hasSideEffects = false, Predicates = [HasCheriOrRVY] in
 def PseudoCGetAddr : Pseudo<(outs GPR:$rd), (ins GPCR:$cs1), [],
                             "cgetaddr", "$rd, $cs1">;
 }
@@ -717,7 +721,7 @@ defm AMOSWAP_C : AMO_C_rr_aq_rl<"128", 0b00001, 0b100, "amoswap.c", GPCR>;
 let DecoderNamespace = "CapModeOnly_" in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
-let Predicates = [HasCheri, IsCapMode] in {
+let Predicates = [HasCheriOrRVY, IsCapMode] in {
 def AUIPCC : RVInstU<OPC_AUIPC, (outs GPCR:$rd), (ins uimm20_auipc:$imm20),
                      "auipcc", "$rd, $imm20">;
@@ -729,7 +733,7 @@ let isCall = 1 in
 def CJALR : RVInstI<0b000, OPC_JALR, (outs GPCR:$rd),
                     (ins GPCR:$rs1, simm12:$imm12),
                     "jalr", "$rd, ${imm12}(${rs1})">;
-} // Predicates = [HasCheri, IsCapMode]
+} // Predicates = [HasCheriOrRVY, IsCapMode]
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 } // DecoderNameSpace = "CapModeOnly_"
@@ -739,7 +743,7 @@ multiclass CPrefixedInstAlias<string Asm, dag Result, bit EmitPriority> {
   def : InstAlias<"c" # Asm, Result, 0>;
 }
 
-let Predicates = [HasCheri, IsCapMode] in {
+let Predicates = [HasCheriOrRVY, IsCapMode] in {
 // "cj" is the non-canonical form, but we provide this for completeness.
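// Each CPrefixedInstAlias expands to two aliases, so every capability-mode
// mnemonic below is accepted both plain and with a "c" prefix, e.g. both
// "j offset" and "cj offset" assemble to CJAL.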
defm: CPrefixedInstAlias<"j $offset", (CJAL C0, simm21_lsb0_jal:$offset), 1>; defm: CPrefixedInstAlias<"jal $offset", (CJAL C1, simm21_lsb0_jal:$offset), 1>; @@ -759,10 +763,10 @@ defm: CPrefixedInstAlias<"ret", (CJALR C0, C1, 0), defm: CPrefixedInstAlias<"jr $rs, $offset", (CJALR C0, GPCR:$rs, simm12:$offset), 0>; defm: CPrefixedInstAlias<"jalr $rs, $offset", (CJALR C1, GPCR:$rs, simm12:$offset), 0>; defm: CPrefixedInstAlias<"jalr $rd, $rs, $offset", (CJALR GPCR:$rd, GPCR:$rs, simm12:$offset), 0>; -} // Predicates = [HasCheri, IsCapMode] +} // Predicates = [HasCheriOrRVY, IsCapMode] -let Predicates = [HasCheri, IsCapMode] in { +let Predicates = [HasCheriOrRVY, IsCapMode] in { defm CLB : CheriLoad_ri<0b000, "lb">; defm CLH : CheriLoad_ri<0b001, "lh">; defm CLW : CheriLoad_ri<0b010, "lw">; @@ -772,13 +776,13 @@ defm CLHU : CheriLoad_ri<0b101, "lhu">; defm CSB : CheriStore_ri<0b000, "sb">; defm CSH : CheriStore_ri<0b001, "sh">; defm CSW : CheriStore_ri<0b010, "sw">; -} // Predicates = [HasCheri, IsCapMode] +} // Predicates = [HasCheriOrRVY, IsCapMode] -let Predicates = [HasCheri, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, IsRV64, IsCapMode] in { defm CLWU : CheriLoad_ri<0b110, "lwu">; defm CLD : CheriLoad_ri<0b011, "ld">; defm CSD : CheriStore_ri<0b011, "sd">; -} // Predicates = [HasCheri, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, IsRV64, IsCapMode] let Predicates = [HasCheri, IsRV32, IsCapMode] in { let DecoderNamespace = "RISCV32CapModeOnly_", @@ -826,7 +830,7 @@ defm : CPrefixedInstAlias<"sc $rs2, (${rs1})", (CSC_128 GPCR:$rs2, GPCR:$rs1, 0)>; } -let Predicates = [HasCheri, IsCapMode] in { +let Predicates = [HasCheriOrRVY, IsCapMode] in { defm : CPrefixedInstAlias<"lb $rd, (${rs1})", (CLB GPR:$rd, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"lh $rd, (${rs1})", (CLH GPR:$rd, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"lw $rd, (${rs1})", (CLW GPR:$rd, GPCR:$rs1, 0)>; @@ -836,17 +840,17 @@ defm : CPrefixedInstAlias<"lhu $rd, (${rs1})", (CLHU GPR:$rd, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"sb $rs2, (${rs1})", (CSB GPR:$rs2, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"sh $rs2, (${rs1})", (CSH GPR:$rs2, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"sw $rs2, (${rs1})", (CSW GPR:$rs2, GPCR:$rs1, 0)>; -} // Predicates = [HasCheri, IsCapMode] +} // Predicates = [HasCheriOrRVY, IsCapMode] -let Predicates = [HasCheri, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, IsRV64, IsCapMode] in { defm : CPrefixedInstAlias<"lwu $rd, (${rs1})", (CLWU GPR:$rd, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"ld $rd, (${rs1})", (CLD GPR:$rd, GPCR:$rs1, 0)>; defm : CPrefixedInstAlias<"sd $rs2, (${rs1})", (CSD GPR:$rs2, GPCR:$rs1, 0)>; -} // Predicates = [HasCheri, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, IsRV64, IsCapMode] /// 'A' (Atomic Instructions) extension -let Predicates = [HasCheri, HasStdExtA, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtA, IsCapMode] in { defm CLR_B : CLR_r_aq_rl<0b000, "lr.b">; defm CSC_B : CAMO_rr_aq_rl<0b00011, 0b000, "sc.b">; @@ -864,9 +868,9 @@ defm CAMOMIN_W : CAMO_rr_aq_rl<0b10000, 0b010, "amomin.w">; defm CAMOMAX_W : CAMO_rr_aq_rl<0b10100, 0b010, "amomax.w">; defm CAMOMINU_W : CAMO_rr_aq_rl<0b11000, 0b010, "amominu.w">; defm CAMOMAXU_W : CAMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">; -} // Predicates = [HasCheri, HasStdExtA, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtA, IsCapMode] -let Predicates = [HasCheri, HasStdExtA, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtA, IsRV64, IsCapMode] in { 
defm CLR_D : CLR_r_aq_rl<0b011, "lr.d">; defm CSC_D : CAMO_rr_aq_rl<0b00011, 0b011, "sc.d">; defm CAMOSWAP_D : CAMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">; @@ -878,7 +882,7 @@ defm CAMOMIN_D : CAMO_rr_aq_rl<0b10000, 0b011, "amomin.d">; defm CAMOMAX_D : CAMO_rr_aq_rl<0b10100, 0b011, "amomax.d">; defm CAMOMINU_D : CAMO_rr_aq_rl<0b11000, 0b011, "amominu.d">; defm CAMOMAXU_D : CAMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">; -} // Predicates = [HasCheri, HasStdExtA, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtA, IsRV64, IsCapMode] let Predicates = [HasCheri, HasStdExtA, IsRV32, IsCapMode] in { defm CLR_C : CLR_C_r_aq_rl<"64", 0b011, "lr.c", "RISCV32CapModeOnly_">; @@ -896,7 +900,7 @@ defm CAMOSWAP_C : CAMO_C_rr_aq_rl<"128", 0b00001, 0b100, "amoswap.c", GPCR>; /// 'F' (Single-Precision Floating-Point) extension -let Predicates = [HasCheri, HasStdExtF, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtF, IsCapMode] in { let DecoderNamespace = "CapModeOnly_", hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def CFLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), @@ -916,11 +920,11 @@ def : InstAlias<"cflw $rd, ${imm12}(${rs1})", (CFLW FPR32:$rd, GPCR:$rs1, simm12:$imm12), 0>; def : InstAlias<"cfsw $rs2, ${imm12}(${rs1})", (CFSW FPR32:$rs2, GPCR:$rs1, simm12:$imm12), 0>; -} // Predicates = [HasCheri, HasStdExtF, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtF, IsCapMode] /// 'D' (Single-Precision Floating-Point) extension -let Predicates = [HasCheri, HasStdExtD, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtD, IsCapMode] in { let DecoderNamespace = "CapModeOnly_", hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def CFLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), @@ -940,12 +944,12 @@ def : InstAlias<"cfld $rd, ${imm12}(${rs1})", (CFLD FPR64:$rd, GPCR:$rs1, simm12:$imm12), 0>; def : InstAlias<"cfsd $rs2, ${imm12}(${rs1})", (CFSD FPR64:$rs2, GPCR:$rs1, simm12:$imm12), 0>; -} // Predicates = [HasCheri, HasStdExtD, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtD, IsCapMode] /// 'C' (Compressed Instructions) extension let DecoderNamespace = "CapModeOnly_" in { -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [C2] in def C_CIncOffsetImm4CSPN : RVInst16CIW<0b000, 0b00, (outs GPCRC:$rd), @@ -959,14 +963,14 @@ def C_CIncOffsetImm4CSPN : RVInst16CIW<0b000, 0b00, (outs GPCRC:$rd), } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in def C_CFLD : CCheriLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CLC_128 : CCheriLoad_ri<0b001, "c.lc", GPCRC, uimm9_lsb0000> { bits<9> imm; let Inst{12-11} = imm{5-4}; @@ -982,14 +986,14 @@ def C_CLW : CCheriLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> { } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in def C_CLC_64 : CCheriLoad_ri<0b011, "c.lc", GPCRC, uimm8_lsb000> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = 
imm{7-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CLD : CCheriLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> { bits<8> imm; let Inst{12-10} = imm{5-3}; @@ -997,14 +1001,14 @@ def C_CLD : CCheriLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> { } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in def C_CFSD : CCheriStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CSC_128 : CCheriStore_rri<0b101, "c.sc", GPCRC, uimm9_lsb0000> { bits<9> imm; let Inst{12-11} = imm{5-4}; @@ -1020,14 +1024,14 @@ def C_CSW : CCheriStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> { } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in def C_CSC_64 : CCheriStore_rri<0b111, "c.sc", GPCRC, uimm8_lsb000> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CSD : CCheriStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { bits<8> imm; let Inst{12-10} = imm{5-3}; @@ -1036,7 +1040,7 @@ def C_CSD : CCheriStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RISCV32CapModeOnly_", Defs = [C1], - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in def C_CJAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset), "c.jal", "$offset">; @@ -1054,13 +1058,13 @@ def C_CIncOffsetImm16CSP : RVInst16CI<0b011, 0b01, (outs CSP:$rd_wb), } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in def C_CFLDCSP : CCheriStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = imm{8-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CLCCSP_128 : CCheriStackLoad<0b001, "c.lcsp", GPCRNoC0, uimm10_lsb0000> { let Inst{6} = imm{4}; let Inst{5-2} = imm{9-6}; @@ -1072,13 +1076,13 @@ def C_CLWCSP : CCheriStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00> { } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in def C_CLCCSP_64 : CCheriStackLoad<0b011, "c.lcsp", GPCRNoC0, uimm9_lsb000> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = imm{8-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CLDCSP : CCheriStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = 
imm{8-6}; @@ -1100,13 +1104,13 @@ def C_CJALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPCRNoC0:$rs1), "c.jalr", "$rs1">; let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in def C_CFSDCSP : CCheriStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CSCCSP_128 : CCheriStackStore<0b101, "c.scsp", GPCR, uimm10_lsb0000> { let Inst{12-11} = imm{5-4}; let Inst{10-7} = imm{9-6}; @@ -1118,19 +1122,19 @@ def C_CSWCSP : CCheriStackStore<0b110, "c.swsp", GPR, uimm8_lsb00> { } let DecoderNamespace = "RISCV32CapModeOnly_", - Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in + Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in def C_CSCCSP_64 : CCheriStackStore<0b111, "c.scsp", GPCR, uimm9_lsb000> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in def C_CSDCSP : CCheriStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] } // DecoderNamespace = "CapModeOnly_" let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { @@ -1177,7 +1181,7 @@ def : InstAlias<"c.cj $offset", (C_J simm12_lsb0:$offset), 0>; // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// -let Predicates = [HasCheri, IsCapMode] in { +let Predicates = [HasCheriOrRVY, IsCapMode] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8 in def PseudoCLLC : Pseudo<(outs GPCR:$dst), (ins bare_symbol:$src), [], @@ -1213,12 +1217,12 @@ def : Pat<(riscv_clc_tls_gd tglobaltlsaddr:$in), let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0 in def PseudoCIncOffsetTPRel : Pseudo<(outs GPCR:$rd), - (ins GPCR:$rs1, GPR:$rs2, + (ins GPCR:$rs1, GPRNoX0:$rs2, tprel_add_symbol:$src), [], "cincoffset", "$rd, $rs1, $rs2, $src">; } -let Predicates = [HasCheri, HasStdExtD, IsRV32] in { +let Predicates = [HasCheriOrRVY, HasStdExtD, IsRV32] in { // Stores an FPR via splitting to two GPRs. 
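// (There is no capability-width integer path for an f64 on RV32, so
// emitSplitF64Pseudo in RISCVISelLowering.cpp lowers this pseudo into two
// 32-bit stores of the value's halves.)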
let hasSideEffects = 0, mayLoad = 0, mayStore = 1, @@ -1236,7 +1240,7 @@ def CheriSplitStoreF64Pseudo : Pseudo<(outs GPR:$tmplo, GPR:$tmphi, GPCR:$tmpdst), (ins FPR64:$src, GPCR:$dst), []>; -} // Predicates = [HasCheri, HasStdExtD, IsRV32] +} // Predicates = [HasCheriOrRVY, HasStdExtD, IsRV32] class PatGpcr : Pat<(RetVt (OpNode GPCR:$rs1)), (Inst GPCR:$rs1)>; @@ -1258,6 +1262,7 @@ class PatGprGpcr /// Capability-Inspection Instructions +let Predicates = [HasCheri] in { def : PatGpcr; def : PatGpcr; def : PatGpcr; @@ -1266,11 +1271,13 @@ def : PatGpcr; def : PatGpcr; def : PatGpcr; def : PatGpcr; -def : Pat<(XLenVT (int_cheri_cap_address_get GPCR:$cs1)), (PseudoCGetAddr GPCR:$cs1)>; +def : Pat<(XLenVT (int_cheri_cap_address_get GPCR:$cs1)), (PseudoCGetAddr GPCR:$cs1)>{ let Predicates = [HasCheriOrRVY]; } def : PatGpcr; +} /// Capability-Modification Instructions +let Predicates = [HasCheri] in { def : PatGpcrGpcr; def : PatGpcrGpcr; def : PatGpcrGpr; @@ -1297,6 +1304,7 @@ def : PatGpcrUimm12; def : Pat<(CapFrameAddrRegImm GPCR:$rs1, simm12:$imm12), (CIncOffsetImm GPCR:$rs1, simm12:$imm12)>; +} // Predicates = [HasCheri] /// Pointer-Arithmetic Instructions @@ -1306,7 +1314,7 @@ def : Pat<(XLenVT (int_cheri_cap_diff GPCR:$cs1, GPCR:$cs2)), (SUB (XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), (XLenVT (EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>; -let Predicates = [IsPureCapABI] in { +let Predicates = [IsPureCapABI, HasCheri] in { def : Pat<(inttoptr (XLenVT GPR:$rs2)), (CIncOffset C0, GPR:$rs2)>; def : Pat<(inttoptr simm12:$imm12), (CIncOffsetImm C0, simm12:$imm12)>; def : Pat<(XLenVT (ptrtoint GPCR:$rs1)), (PseudoCGetAddr GPCR:$rs1)>; @@ -1426,16 +1434,19 @@ def : CheriBccSwapPat; /// Assertion Instructions +let Predicates = [HasCheri] in { def : PatGpcrGpcr; def : PatGpcrGpcr; +} /// Special Capability Register Access Instructions +let Predicates = [HasCheri] in def : Pat<(int_cheri_ddc_get), (CSpecialRW SCR_DDC.Encoding, C0)>; -let Predicates = [HasCheri, IsPureCapABI] in +let Predicates = [HasCheriOrRVY, IsPureCapABI] in def : Pat<(int_cheri_stack_cap_get), (CLenVT (COPY C2))>; -let Predicates = [HasCheri, IsCapMode] in +let Predicates = [HasCheriOrRVY, IsCapMode] in def : Pat<(int_cheri_pcc_get), (AUIPCC 0)>; let Predicates = [HasCheri, NotCapMode] in @@ -1445,11 +1456,14 @@ def : Pat<(int_cheri_pcc_get), (CSpecialRW SCR_PCC.Encoding, C0)>; /// Adjusting to Compressed Capability Precision Instructions +let Predicates = [HasCheri] in { def : PatGpr; def : PatGpr; +} /// Tag-Memory Access Instructions +let Predicates = [HasCheri] in def : PatGpcr; /// Memory-Access with Explicit Address Type Instructions @@ -1590,7 +1604,7 @@ let Predicates = [HasCheri, NotCapMode, HasStdExtF] in def : Pat<(store FPR32:$rs2, GPCR:$rs1), (SW_CAP (FMV_X_W FPR32:$rs2), GPCR:$rs1)>; -let Predicates = [HasCheri, NotCapMode, HasStdExtD, IsRV32] in +let Predicates = [HasCheriOrRVY, NotCapMode, HasStdExtD, IsRV32] in def : Pat<(store FPR64:$rs2, GPCR:$rs1), (KILL (CheriSplitStoreF64Pseudo FPR64:$rs2, GPCR:$rs1))>; @@ -1603,16 +1617,17 @@ def : Pat<(store FPR64:$rs2, GPCR:$rs1), /// Memory-Access Instructions multiclass AMOCapPat { + defvar optUnderScore = !if(!eq(CLenStr, ""), "", "_"); def : PatGprGpcr(AtomicOp#"_monotonic"), - !cast(BaseInst#"_"#CLenStr)>; + !cast(BaseInst#optUnderScore#CLenStr)>; def : PatGprGpcr(AtomicOp#"_acquire"), - !cast(BaseInst#"_AQ_"#CLenStr)>; + !cast(BaseInst#"_AQ"#optUnderScore#CLenStr)>; def : PatGprGpcr(AtomicOp#"_release"), - !cast(BaseInst#"_RL_"#CLenStr)>; + 
!cast(BaseInst#"_RL"#optUnderScore#CLenStr)>; def : PatGprGpcr(AtomicOp#"_acq_rel"), - !cast(BaseInst#"_AQ_RL_"#CLenStr)>; + !cast(BaseInst#"_AQ_RL"#optUnderScore#CLenStr)>; def : PatGprGpcr(AtomicOp#"_seq_cst"), - !cast(BaseInst#"_AQ_RL_"#CLenStr)>; + !cast(BaseInst#"_AQ_RL"#optUnderScore#CLenStr)>; } let Predicates = [HasCheri, IsRV32, NotCapMode] in { @@ -1637,7 +1652,7 @@ def : AtomicStPat; defm : AMOCapPat<"128", "atomic_swap_cap", "AMOSWAP_C">; } // Predicates = [HasCheri, HasStdExtA, IsRV64, NotCapMode] -let Predicates = [HasCheri, HasStdExtA] in { +let Predicates = [HasCheriOrRVY, HasStdExtA] in { def PseudoAtomicLoadAddCap : PseudoAMO { let Size = 16; } def PseudoAtomicLoadSubCap : PseudoAMO { let Size = 20; } def PseudoAtomicLoadAndCap : PseudoAMO { let Size = 20; } @@ -1649,9 +1664,9 @@ def PseudoAtomicLoadUMaxCap : PseudoAMO { let Size = 24; } def PseudoAtomicLoadUMinCap : PseudoAMO { let Size = 24; } def PseudoAtomicLoadNandCap : PseudoAMO { let Size = 24; } def PseudoCmpXchgCap : PseudoCmpXchg { let Size = 16; } -} // Predicates = [HasCheri, HasStdExtA]f +} // Predicates = [HasCheriOrRVY, HasStdExtA] -let Predicates = [HasCheri, HasStdExtA, NotCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtA, NotCapMode] in { defm : PseudoAMOPat<"atomic_load_add_cap", PseudoAtomicLoadAddCap, GPCR>; defm : PseudoAMOPat<"atomic_load_sub_cap", PseudoAtomicLoadSubCap, GPCR>; defm : PseudoAMOPat<"atomic_load_and_cap", PseudoAtomicLoadAndCap, GPCR>; @@ -1663,7 +1678,7 @@ defm : PseudoAMOPat<"atomic_load_umax_cap", PseudoAtomicLoadUMaxCap, GPCR>; defm : PseudoAMOPat<"atomic_load_umin_cap", PseudoAtomicLoadUMinCap, GPCR>; defm : PseudoAMOPat<"atomic_load_nand_cap", PseudoAtomicLoadNandCap, GPCR>; defm : PseudoCmpXchgPat<"atomic_cmp_swap_cap", PseudoCmpXchgCap, CLenVT, GPCR>; -} // Predicates = [HasCheri, HasStdExtA, NotCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtA, NotCapMode] /// Capability Mode Instructions @@ -1755,19 +1770,20 @@ multiclass PseudoCheriCmpXchgPat { + defvar optUnderScore = !if(!eq(CLenStr, ""), "", "_"); def : PatGpcrGpcr(AtomicOp#"_monotonic"), - !cast(BaseInst#"_"#CLenStr), CLenVT>; + !cast(BaseInst#optUnderScore#CLenStr), CLenVT>; def : PatGpcrGpcr(AtomicOp#"_acquire"), - !cast(BaseInst#"_AQ_"#CLenStr), CLenVT>; + !cast(BaseInst#"_AQ"#optUnderScore#CLenStr), CLenVT>; def : PatGpcrGpcr(AtomicOp#"_release"), - !cast(BaseInst#"_RL_"#CLenStr), CLenVT>; + !cast(BaseInst#"_RL"#optUnderScore#CLenStr), CLenVT>; def : PatGpcrGpcr(AtomicOp#"_acq_rel"), - !cast(BaseInst#"_AQ_RL_"#CLenStr), CLenVT>; + !cast(BaseInst#"_AQ_RL"#optUnderScore#CLenStr), CLenVT>; def : PatGpcrGpcr(AtomicOp#"_seq_cst"), - !cast(BaseInst#"_AQ_RL_"#CLenStr), CLenVT>; + !cast(BaseInst#"_AQ_RL"#optUnderScore#CLenStr), CLenVT>; } -let Predicates = [HasCheri, HasStdExtA] in { +let Predicates = [HasCheriOrRVY, HasStdExtA] in { def PseudoCheriAtomicSwap8 : PseudoCheriAMO { let Size = 16; } def PseudoCheriAtomicLoadAdd8 : PseudoCheriAMO { let Size = 16; } def PseudoCheriAtomicLoadSub8 : PseudoCheriAMO { let Size = 16; } @@ -1808,16 +1824,16 @@ def PseudoCheriAtomicLoadUMaxCap : PseudoCheriAMO { let Size = 24; } def PseudoCheriAtomicLoadUMinCap : PseudoCheriAMO { let Size = 24; } def PseudoCheriAtomicLoadNandCap : PseudoCheriAMO { let Size = 24; } def PseudoCheriCmpXchgCap : PseudoCheriCmpXchg { let Size = 16; } -} // Predicates = [HasCheri, HasStdExtA] +} // Predicates = [HasCheriOrRVY, HasStdExtA] -let Predicates = [HasCheri, HasStdExtA, IsRV64] in { +let Predicates = [HasCheriOrRVY, HasStdExtA,
IsRV64] in { def PseudoCheriAtomicLoadNand64 : PseudoCheriAMO { let Size = 20; } def PseudoCheriCmpXchg64 : PseudoCheriCmpXchg { let Size = 16; } -} // Predicates = [HasCheri, HasStdExtA, IsRV64] +} // Predicates = [HasCheriOrRVY, HasStdExtA, IsRV64] /// 'I' (Integer) base -let Predicates = [HasCheri, IsCapMode, IsPureCapABI] in { +let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] in { let isBarrier = 1, isBranch = 1, isTerminator = 1 in def PseudoCBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>, PseudoInstExpansion<(CJAL C0, simm21_lsb0_jal:$imm20)>; @@ -1830,9 +1846,9 @@ def PseudoCBRIND : Pseudo<(outs), (ins GPCR:$rs1, simm12:$imm12), []>, def : Pat<(brind GPCR:$rs1), (PseudoCBRIND GPCR:$rs1, 0)>; def : Pat<(brind (cptradd GPCR:$rs1, simm12:$imm12)), (PseudoCBRIND GPCR:$rs1, simm12:$imm12)>; -} // Predicates = [HasCheri, IsCapMode, IsPureCapABI] +} // Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] -let Predicates = [HasCheri, IsCapMode], +let Predicates = [HasCheriOrRVY, IsCapMode], isCall = 1, isBarrier = 1, isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Size = 8 in def PseudoCCALLReg : Pseudo<(outs GPCR:$rd), @@ -1840,51 +1856,51 @@ def PseudoCCALLReg : Pseudo<(outs GPCR:$rd), let AsmString = "ccall\t$rd, $func"; } -let Predicates = [HasCheri, IsCapMode], +let Predicates = [HasCheriOrRVY, IsCapMode], isCall = 1, Defs = [C1], isCodeGenOnly = 0, Size = 8 in def PseudoCCALL : Pseudo<(outs), (ins cap_call_symbol:$func), []> { let AsmString = "ccall\t$func"; } -let Predicates = [HasCheri, IsCapMode, IsPureCapABI] in { +let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] in { def : Pat<(riscv_cap_call tglobaladdr:$func), (PseudoCCALL tglobaladdr:$func)>; def : Pat<(riscv_cap_call texternalsym:$func), (PseudoCCALL texternalsym:$func)>; -} // Predicates = [HasCheri, IsCapMode, IsPureCapABI] +} // Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] -let Predicates = [HasCheri, IsCapMode, IsPureCapABI], +let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI], isCall = 1, Defs = [C1] in def PseudoCCALLIndirect : Pseudo<(outs), (ins GPCR:$rs1), [(riscv_cap_call GPCR:$rs1)]>, PseudoInstExpansion<(CJALR C1, GPCR:$rs1, 0)>; -let Predicates = [HasCheri, IsCapMode, IsPureCapABI], +let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI], isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoCRET : Pseudo<(outs), (ins), [(riscv_ret_glue)]>, PseudoInstExpansion<(CJALR C0, C1, 0)>; -let Predicates = [HasCheri, IsCapMode], +let Predicates = [HasCheriOrRVY, IsCapMode], isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [C2], isCodeGenOnly = 0, Size = 8 in def PseudoCTAIL : Pseudo<(outs), (ins cap_call_symbol:$dst), []> { let AsmString = "ctail\t$dst"; } -let Predicates = [HasCheri, IsCapMode, IsPureCapABI], +let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI], isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [C2] in def PseudoCTAILIndirect : Pseudo<(outs), (ins GPCRTC:$rs1), [(riscv_cap_tail GPCRTC:$rs1)]>, PseudoInstExpansion<(CJALR C0, GPCR:$rs1, 0)>; -let Predicates = [HasCheri, IsCapMode, IsPureCapABI] in { +let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] in { def : Pat<(riscv_cap_tail tglobaladdr:$dst), (PseudoCTAIL texternalsym:$dst)>; def : Pat<(riscv_cap_tail texternalsym:$dst), (PseudoCTAIL texternalsym:$dst)>; -} // Predicates = [HasCheri, IsCapMode, IsPureCapABI] +} // Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] -let Predicates = [HasCheri, IsCapMode, IsPureCapABI] in 
+let Predicates = [HasCheriOrRVY, IsCapMode, IsPureCapABI] in let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8, isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in def PseudoCJump : Pseudo<(outs GPCR:$rd), @@ -1892,19 +1908,19 @@ def PseudoCJump : Pseudo<(outs GPCR:$rd), let AsmString = "cjump\t$target, $rd"; } -defm : CheriLdPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriLdPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriLdPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriLdPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriLdPat, Requires<[HasCheri, IsRV32, IsCapMode]>; -defm : CheriLdPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriLdPat, Requires<[HasCheri, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsRV32, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriLdPat, Requires<[HasCheriOrRVY, IsCapMode]>; -defm : CheriStPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriStPat, Requires<[HasCheri, IsCapMode]>; -defm : CheriStPat, Requires<[HasCheri, IsRV32, IsCapMode]>; +defm : CheriStPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriStPat, Requires<[HasCheriOrRVY, IsCapMode]>; +defm : CheriStPat, Requires<[HasCheriOrRVY, IsRV32, IsCapMode]>; -let Predicates = [HasCheri, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, IsRV64, IsCapMode] in { /// Loads @@ -1917,23 +1933,23 @@ defm : CheriLdPat; defm : CheriStPat; defm : CheriStPat; -} // Predicates = [HasCheri, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, IsRV64, IsCapMode] /// Other pseudo-instructions // Pessimistically assume the stack pointer will be clobbered -let Predicates = [HasCheri, IsPureCapABI] in { +let Predicates = [HasCheriOrRVY, IsPureCapABI] in { let Defs = [C2], Uses = [C2] in { def ADJCALLSTACKDOWNCAP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUPCAP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } // Defs = [C2], Uses = [C2] -} // Predicates = [HasCheri, IsPureCapABI] +} // Predicates = [HasCheriOrRVY, IsPureCapABI] /// 'A' (Atomic Instructions) extension -let Predicates = [HasCheri, HasStdExtA, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtA, IsCapMode] in { /// Atomic loads and stores @@ -2008,9 +2024,9 @@ defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_32", PseudoCheriCmpXchg32>; defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_cap", PseudoCheriCmpXchgCap, GPCR>; -} // Predicates = [HasCheri, HasStdExtA, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtA, IsCapMode] -let Predicates = [HasCheri, HasStdExtA, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtA, IsRV64, IsCapMode] in { /// 64-bit atomic loads and stores @@ -2040,7 +2056,7 @@ defm : PseudoCheriAMOPat<"atomic_load_nand_64", PseudoCheriAtomicLoadNand64>; defm : PseudoCheriCmpXchgPat<"atomic_cmp_swap_64", PseudoCheriCmpXchg64>; -} // Predicates = [HasCheri, HasStdExtA, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtA, IsRV64, IsCapMode] let Predicates = [HasCheri, HasStdExtA, IsRV32, IsCapMode] in { defm : CheriLdPat; @@ -2056,7 +2072,7 @@ defm : CheriAMOCapPat<"128", "atomic_swap_cap", "CAMOSWAP_C">; /// 'F' (Single-Precision Floating-Point) extension -let 
Predicates = [HasCheri, HasStdExtF, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtF, IsCapMode] in { /// Loads @@ -2066,11 +2082,11 @@ defm : CheriLdPat; defm : CheriStPat; -} // Predicates = [HasCheri, HasStdExtF, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtF, IsCapMode] /// 'D' (Single-Precision Floating-Point) extension -let Predicates = [HasCheri, HasStdExtD, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasStdExtD, IsCapMode] in { /// Loads @@ -2080,7 +2096,7 @@ defm : CheriLdPat; defm : CheriStPat; -} // Predicates = [HasCheri, HasStdExtD, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasStdExtD, IsCapMode] /// 'XCheri' extension @@ -2104,129 +2120,129 @@ def : CompressPat<(CIncOffsetImm GPCRC:$rd, CSP:$rs1, uimm10_lsb00nonzero:$imm), (C_CIncOffsetImm4CSPN GPCRC:$rd, CSP:$rs1, uimm10_lsb00nonzero:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { def : CompressPat<(CFLD FPR64C:$rd, GPCRC:$rs1, uimm8_lsb000:$imm), (C_CFLD FPR64C:$rd, GPCRC:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CLC_128 GPCRC:$rd, GPCRC:$rs1, uimm9_lsb0000:$imm), (C_CLC_128 GPCRC:$rd, GPCRC:$rs1, uimm9_lsb0000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CLW GPRC:$rd, GPCRC:$rs1, uimm7_lsb00:$imm), (C_CLW GPRC:$rd, GPCRC:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { def : CompressPat<(CLC_64 GPCRC:$rd, GPCRC:$rs1, uimm8_lsb000:$imm), (C_CLC_64 GPCRC:$rd, GPCRC:$rs1, uimm8_lsb000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CLD GPRC:$rd, GPCRC:$rs1, uimm8_lsb000:$imm), (C_CLD GPRC:$rd, GPCRC:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { def : CompressPat<(CFSD FPR64C:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm), (C_CFSD FPR64C:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CSC_128 GPCRC:$rs2, GPCRC:$rs1, uimm9_lsb0000:$imm), (C_CSC_128 GPCRC:$rs2, GPCRC:$rs1, uimm9_lsb0000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] -let Predicates = 
[HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CSW GPRC:$rs2, GPCRC:$rs1, uimm7_lsb00:$imm), (C_CSW GPRC:$rs2, GPCRC:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { def : CompressPat<(CSC_64 GPCRC:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm), (C_CSC_64 GPCRC:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CSD GPRC:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm), (C_CSD GPRC:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] // Quadrant 1 -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { def : CompressPat<(CJAL C1, simm12_lsb0:$offset), (C_CJAL simm12_lsb0:$offset)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CIncOffsetImm C2, C2, simm10_lsb0000nonzero:$imm), (C_CIncOffsetImm16CSP C2, simm10_lsb0000nonzero:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CJAL C0, simm12_lsb0:$offset), (C_J simm12_lsb0:$offset)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] // Quadrant 2 -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { def : CompressPat<(CFLD FPR64:$rd, CSP:$rs1, uimm9_lsb000:$imm), (C_CFLDCSP FPR64:$rd, CSP:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CLC_128 GPCRNoC0:$rd, CSP:$rs1, uimm10_lsb0000:$imm), (C_CLCCSP_128 GPCRNoC0:$rd, CSP:$rs1, uimm10_lsb0000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CLW GPRNoX0:$rd, CSP:$rs1, uimm8_lsb00:$imm), (C_CLWCSP GPRNoX0:$rd, CSP:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { def : CompressPat<(CLC_64 GPCRNoC0:$rd, CSP:$rs1, uimm9_lsb000:$imm), (C_CLCCSP_64 GPCRNoC0:$rd, CSP:$rs1, uimm9_lsb000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, 
IsRV32, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CLD GPRNoX0:$rd, CSP:$rs1, uimm9_lsb000:$imm), (C_CLDCSP GPRNoX0:$rd, CSP:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CJALR C0, GPCRNoC0:$rs1, 0), (C_CJR GPCRNoC0:$rs1)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CJALR C1, GPCRNoC0:$rs1, 0), (C_CJALR GPCRNoC0:$rs1)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] in { def : CompressPat<(CFSD FPR64:$rs2, CSP:$rs1, uimm9_lsb000:$imm), (C_CFSDCSP FPR64:$rs2, CSP:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, HasStdExtD, IsRV32, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CSC_128 GPCR:$rs2, CSP:$rs1, uimm10_lsb0000:$imm), (C_CSCCSP_128 GPCR:$rs2, CSP:$rs1, uimm10_lsb0000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] in { def : CompressPat<(CSW GPR:$rs2, CSP:$rs1, uimm8_lsb00:$imm), (C_CSWCSP GPR:$rs2, CSP:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsCapMode] let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { def : CompressPat<(CSC_64 GPCR:$rs2, CSP:$rs1, uimm9_lsb000:$imm), (C_CSCCSP_64 GPCR:$rs2, CSP:$rs1, uimm9_lsb000:$imm)>; } // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] -let Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +let Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { def : CompressPat<(CSD GPR:$rs2, CSP:$rs1, uimm9_lsb000:$imm), (C_CSDCSP GPR:$rs2, CSP:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasCheri, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] +} // Predicates = [HasCheriOrRVY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoY.td b/llvm/lib/Target/RISCV/RISCVInstrInfoY.td new file mode 100644 index 0000000000000..5801ddc68cd31 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoY.td @@ -0,0 +1,624 @@ +//===-- RISCVInstrInfoY.td - RISCV instructions -------------*- tblgen-*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction Formats +//===----------------------------------------------------------------------===// + +include "RISCVInstrFormatsY.td" + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// + +multiclass ZCheri_r<bits<5> funct5, string opcodestr, RegisterClass rdClass=GPR, + DAGOperand rs1Operand=GPCR, list<string> altMnemonics = [], + bit defsSealed = false> { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, defsCanBeSealed = defsSealed in +def "" : RVInstZCheriSrcDst<0x8, funct5, 0x0, OPC_OP, (outs rdClass:$rd), + (ins rs1Operand:$rs1), opcodestr, "$rd, $rs1">; +foreach alt = altMnemonics in { +def : InstAlias<alt # " $rd, $rs1", (!cast<Instruction>(NAME) rdClass:$rd, rs1Operand:$rs1), 0>; +} +} + +multiclass ZCheri_rr<bits<7> funct7, bits<3> funct3, string opcodestr, + RegisterClass rdClass=GPCR, RegisterClass rs1Class=GPCR, + RegisterClass rs2Class=GPR, list<string> altMnemonics = [], + bit defsSealed = false> { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, defsCanBeSealed = defsSealed in +def "" : RVInstR<funct7, funct3, OPC_OP, (outs rdClass:$rd), + (ins rs1Class:$rs1, rs2Class:$rs2), opcodestr, "$rd, $rs1, $rs2">; +foreach alt = altMnemonics in { +def : InstAlias<alt # " $rd, $rs1, $rs2", (!cast<Instruction>(NAME) rdClass:$rd, rs1Class:$rs1, rs2Class:$rs2), 0>; +} +} + +multiclass ZCheri_setboundsimm<string opcodestr, list<string> altMnemonics = []> { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def "" : RVInstCheriSetBoundsImmFmt<0x1, 0x5, OPC_OP_IMM, (outs GPCR:$rd), + (ins GPCR:$rs1, csetbnd_imm:$imm), + opcodestr, "$rd, $rs1, $imm" + >; +foreach alt = altMnemonics in { +def : InstAlias<alt # " $rd, $rs1, $rs2", (!cast<Instruction>(NAME) GPCR:$rd, GPCR:$rs1, csetbnd_imm:$rs2), 0>; +} +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class Cheri_SrcDst<bits<7> funct7, bits<5> funct5, bits<3> funct3, RISCVOpcode opcode, + string opcodestr, RegisterClass rdClass=GPR, + DAGOperand rs1Operand=GPCR> + : RVInstCheriSrcDst<funct7, funct5, funct3, opcode, (outs rdClass:$rd), + (ins rs1Operand:$rs1), opcodestr, "$rd, $rs1">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class CheriRVInstR<bits<7> funct7, bits<3> funct3, RISCVOpcode opcode, + string opcodestr, RegisterClass rdClass=GPR, + RegisterClass rs1Class=GPR, RegisterClass rs2Class=GPR> + : RVInstR<funct7, funct3, opcode, (outs rdClass:$rd), + (ins rs1Class:$rs1, rs2Class:$rs2), opcodestr, "$rd, $rs1, $rs2">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class CheriRVInstrI<bits<3> funct3, RISCVOpcode opcode, string opcodestr, bit simm> + : RVInstI; + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in +class CheriPseudoLoad<RegisterClass rdClass = GPR, DAGOperand rs1Operand = GPCR> + : Pseudo<(outs rdClass:$rd), (ins rs1Operand:$rs1), []>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in +class CheriPseudoStore<RegisterClass rs2Class = GPR, DAGOperand rs1Operand = GPCR> + : Pseudo<(outs), (ins rs2Class:$rs2, rs1Operand:$rs1), []>; + +multiclass LR_C_r_aq_rl_aliases<list<string> altMnemonics, RegisterOperand rs1Class = GPRMemZeroOffset> { +foreach alias = altMnemonics in { + def : InstAlias<alias # " $rd, $rs1", (!cast<Instruction>(NAME) GPCR:$rd, rs1Class:$rs1)>; + def : InstAlias<alias # ".aq $rd, $rs1", (!cast<Instruction>(NAME # _AQ) GPCR:$rd, rs1Class:$rs1)>; + def : InstAlias<alias # ".rl $rd, $rs1", (!cast<Instruction>(NAME # _RL) GPCR:$rd, rs1Class:$rs1)>; + def : InstAlias<alias # ".aqrl $rd, $rs1", (!cast<Instruction>(NAME # _AQ_RL) GPCR:$rd, rs1Class:$rs1)>; +} +} + +multiclass AMO_C_rr_aq_rl_aliases<list<string> altMnemonics, RegisterClass rdClass, + RegisterOperand rs1Class = GPRMemZeroOffset> { +foreach alias = altMnemonics in { + def : InstAlias<alias # " $rd, $rs2, $rs1", (!cast<Instruction>(NAME) rdClass:$rd, rs1Class:$rs1, GPCR:$rs2), 0>; + def : InstAlias<alias # ".aq $rd, $rs2, $rs1", (!cast<Instruction>(NAME # _AQ) rdClass:$rd, rs1Class:$rs1, GPCR:$rs2), 0>; + def : InstAlias<alias # ".rl $rd, $rs2, $rs1", (!cast<Instruction>(NAME # _RL) rdClass:$rd, rs1Class:$rs1, GPCR:$rs2), 0>; + def : InstAlias<alias # ".aqrl $rd, $rs2, $rs1", (!cast<Instruction>(NAME # _AQ_RL) rdClass:$rd, rs1Class:$rs1,
GPCR:$rs2), 0>; +} +} + + +//===----------------------------------------------------------------------===// +// Capability-Inspection Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY] in { +defm YTAGR : ZCheri_r<0x0, "ytagr", GPR, GPCR, ["cgettag", "gctag"]>; +defm YPERMR : ZCheri_r<0x1, "ypermr", GPR, GPCR, ["gcperm", "cgetperm"]>; +defm YHIR : ZCheri_r<0x4, "yhir", GPR, GPCR, ["gchi", "cgethigh"]>; +defm YBASER : ZCheri_r<0x5, "ybaser", GPR, GPCR, ["gcbase", "cgetbase"]>; +defm YLENR : ZCheri_r<0x6, "ylenr", GPR, GPCR, ["gclen", "cgetlen"]>; +defm YTYPER : ZCheri_r<0x2, "ytyper", GPR, GPCR, ["gctype", "cgettype"]>; +} // Predicates = [HasStdExtY] + +let Predicates = [HasStdExtZYHybrid] in { +defm YMODER : ZCheri_r<0x3, "ymoder", GPR, GPCR, ["gcmode"]>; +} // Predicates = [HasStdExtZYHybrid] + +//===----------------------------------------------------------------------===// +// Capability-Modification Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY] in { +defm YADDRW : ZCheri_rr<0x6, 0x1, "yaddrw", GPCR, GPCR, GPR, ["scaddr", "csetaddr"]>; +defm YPERMC : ZCheri_rr<0x6, 0x2, "ypermc", GPCR, GPCR, GPR, ["acperm", "candperm"]>; +defm YHIW : ZCheri_rr<0x6, 0x3, "yhiw", GPCR, GPCR, GPR, ["schi", "csethigh"]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def ADDY : CheriRVInstR<0x6, 0x0, OPC_OP, "addy", GPCR, GPCR, GPRNoX0>; +def ADDIY : CheriRVInstrI<0x2, OPC_OP_IMM_32, "addiy", 1>; +} +defm YSENTRY : ZCheri_r<0x8, "ysentry", GPCR, GPCR, ["sentry", "csealentry"], 1>; +defm YBLD : ZCheri_rr<0x6, 0x5, "ybld", GPCR, GPCRNoC0, GPCR, ["cbld", "cbuildcap"], 1>; + +defm YBNDSW : ZCheri_rr<0x7, 0x0, "ybndsw", GPCR, GPCR, GPR, ["scbnds", "csetboundsexact"]>; +defm YBNDSRW : ZCheri_rr<0x7, 0x1, "ybndsrw", GPCR, GPCR, GPR, ["scbndsr", "csetbounds"]>; +defm YBNDSIW : ZCheri_setboundsimm<"ybndsiw", ["ybndsw", "scbndsi", "scbnds", "csetbounds", "csetboundsimm"]>; + +foreach alias = ["add", "cadd", "cincoffset"] in +def : InstAlias; + +foreach alias = ["add", "cadd", "caddi", "cincoffset", "cincoffsetimm"] in +def : InstAlias; +} // HasStdExtY + +let Predicates = [HasStdExtZYHybrid] in { +defm YMODEW : ZCheri_rr<0x6, 0x7, "ymodew">; +def : InstAlias<"scmode $cd, $cs1, $rs2", + (YMODEW GPCR:$cd, GPCR:$cs1, GPR:$rs2), 0>; +} + +//===----------------------------------------------------------------------===// +// Mode Switch Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZYHybrid] in { +def YMODESWY : CheriModeSwitchInstr<0x9, OPC_OP, "ymodeswy">; +def : InstAlias<"modesw.cap", (YMODESWY)>; +def YMODESWI : CheriModeSwitchInstr<0xA, OPC_OP, "ymodeswi">; +def : InstAlias<"modesw.int", (YMODESWI)>; +} // HasStdExtZYHybrid + +//===----------------------------------------------------------------------===// +// Pointer-Arithmetic Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY] in { +let isMoveReg = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, + defsCanBeSealed = 1 in +def YMV : Cheri_SrcDst<0x6, 0x0, 0x0, OPC_OP, "ymv", GPCR>; + +foreach alias = ["mv", "cmv", "cmove"] in +def : InstAlias; +} // HasStdExtY + + +//===----------------------------------------------------------------------===// +// Adjusting to Compressed Capability Precision Instructions 
+//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY] in { +defm YAMASK : ZCheri_r<0x7, "yamask", GPR, GPR, ["cram", "crepresentablealignmentmask"]>; +} // HasStdExtY + +//===----------------------------------------------------------------------===// +// Assertion Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY] in { +defm YLT : ZCheri_rr<0x6, 0x6, "ylt", GPR, GPCRNoC0, GPCR, ["scss", "ctestsubset"]>; +defm SYEQ : ZCheri_rr<0x6, 0x4, "syeq", GPR, GPCR, GPCR, ["sceq", "csetequalexact"]>; +} + +//===----------------------------------------------------------------------===// +// Special Capability Register Access Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY] in { + +let DecoderNamespace = "CapModeOnly_" in { +def YCSRRW : CheriCSR_ir<0b001, "csrrw">; +def YCSRRS : CheriCSR_ir<0b010, "csrrs", GPR>; +def YCSRRC : CheriCSR_ir<0b011, "csrrc", GPR>; +def YCSRRWI : CheriCSR_ii<0b101, "csrrwi">; +def YCSRRSI : CheriCSR_ii<0b110, "csrrsi">; +def YCSRRCI : CheriCSR_ii<0b111, "csrrci">; +} + +def : InstAlias<"csrr $cd, $csr", + (YCSRRS GPCR:$cd, cheri_csr_sysreg:$csr, X0)>; +def : InstAlias<"csrw $csr, $cs", + (YCSRRW C0, cheri_csr_sysreg:$csr, GPCR:$cs)>; +def : InstAlias<"csrs $csr, $cs", + (YCSRRS C0, cheri_csr_sysreg:$csr, GPR:$cs)>; +def : InstAlias<"csrc $csr, $cs", + (YCSRRC C0, cheri_csr_sysreg:$csr, GPR:$cs)>; + +def : InstAlias<"csrwi $csr, $imm", + (YCSRRWI C0, cheri_csr_sysreg:$csr, uimm5:$imm)>; +def : InstAlias<"csrsi $csr, $imm", + (YCSRRSI C0, cheri_csr_sysreg:$csr, uimm5:$imm)>; +def : InstAlias<"csrci $csr, $imm", + (YCSRRCI C0, cheri_csr_sysreg:$csr, uimm5:$imm)>; + +let EmitPriority = 0 in { +def : InstAlias<"csrw $csr, $imm", + (YCSRRWI C0, cheri_csr_sysreg:$csr, uimm5:$imm)>; +def : InstAlias<"csrs $csr, $imm", + (YCSRRSI C0, cheri_csr_sysreg:$csr, uimm5:$imm)>; +def : InstAlias<"csrc $csr, $imm", + (YCSRRCI C0, cheri_csr_sysreg:$csr, uimm5:$imm)>; + +def : InstAlias<"csrrw $cd, $csr, $imm", + (YCSRRWI GPCR:$cd, cheri_csr_sysreg:$csr, uimm5:$imm)>; +def : InstAlias<"csrrs $cd, $csr, $imm", + (YCSRRSI GPCR:$cd, cheri_csr_sysreg:$csr, uimm5:$imm)>; +def : InstAlias<"csrrc $cd, $csr, $imm", + (YCSRRCI GPCR:$cd, cheri_csr_sysreg:$csr, uimm5:$imm)>; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in +def PseudoPCCGet : Pseudo<(outs GPCR:$rd), (ins), []>; + +} // Predicates = [HasStdExtY] + +//===----------------------------------------------------------------------===// +// Memory-Access with Explicit Address Type Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZYHybrid, NotCapMode] in { +def PseudoLB_CAP : CheriPseudoLoad; +def PseudoLH_CAP : CheriPseudoLoad; +def PseudoLW_CAP : CheriPseudoLoad; +def PseudoLBU_CAP : CheriPseudoLoad; +def PseudoLHU_CAP : CheriPseudoLoad; +def PseudoLC_CAP : CheriPseudoLoad; +} + +let Predicates = [HasStdExtZYHybrid, IsRV64, NotCapMode] in { +def PseudoLWU_CAP : CheriPseudoLoad; +def PseudoLD_CAP : CheriPseudoLoad; +} + + +let Predicates = [HasStdExtZYHybrid, NotCapMode] in { +def PseudoSB_CAP : CheriPseudoStore; +def PseudoSH_CAP : CheriPseudoStore; +def PseudoSW_CAP : CheriPseudoStore; +def PseudoSC_CAP : CheriPseudoStore; +} + +let Predicates = [HasStdExtZYHybrid, IsRV64] in { +def PseudoSD_CAP : CheriPseudoStore; +} +
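+ +// Editorial sketch (not part of the original patch): the *_CAP pseudos above +// are matched by the explicit-address patterns applied later in this file +// (CheriExplicitLdPat / CheriExplicitStPat). The multiclass shape below is an +// assumption for illustration, modelled on CHERI-LLVM's existing pattern +// helpers; it is not a definition introduced by this patch: +// +// multiclass CheriExplicitLdPat<PatFrag LoadOp, RVInst Inst, +//                               ValueType vt = XLenVT> { +//   def : Pat<(vt (LoadOp GPCR:$rs1)), (Inst GPCR:$rs1)>; +// } +// defm : CheriExplicitLdPat<zextloadi8, PseudoLBU_CAP>; // lbu via capability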
+//===----------------------------------------------------------------------===// +// Memory-Access Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtY, NotCapMode] in { +let DecoderNamespace = "RVYOnly_", hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def LY : RVInstI<0b100, OPC_MISC_MEM, (outs GPCR:$rd), + (ins GPR:$rs1, simm12:$imm12), + "ly", "$rd, ${imm12}(${rs1})">; + +let DecoderNamespace = "RVYOnly_", hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def SY : RVInstS<0b100, OPC_STORE, (outs), + (ins GPCR:$rs2, GPR:$rs1, simm12:$imm12), + "sy", "$rs2, ${imm12}(${rs1})">; + +foreach alias = ["ly", "lc"] in { +def : InstAlias; +def : InstAlias; +} + +foreach alias = ["sy", "sc"] in { +def : InstAlias<"sc $rs2, ${imm12}(${rs1})", + (SY GPCR:$rs2, GPR:$rs1, simm12:$imm12), 0>; +def : InstAlias; +} +} // Predicates = [HasStdExtY, NotCapMode] + +let Predicates = [HasStdExtY, HasStdExtA, NotCapMode] in { +let DecoderNamespace = "RVYOnly_" in { +defm LR_Y : LR_C_r_aq_rl<"", 0b100, "lr.y">; +defm SC_Y : AMO_C_rr_aq_rl<"", 0b00011, 0b100, "sc.y", GPR>; +defm AMOSWAP_Y : AMO_C_rr_aq_rl<"", 0b00001, 0b100, "amoswap.y", GPCR>; +} +defm LR_Y : LR_C_r_aq_rl_aliases<["lr.c"]>; +defm SC_Y : AMO_C_rr_aq_rl_aliases<["sc.c"], GPR>; +defm AMOSWAP_Y : AMO_C_rr_aq_rl_aliases<["amoswap.c"], GPCR>; +} // Predicates = [HasStdExtY, HasStdExtA, NotCapMode] + +//===----------------------------------------------------------------------===// +// Capability Mode Instructions +//===----------------------------------------------------------------------===// + +/// 'I' (Integer) base + +let Predicates = [HasStdExtY, IsCapMode] in { +def : InstAlias<"auipc $rd, $imm20", + (AUIPCC GPCR:$rd, uimm20_auipc:$imm20)>; +def : InstAlias<"call $rd, $func", + (PseudoCCALLReg GPCR:$rd, cap_call_symbol:$func)>; +def : InstAlias<"call $func", + (PseudoCCALL cap_call_symbol:$func)>; +def : InstAlias<"tail $dst", + (PseudoCTAIL cap_call_symbol:$dst)>; +def : InstAlias<"jump $target, $rd", + (PseudoCJump GPCR:$rd, pseudo_cap_jump_symbol:$target)>; +} + +let Predicates = [HasStdExtY, IsCapMode] in { +let DecoderNamespace = "RVYCapModeOnly_", + hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { +def CLY : RVInstI<0b100, OPC_MISC_MEM, (outs GPCR:$rd), + (ins GPCR:$rs1, simm12:$imm12), + "ly", "$rd, ${imm12}(${rs1})">; +} +foreach alias = ["ly", "lc", "clc"] in { +def : InstAlias; +def : InstAlias; +} +} // Predicates = [HasStdExtY, IsCapMode] + +let Predicates = [HasStdExtY, IsCapMode] in { +let DecoderNamespace = "RVYCapModeOnly_", hasSideEffects = 0, + mayLoad = 0, mayStore = 1 in +def CSY : RVInstS<0b100, OPC_STORE, (outs), + (ins GPCR:$rs2, GPCR:$rs1, simm12:$imm12), + "sy", "$rs2, ${imm12}(${rs1})">; +foreach alias = ["sy", "sc", "csc"] in { +def : InstAlias; +def : InstAlias; +} +} // Predicates = [HasStdExtY, IsCapMode] + + +/// 'A' (Atomic Instructions) extension + +let Predicates = [HasStdExtY, HasStdExtA, IsCapMode] in { +defm CLR_Y : CLR_C_r_aq_rl<"", 0b100, "lr.y", "RVYCapModeOnly_">; +defm CSC_Y : CAMO_C_rr_aq_rl<"", 0b00011, 0b100, "sc.y", GPR, "RVYCapModeOnly_">; +defm CAMOSWAP_Y : CAMO_C_rr_aq_rl<"", 0b00001, 0b100, "amoswap.y", GPCR, "RVYCapModeOnly_">; + +defm CLR_Y : LR_C_r_aq_rl_aliases<["lr.c"], GPCRMemZeroOffset>; +defm CSC_Y : AMO_C_rr_aq_rl_aliases<["sc.c"], GPR, GPCRMemZeroOffset>; +defm CAMOSWAP_Y : AMO_C_rr_aq_rl_aliases<["amoswap.c"], GPCR, GPCRMemZeroOffset>; +} + + 
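+ +// Editorial example (not part of the original patch): the alias multiclasses +// above keep pre-RVY CHERI assembly working; in capability mode each pair +// below assembles to the same encoding (registers are illustrative): +// +//   lr.y ct0, (ca0) +//   lr.c ct0, (ca0)                 // alias of CLR_Y +//   amoswap.y.aqrl ct1, ct2, (ca0) +//   amoswap.c.aqrl ct1, ct2, (ca0)  // alias of CAMOSWAP_Y_AQ_RL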
+//===----------------------------------------------------------------------===// +// Pseudo-instructions and codegen patterns +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtY, IsCapMode] in { +def : InstAlias<"cadd $rd, $rs1, $rs2, $src", + (PseudoCIncOffsetTPRel GPCR:$rd, GPCR:$rs1, GPRNoX0:$rs2, tprel_add_symbol:$src), 0>; +def : InstAlias<"llc $dst, $src", (PseudoCLLC GPCR:$dst, bare_symbol:$src), 0>; +def : InstAlias<"lgc $dst, $src", (PseudoCLGC GPCR:$dst, bare_symbol:$src), 0>; +} + +class PatCapInspect<SDPatternOperator OpNode, RVInst Inst> + : Pat<(XLenVT (OpNode GPCR:$rs1)), (Inst GPCR:$rs1)>; +class PatZpGpcr<SDPatternOperator OpNode, RVInst Inst> + : Pat<(OpNode GPCR:$rs1), (Inst GPCR:$rs1)>; + +/// Capability-Inspection Instructions + +let Predicates = [HasStdExtY] in { +def : PatCapInspect; +def : PatCapInspect; +def : PatCapInspect; +def : PatCapInspect; +def : PatCapInspect; +def : PatCapInspect; +def : PatCapInspect; +let Predicates = [HasStdExtZYHybrid] in +def : PatCapInspect; + +def : Pat<(XLenVT (int_cheri_cap_offset_get GPCR:$cs1)), + (SUB (XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), + (YBASER GPCR:$cs1))>; +def : Pat<(int_cheri_cap_offset_set GPCR:$cs1, (XLenVT GPR:$rs2)), + (ADDY (YADDRW GPCR:$cs1, (YBASER GPCR:$cs1)), + GPR:$rs2)>; +} // Predicates = [HasStdExtY] + + +// Capability-Modification Instructions + +let Predicates = [HasStdExtY] in { +def : PatGpcrGpr; +def : PatGpcrGpr; +def : PatGpcrGpr; +def : PatZpGpcr; +def : PatGpcrGpcr; +def : PatGpcrGpr; +def : PatGpcrGpr; +def : Pat<(int_cheri_cap_bounds_set_exact GPCR:$rs1, csetbnd_imm:$imm), + (YBNDSIW GPCR:$rs1, csetbnd_imm:$imm)>; +def : PatGpcrGpr; +def : PatGpcrSimm12; + +def : PatGpcrGpr; +def : Pat<(int_cheri_bounded_stack_cap GPCR:$rs1, csetbnd_imm:$imm), + (YBNDSIW GPCR:$rs1, csetbnd_imm:$imm)>; +def : PatGpcrGpr; +def : Pat<(int_cheri_bounded_stack_cap_dynamic GPCR:$rs1, csetbnd_imm:$imm), + (YBNDSIW GPCR:$rs1, csetbnd_imm:$imm)>; +def : Pat<(CapFrameAddrRegImm GPCR:$rs1, simm12:$imm12), + (ADDIY GPCR:$rs1, simm12:$imm12)>; + +} // HasStdExtY + +let Predicates = [HasStdExtZYHybrid] in +def : PatGpcrGpr; + +/// Pointer-Arithmetic Instructions +let Predicates = [IsPureCapABI, HasStdExtY] in { +def : Pat<(inttoptr (XLenVT GPR:$rs2)), (ADDY C0, GPR:$rs2)>; +def : Pat<(inttoptr simm12:$imm12), (ADDIY C0, simm12:$imm12)>; +def : Pat<(XLenVT (ptrtoint GPCR:$rs1)), (PseudoCGetAddr GPCR:$rs1)>; +} + +/// Assertion Instructions + +let Predicates = [HasStdExtY] in { +def : PatGpcrGpcr; +def : PatGpcrGpcr; +} + +/// Special Capability Register Access Instructions + +let Predicates = [HasStdExtZYHybrid] in +def : Pat<(int_cheri_ddc_get), (YCSRRC CSR_DDC.Encoding, (XLenVT X0))>; + +let Predicates = [HasStdExtZYHybrid, NotCapMode] in +def : Pat<(int_cheri_pcc_get), (PseudoPCCGet)>; + +/// Adjusting to Compressed Capability Precision Instructions + +let Predicates = [HasStdExtY] in { +def : Pat<(XLenVT (int_cheri_round_representable_length (XLenVT GPR:$rs1))), + (AND (ADD GPR:$rs1, (XORI (YAMASK GPR:$rs1), -1)), (YAMASK GPR:$rs1))>; +def : PatGpr; +} + +/// Capability loads + +let Predicates = [HasStdExtZYHybrid, NotCapMode] in { +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +} + +let Predicates = [HasStdExtZYHybrid, NotCapMode, IsRV32] in { +defm : CheriExplicitLdPat; +} + +let Predicates = [HasStdExtZYHybrid, NotCapMode, IsRV64] in { +defm : CheriExplicitLdPat; +defm :
CheriExplicitLdPat; +defm : CheriExplicitLdPat; +defm : CheriExplicitLdPat; +} + +let Predicates = [HasStdExtZYHybrid, NotCapMode, HasStdExtF] in +def : Pat<(load GPCR:$rs1), (FMV_W_X (PseudoLW_CAP GPCR:$rs1))>; + +let Predicates = [HasStdExtZYHybrid, NotCapMode, HasStdExtD, IsRV32] in +def : Pat<(load GPCR:$rs1), + (BuildPairF64Pseudo (PseudoLW_CAP GPCR:$rs1), + (PseudoLW_CAP (CIncOffsetImm GPCR:$rs1, 4)))>; + +let Predicates = [HasCheriOrRVY, NotCapMode, HasStdExtD, IsRV64] in +def : Pat<(load GPCR:$rs1), (FMV_D_X (PseudoLD_CAP GPCR:$rs1))>; + +/// Capability Stores + +let Predicates = [HasStdExtZYHybrid, NotCapMode] in { +defm : CheriExplicitStPat; +defm : CheriExplicitStPat; +defm : CheriExplicitStPat; +} + +let Predicates = [HasStdExtZYHybrid, NotCapMode, IsRV32] in { +defm : CheriExplicitStPat; +} + +let Predicates = [HasCheriOrRVY, NotCapMode, IsRV64] in { +defm : CheriExplicitStPat; +defm : CheriExplicitStPat; +} + +let Predicates = [HasCheriOrRVY, NotCapMode, HasStdExtF] in +def : Pat<(store FPR32:$rs2, GPCR:$rs1), + (PseudoSW_CAP (FMV_X_W FPR32:$rs2), GPCR:$rs1)>; + +let Predicates = [HasCheriOrRVY, NotCapMode, HasStdExtD, IsRV64] in +def : Pat<(store FPR64:$rs2, GPCR:$rs1), + (PseudoSD_CAP (FMV_X_D FPR64:$rs2), GPCR:$rs1)>; + +/// Memory-Access Instructions + +let Predicates = [HasStdExtY, NotCapMode] in { +def : LdPat; +def : StPat; +def : LdPat; +def : AtomicStPat; +defm : AMOCapPat<"", "atomic_swap_cap", "AMOSWAP_Y">; +} + +let Predicates = [HasStdExtY, HasStdExtA, IsCapMode] in { +defm : CheriLdPat; +defm : CheriAtomicStPat; +defm : CheriAMOCapPat<"","atomic_swap_cap", "CAMOSWAP_Y">; +} + +let Predicates = [HasStdExtY, IsCapMode] in { +defm : CheriLdPat; +defm : CheriStPat; +} + +//===----------------------------------------------------------------------===// +// Compress Instruction tablegen backend. 
+//===----------------------------------------------------------------------===// + +// Quadrant 0 +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsCapMode] in { +def : CompressPat<(ADDIY GPCRC:$rd, CSP:$rs1, uimm10_lsb00nonzero:$imm), + (C_CIncOffsetImm4CSPN GPCRC:$rd, CSP:$rs1, uimm10_lsb00nonzero:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +def : CompressPat<(CLY GPCRC:$rd, GPCRC:$rs1, uimm9_lsb0000:$imm), + (C_CLC_128 GPCRC:$rd, GPCRC:$rs1, uimm9_lsb0000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { +def : CompressPat<(CLY GPCRC:$rd, GPCRC:$rs1, uimm8_lsb000:$imm), + (C_CLC_64 GPCRC:$rd, GPCRC:$rs1, uimm8_lsb000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +def : CompressPat<(CSY GPCRC:$rs2, GPCRC:$rs1, uimm9_lsb0000:$imm), + (C_CSC_128 GPCRC:$rs2, GPCRC:$rs1, uimm9_lsb0000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { +def : CompressPat<(CSY GPCRC:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm), + (C_CSC_64 GPCRC:$rs2, GPCRC:$rs1, uimm8_lsb000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] + +// Quadrant 1 +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsCapMode] in { +def : CompressPat<(ADDIY C2, C2, simm10_lsb0000nonzero:$imm), + (C_CIncOffsetImm16CSP C2, simm10_lsb0000nonzero:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsCapMode] + +// Quadrant 2 +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +def : CompressPat<(CLY GPCRNoC0:$rd, CSP:$rs1, uimm10_lsb0000:$imm), + (C_CLCCSP_128 GPCRNoC0:$rd, CSP:$rs1, uimm10_lsb0000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { +def : CompressPat<(CLY GPCRNoC0:$rd, CSP:$rs1, uimm9_lsb000:$imm), + (C_CLCCSP_64 GPCRNoC0:$rd, CSP:$rs1, uimm9_lsb000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] in { +def : CompressPat<(CSY GPCR:$rs2, CSP:$rs1, uimm10_lsb0000:$imm), + (C_CSCCSP_128 GPCR:$rs2, CSP:$rs1, uimm10_lsb0000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV64, IsCapMode] + +let Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] in { +def : CompressPat<(CSY GPCR:$rs2, CSP:$rs1, uimm9_lsb000:$imm), + (C_CSCCSP_64 GPCR:$rs2, CSP:$rs1, uimm9_lsb000:$imm)>; +} // Predicates = [HasStdExtY, HasCheriRVC, HasStdExtC, IsRV32, IsCapMode] diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index fda7b9b97c31a..0b0181c2a6055 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -65,13 +65,15 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_NoRegs_SaveList; if (MF->getFunction().hasFnAttribute("interrupt")) { if (Subtarget.hasStdExtD()) - return Subtarget.hasCheri() ? CSR_XLEN_CLEN_F64_Interrupt_SaveList - : CSR_XLEN_F64_Interrupt_SaveList; + return Subtarget.hasCheriOrStdExtY() + ? 
CSR_XLEN_CLEN_F64_Interrupt_SaveList + : CSR_XLEN_F64_Interrupt_SaveList; if (Subtarget.hasStdExtF()) - return Subtarget.hasCheri() ? CSR_XLEN_CLEN_F32_Interrupt_SaveList - : CSR_XLEN_F32_Interrupt_SaveList; - return Subtarget.hasCheri() ? CSR_XLEN_CLEN_Interrupt_SaveList - : CSR_Interrupt_SaveList; + return Subtarget.hasCheriOrStdExtY() + ? CSR_XLEN_CLEN_F32_Interrupt_SaveList + : CSR_XLEN_F32_Interrupt_SaveList; + return Subtarget.hasCheriOrStdExtY() ? CSR_XLEN_CLEN_Interrupt_SaveList + : CSR_Interrupt_SaveList; } switch (Subtarget.getTargetABI()) { @@ -214,9 +216,10 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, unsigned Opc; unsigned OpcImm; const bool IsPureCapABI = RISCVABI::isCheriPureCapABI(ST.getTargetABI()); + const bool HasRVY = ST.hasFeature(RISCV::FeatureStdExtY); if (IsPureCapABI) { - Opc = RISCV::CIncOffset; - OpcImm = RISCV::CIncOffsetImm; + Opc = HasRVY ? RISCV::ADDY : RISCV::CIncOffset; + OpcImm = HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm; } else { Opc = RISCV::ADD; OpcImm = RISCV::ADDI; @@ -482,7 +485,9 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Offset.getScalable() || Offset.getFixed()) { Register DestReg; - if (MI.getOpcode() == RISCV::ADDI || MI.getOpcode() == RISCV::CIncOffsetImm) + if (MI.getOpcode() == RISCV::ADDI || + MI.getOpcode() == RISCV::CIncOffsetImm || + MI.getOpcode() == RISCV::ADDIY) DestReg = MI.getOperand(0).getReg(); else if (RISCVABI::isCheriPureCapABI(ST.getTargetABI())) DestReg = MRI.createVirtualRegister(&RISCV::GPCRRegClass); @@ -633,8 +638,9 @@ Register RISCVRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned Opc; Register BaseReg; + const bool HasRVY = ST.hasFeature(RISCV::FeatureStdExtY); if (RISCVABI::isCheriPureCapABI(ST.getTargetABI())) { - Opc = RISCV::CIncOffsetImm; + Opc = HasRVY ? RISCV::ADDIY : RISCV::CIncOffsetImm; BaseReg = MFI.createVirtualRegister(&RISCV::GPCRRegClass); } else { Opc = RISCV::ADDI; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 733f6dd2229dc..11da51223d66c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -128,6 +128,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return HasStdExtZfh || HasStdExtZfhmin || HasStdExtZfbfmin || HasStdExtZvfbfwma; } + bool hasCheriOrStdExtY() const { return HasStdExtY || HasCheri; } bool is64Bit() const { return IsRV64; } MVT getXLenVT() const { return XLenVT; } unsigned getXLen() const { return XLen; } @@ -165,7 +166,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return UserReservedRegister[i]; } MVT typeForCapabilities() const { - assert(HasCheri && "Cannot get capability type for non-CHERI"); + assert(hasCheriOrStdExtY() && "Cannot get capability type for non-CHERI"); return is64Bit() ? MVT::c128 : MVT::c64; } diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 5a878ec98c020..f3989a61564c9 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -35,6 +35,7 @@ class SysReg op> { // bits<6> Number = op{5 - 0}; code FeaturesRequired = [{ {} }]; bit isRV32Only = 0; + bit isDisabledInCapMode = 0; } def SysRegsList : GenericTable { @@ -42,7 +43,7 @@ def SysRegsList : GenericTable { // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. 
let Fields = [ "Name", "DeprecatedName", "Encoding", "FeaturesRequired", - "isRV32Only", + "isRV32Only", "isDisabledInCapMode" ]; let PrimaryKey = [ "Encoding" ]; @@ -66,7 +67,7 @@ def SiFiveRegsList : GenericTable { // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. let Fields = [ "Name", "DeprecatedName", "Encoding", "FeaturesRequired", - "isRV32Only", + "isRV32Only", "isDisabledInCapMode" ]; let PrimaryKey = [ "Encoding" ]; @@ -121,6 +122,7 @@ foreach i = 3...31 in //===----------------------------------------------------------------------===// def : SysReg<"sstatus", 0x100>; def : SysReg<"sie", 0x104>; +let isDisabledInCapMode = 1 in def : SysReg<"stvec", 0x105>; def : SysReg<"scounteren", 0x106>; def : SysReg<"stimecmp", 0x14D>; @@ -136,8 +138,10 @@ def : SysReg<"senvcfg", 0x10A>; //===----------------------------------------------------------------------===// // Supervisor Trap Handling //===----------------------------------------------------------------------===// +let isDisabledInCapMode = 1 in { def : SysReg<"sscratch", 0x140>; def : SysReg<"sepc", 0x141>; +} // isDisabledInCapMode = 1 def : SysReg<"scause", 0x142>; let DeprecatedName = "sbadaddr" in def : SysReg<"stval", 0x143>; @@ -216,9 +220,11 @@ def : SysReg<"htimedeltah", 0x615>; def : SysReg<"vsstatus", 0x200>; def : SysReg<"vsie", 0x204>; +let isDisabledInCapMode = 1 in { def : SysReg<"vstvec", 0x205>; def : SysReg<"vsscratch", 0x240>; def : SysReg<"vsepc", 0x241>; +} def : SysReg<"vscause", 0x242>; def : SysReg<"vstval", 0x243>; def : SysReg<"vsip", 0x244>; @@ -245,6 +251,7 @@ def : SysReg<"misa", 0x301>; def : SysReg<"medeleg", 0x302>; def : SysReg<"mideleg", 0x303>; def : SysReg<"mie", 0x304>; +let isDisabledInCapMode = 1 in def : SysReg<"mtvec", 0x305>; def : SysReg<"mcounteren", 0x306>; let isRV32Only = 1 in @@ -253,8 +260,10 @@ def : SysReg<"mstatush", 0x310>; //===----------------------------------------------------------------------===// // Machine Trap Handling //===----------------------------------------------------------------------===// +let isDisabledInCapMode = 1 in { def : SysReg<"mscratch", 0x340>; def : SysReg<"mepc", 0x341>; +} // isDisabledInCapMode = 1 def : SysReg<"mcause", 0x342>; let DeprecatedName = "mbadaddr" in def : SysReg<"mtval", 0x343>; @@ -349,13 +358,16 @@ def : SysReg<"mcontext", 0x7A8>; // Debug Mode Registers //===----------------------------------------------------------------------===// def : SysReg<"dcsr", 0x7B0>; +let isDisabledInCapMode = 1 in def : SysReg<"dpc", 0x7B1>; // "dscratch" is an alternative name for "dscratch0" which appeared in earlier // drafts of the RISC-V debug spec +let isDisabledInCapMode = 1 in { let DeprecatedName = "dscratch" in def : SysReg<"dscratch0", 0x7B2>; def : SysReg<"dscratch1", 0x7B3>; +} // isDisabledInCapMode = 1 //===----------------------------------------------------------------------===// // User Vector CSRs @@ -450,3 +462,4 @@ def : SysReg<"jvt", 0x017>; //===----------------------------------------------- include "RISCVSystemOperandsXCheri.td" +include "RISCVSystemOperandsY.td" diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperandsY.td b/llvm/lib/Target/RISCV/RISCVSystemOperandsY.td new file mode 100644 index 0000000000000..ff8acea5666a6 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSystemOperandsY.td @@ -0,0 +1,90 @@ +//===- RISCVSystemOperandsY.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the symbolic operands permitted for various kinds of +// CHERI RISC-V system instruction. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CHERI CSR (control and status register read/write) instruction options. +//===----------------------------------------------------------------------===// + + +class CheriSysReg<string name, bits<12> op> { + string Name = name; + bits<12> Encoding = op; +} + +def CheriSysRegsList : GenericTable { + let FilterClass = "CheriSysReg"; + let Fields = [ + "Name", "Encoding" + ]; + let PrimaryKey = [ "Encoding" ]; + let PrimaryKeyName = "lookupCheriSysRegByEncoding"; +} + +def lookupCheriSysRegByName : SearchIndex { + let Table = CheriSysRegsList; + let Key = [ "Name" ]; +} + +//===----------------------------------------------------------------------===// +// Debug-mode Cheri CSRs +//===----------------------------------------------------------------------===// +def : CheriSysReg<"dpcc", 0x7b1>; +def : CheriSysReg<"dscratch0c", 0x7b2>; +def : CheriSysReg<"dscratch1c", 0x7b3>; +def : CheriSysReg<"dinfc", 0x7bd>; + +//===----------------------------------------------------------------------===// +// Machine-mode Cheri CSRs +//===----------------------------------------------------------------------===// +def : CheriSysReg<"mtvecc", 0x305>; +def : CheriSysReg<"mscratchc", 0x340>; +def : CheriSysReg<"mepcc", 0x341>; +def : CheriSysReg<"mtidc", 0x780>; + +//===----------------------------------------------------------------------===// +// Supervisor-mode Cheri CSRs +//===----------------------------------------------------------------------===// +def : CheriSysReg<"stvecc", 0x105>; +def : CheriSysReg<"sscratchc", 0x140>; +def : CheriSysReg<"sepcc", 0x141>; +def : SysReg<"stval2", 0x14b>; +def : CheriSysReg<"stidc", 0x580>; + +//===----------------------------------------------------------------------===// +// User-mode Cheri CSRs +//===----------------------------------------------------------------------===// +def : CheriSysReg<"jvtc", 0x017>; +def : CheriSysReg<"utidc", 0x480>; + +//===----------------------------------------------------------------------===// +// Zyhybrid CSRs +//===----------------------------------------------------------------------===// +def : CheriSysReg<"dddc", 0x7bc>; +def : CheriSysReg<"mtdc", 0x74c>; +def : CheriSysReg<"stdc", 0x163>; +def CSR_DDC : CheriSysReg<"ddc", 0x416>; + +//===----------------------------------------------------------------------===// +// Hypervisor Trap Handling +//===----------------------------------------------------------------------===// +def : SysReg<"htval2", 0x64b>; + +//===----------------------------------------------------------------------===// +// Virtual Supervisor Registers +//===----------------------------------------------------------------------===// +def : CheriSysReg<"vstvecc", 0x205>; +def : CheriSysReg<"vsscratchc", 0x240>; +def : CheriSysReg<"vsepcc", 0x241>; +def : CheriSysReg<"vstdc", 0x245>; +def : SysReg<"vstval2", 0x24b>; +def : CheriSysReg<"vstidc", 0xa80>; diff --git a/llvm/test/MC/RISCV/cheri/rv32cxcheri-cap-mode-invalid.s b/llvm/test/MC/RISCV/cheri/rv32cxcheri-cap-mode-invalid.s index 1788467d1a5b2..12d393ebb7c93 100644 ---
+++ b/llvm/test/MC/RISCV/cheri/rv32cxcheri-cap-mode-invalid.s
@@ -62,4 +62,4 @@ c.sc a5, 16(ca3)
 c.sc ca5, 16(a3)
 # CHECK: :[[#@LINE-1]]:14: error: invalid operand for instruction
 
-# CHECK-RV32-C: {{.*}}
\ No newline at end of file
+# CHECK-RV32-C: {{.*}}

From 8ac1cd81c24e07d527f9af33066d8692b4a9356b Mon Sep 17 00:00:00 2001
From: Petr Vesely
Date: Thu, 21 Aug 2025 15:10:53 +0100
Subject: [PATCH 10/13] [RISCV] Emit no/capmode directives around ymodesw
 instructions

---
 llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 51 ++++++++++++++++++++---
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index b787ddf7ca0b1..7cba35e3a6e36 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -91,6 +91,24 @@ class RISCVAsmPrinter : public AsmPrinter {
   void emitNTLHint(const MachineInstr *MI);
 
   bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
+
+  void emitCheriModeSwitchAnnotations(const MachineInstr *MI);
+
+  MCSubtargetInfo &copySTI();
+
+  void setFeatureBits(uint64_t Feature) {
+    if (!STI->hasFeature(Feature)) {
+      MCSubtargetInfo &STI = copySTI();
+      STI.ToggleFeature(Feature);
+    }
+  }
+
+  void clearFeatureBits(uint64_t Feature) {
+    if (STI->hasFeature(Feature)) {
+      MCSubtargetInfo &STI = copySTI();
+      STI.ToggleFeature(Feature);
+    }
+  }
 };
 }
 
@@ -139,10 +157,10 @@ void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) {
 }
 
 void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
-  RISCV_MC::verifyInstructionPredicates(MI->getOpcode(),
-                                        getSubtargetInfo().getFeatureBits());
+  RISCV_MC::verifyInstructionPredicates(MI->getOpcode(), STI->getFeatureBits());
 
   emitNTLHint(MI);
+  emitCheriModeSwitchAnnotations(MI);
 
   // Do any auto-generated pseudo lowerings.
   if (emitPseudoExpansionLowering(*OutStreamer, MI))
@@ -283,10 +301,9 @@ void RISCVAsmPrinter::emitFunctionEntryLabel() {
     RTS.emitDirectiveVariantCC(*CurrentFnSym);
   }
   AsmPrinter::emitFunctionEntryLabel();
-  auto &Subtarget = MF->getSubtarget<RISCVSubtarget>();
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-  if (RISCVABI::isCheriPureCapABI(Subtarget.getTargetABI()) &&
-      MJTI && !MJTI->isEmpty()) {
+  if (RISCVABI::isCheriPureCapABI(STI->getTargetABI()) && MJTI &&
+      !MJTI->isEmpty()) {
     MCSymbol *Sym =
         getSymbolWithGlobalValueBase(&MF->getFunction(), "$jump_table_base");
     OutStreamer->emitLabel(Sym);
@@ -864,3 +881,27 @@ bool RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
   }
   return false;
 }
+
+void RISCVAsmPrinter::emitCheriModeSwitchAnnotations(const MachineInstr *MI) {
+  auto &RTS =
+      static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case RISCV::YMODESWY:
+    RTS.emitDirectiveOptionCapMode();
+    setFeatureBits(RISCV::FeatureCapMode);
+    break;
+  case RISCV::YMODESWI:
+    RTS.emitDirectiveOptionNoCapMode();
+    clearFeatureBits(RISCV::FeatureCapMode);
+    break;
+  }
+}
+
+MCSubtargetInfo &RISCVAsmPrinter::copySTI() {
+  RISCVSubtarget &STICopy = *static_cast<RISCVSubtarget *>(
+      &OutStreamer->getContext().getSubtargetCopy(*STI));
+  STI = &STICopy;
+  return STICopy;
+}

From 8fdd55821da72eadcc1c57e832a7f2c43b35a1d3 Mon Sep 17 00:00:00 2001
From: Petr Vesely
Date: Tue, 19 Aug 2025 15:19:14 +0100
Subject: [PATCH 11/13] [RISCV] Add MC tests for rvy instructions

---
 .../test/MC/RISCV/cheri/rvy/compress-rv32yi.s |  46 ++++
 .../test/MC/RISCV/cheri/rvy/compress-rv64yi.s |  46 ++++
 .../RISCV/cheri/rvy/rv32ay-cap-mode-valid.s   |  88 ++++++
 llvm/test/MC/RISCV/cheri/rvy/rv32ay-valid.s   |  85 ++++++
 .../MC/RISCV/cheri/rvy/rv32y-cap-mode-valid.s |  50 ++++
 llvm/test/MC/RISCV/cheri/rvy/rv32y-valid.s    | 258 ++++++++++++++++++
 .../MC/RISCV/cheri/rvy/rv32zyhybrid-valid.s   |  49 ++++
 .../RISCV/cheri/rvy/rv64ya-cap-mode-valid.s   | 191 +++++++++++++
 .../MC/RISCV/cheri/rvy/zyhybrid-invalid.s     |  29 ++
 9 files changed, 842 insertions(+)
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/compress-rv32yi.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/compress-rv64yi.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/rv32ay-cap-mode-valid.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/rv32ay-valid.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/rv32y-cap-mode-valid.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/rv32y-valid.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/rv32zyhybrid-valid.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/rv64ya-cap-mode-valid.s
 create mode 100644 llvm/test/MC/RISCV/cheri/rvy/zyhybrid-invalid.s

diff --git a/llvm/test/MC/RISCV/cheri/rvy/compress-rv32yi.s b/llvm/test/MC/RISCV/cheri/rvy/compress-rv32yi.s
new file mode 100644
index 0000000000000..8b9a054b3ce8f
--- /dev/null
+++ b/llvm/test/MC/RISCV/cheri/rvy/compress-rv32yi.s
@@ -0,0 +1,46 @@
+# RUN: llvm-mc -triple riscv32 -mattr=+c,+y,+cap-mode -show-encoding < %s \
+# RUN:   | FileCheck -check-prefixes=CHECK,CHECK-ALIAS %s
+# RUN: llvm-mc -triple riscv32 -mattr=+c,+y,+cap-mode -show-encoding \
+# RUN:   -riscv-no-aliases < %s | FileCheck -check-prefixes=CHECK,CHECK-INST %s
+# RUN: llvm-mc -triple riscv32 -mattr=+c,+y,+cap-mode -filetype=obj < %s \
+# RUN:   | llvm-objdump --triple=riscv32 --mattr=+c,+y,+cap-mode,-xcheri -d - \
+# RUN:   | FileCheck -check-prefixes=CHECK-BYTES,CHECK-ALIAS %s
+# RUN: llvm-mc -triple riscv32 -mattr=+c,+y,+cap-mode -filetype=obj < %s \
+# RUN:   | llvm-objdump --triple=riscv32
--mattr=+c,+y,+cap-mode -d -M no-aliases - \ +# RUN: | FileCheck -check-prefixes=CHECK-BYTES,CHECK-INST %s + +# CHECK-BYTES: e0 1f +# CHECK-ALIAS: addiy cs0, csp, 1020 +# CHECK-INST: c.cincoffset4cspn cs0, csp, 1020 +# CHECK: # encoding: [0xe0,0x1f] +addiy cs0, csp, 1020 + +# CHECK-BYTES: a0 7b +# CHECK-ALIAS: ly cs0, 112(ca5) +# CHECK-INST: c.lc +# CHECK64: # encoding: [0xa0,0x7b] +ly cs0, 112(ca5) + +# CHECK-BYTES: a0 fb +# CHECK-ALIAS: sy cs0, 112(ca5) +# CHECK-INST: c.sc +# CHECK: # encoding: [0xa0,0xfb] +sy cs0, 112(ca5) + +# CHECK-BYTES: 39 71 +# CHECK-ALIAS: addiy csp, csp, -64 +# CHECK-INST: c.cincoffset16csp csp, -64 +# CHECK: # encoding: [0x39,0x71] +addiy csp, csp, -64 + +# CHECK-BYTES: ce 70 +# CHECK-ALIAS: ly cra, 240(csp) +# CHECK-INST: c.lcsp +# CHECK: # encoding: [0xce,0x70] +ly cra, 240(csp) + +# CHECK-BYTES: 86 f9 +# CHECK-ALIAS: sy cra, 240(csp) +# CHECK-INST: c.scsp +# CHECK: # encoding: [0x86,0xf9] +sc cra, 240(csp) diff --git a/llvm/test/MC/RISCV/cheri/rvy/compress-rv64yi.s b/llvm/test/MC/RISCV/cheri/rvy/compress-rv64yi.s new file mode 100644 index 0000000000000..648b7da755e0d --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/compress-rv64yi.s @@ -0,0 +1,46 @@ +# RUN: llvm-mc -triple riscv64 -mattr=+c,+y,+cap-mode -show-encoding < %s \ +# RUN: | FileCheck -check-prefixes=CHECK-ALIAS %s +# RUN: llvm-mc -triple riscv64 -mattr=+c,+y,+cap-mode -show-encoding \ +# RUN: -riscv-no-aliases < %s | FileCheck -check-prefixes=CHECK-INST %s +# RUN: llvm-mc -triple riscv64 -mattr=+c,+y,+cap-mode -filetype=obj < %s \ +# RUN: | llvm-objdump --triple=riscv64 --mattr=+c,+y,+cap-mode,-xcheri -d - \ +# RUN: | FileCheck -check-prefixes=CHECK-BYTES,CHECK-ALIAS %s +# RUN: llvm-mc -triple riscv64 -mattr=+c,+y,+cap-mode -filetype=obj < %s \ +# RUN: | llvm-objdump --triple=riscv64 --mattr=+c,+y,+cap-mode -d -M no-aliases - \ +# RUN: | FileCheck -check-prefixes=CHECK-BYTES,CHECK-INST %s + +# CHECK-BYTES: e0 1f +# CHECK-ALIAS: addiy cs0, csp, 1020 +# CHECK-INST: c.cincoffset4cspn cs0, csp, 1020 +# CHECK: # encoding: [0xe0,0x1f] +addiy cs0, csp, 1020 + +# CHECK-BYTES: a0 3b +# CHECK-ALIAS: ly cs0, 112(ca5) +# CHECK-INST: c.lc +# CHECK64: # encoding: [0xa0,0x7b] +ly cs0, 112(ca5) + +# CHECK-BYTES: a0 bb +# CHECK-ALIAS: sy cs0, 112(ca5) +# CHECK-INST: c.sc +# CHECK: # encoding: [0xa0,0xfb] +sy cs0, 112(ca5) + +# CHECK-BYTES: 39 71 +# CHECK-ALIAS: addiy csp, csp, -64 +# CHECK-INST: c.cincoffset16csp csp, -64 +# CHECK: # encoding: [0x39,0x71] +addiy csp, csp, -64 + +# CHECK-BYTES: ce 30 +# CHECK-ALIAS: ly cra, 240(csp) +# CHECK-INST: c.lcsp +# CHECK: # encoding: [0xce,0x70] +ly cra, 240(csp) + +# CHECK-BYTES: 86 b9 +# CHECK-ALIAS: sy cra, 240(csp) +# CHECK-INST: c.scsp +# CHECK: # encoding: [0x86,0xf9] +sc cra, 240(csp) diff --git a/llvm/test/MC/RISCV/cheri/rvy/rv32ay-cap-mode-valid.s b/llvm/test/MC/RISCV/cheri/rvy/rv32ay-cap-mode-valid.s new file mode 100644 index 0000000000000..ba5f12b7dcd4c --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/rv32ay-cap-mode-valid.s @@ -0,0 +1,88 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+y,+zyhybrid,+cap-mode -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+y,+zyhybrid,+cap-mode < %s \ +# RUN: | llvm-objdump --mattr=+a,+y,+zyhybrid,+cap-mode -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+y,+zyhybrid,+cap-mode -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck 
-check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+y,+zyhybrid,+cap-mode < %s \ +# RUN: | llvm-objdump --mattr=+a,+y,+zyhybrid,+cap-mode -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# Tests instructions available in purecap + A, rv32 and rv64. + +# CHECK-ASM-AND-OBJ: lr.y ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x10] +lr.y ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y.aq ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x14] +lr.y.aq ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y.rl ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x12] +lr.y.rl ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y.aqrl ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x16] +lr.y.aqrl ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x10] +lr.c ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y.aq ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x14] +lr.c.aq ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y.rl ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x12] +lr.c.rl ca1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.y.aqrl ca1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0x05,0x16] +lr.c.aqrl ca1, 0(ca0) + +# CHECK-ASM-AND-OBJ: amoswap.y ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x08] +amoswap.y ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y.aq ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x0c] +amoswap.y.aq ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y.rl ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x0a] +amoswap.y.rl ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y.aqrl ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x0e] +amoswap.y.aqrl ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x08] +amoswap.c ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y.aq ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x0c] +amoswap.c.aq ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y.rl ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x0a] +amoswap.c.rl ca1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: amoswap.y.aqrl ca1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x0e] +amoswap.c.aqrl ca1, ca2, 0(ca0) + +# CHECK-ASM-AND-OBJ: sc.y a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x18] +sc.y a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y.aq a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x1c] +sc.y.aq a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y.rl a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x1a] +sc.y.rl a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y.aqrl a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x1e] +sc.y.aqrl a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x18] +sc.c a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y.aq a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x1c] +sc.c.aq a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y.rl a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x1a] +sc.c.rl a1, ca2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.y.aqrl a1, ca2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x45,0xc5,0x1e] +sc.c.aqrl a1, ca2, 0(ca0) diff --git a/llvm/test/MC/RISCV/cheri/rvy/rv32ay-valid.s b/llvm/test/MC/RISCV/cheri/rvy/rv32ay-valid.s new file mode 100644 index 0000000000000..029b42777cc3e --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/rv32ay-valid.s @@ -0,0 +1,85 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+y -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck 
-check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+y -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+a,+y < %s \ +# RUN: | llvm-objdump -M no-aliases --mattr=+a,+y -d - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+a,+y < %s \ +# RUN: | llvm-objdump -M no-aliases --mattr=+a,+y -d - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: lr.y ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x10] +lr.y ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x10] +lr.c ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y.aq ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x14] +lr.y.aq ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y.aq ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x14] +lr.c.aq ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y.rl ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x12] +lr.y.rl ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y.rl ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x12] +lr.c.rl ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y.aqrl ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x16] +lr.y.aqrl ct0, (t1) +# CHECK-ASM-AND-OBJ: lr.y.aqrl ct0, (t1) +# CHECK-ASM: encoding: [0xaf,0x42,0x03,0x16] +lr.c.aqrl ct0, (t1) + +# CHECK-ASM-AND-OBJ: sc.y t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x18] +sc.y t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x18] +sc.c t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y.aq t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x1c] +sc.y.aq t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y.aq t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x1c] +sc.c.aq t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y.rl t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x1a] +sc.y.rl t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y.rl t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x1a] +sc.c.rl t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y.aqrl t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x1e] +sc.y.aqrl t0, ct1, (t2) +# CHECK-ASM-AND-OBJ: sc.y.aqrl t0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x1e] +sc.c.aqrl t0, ct1, (t2) + +# CHECK-ASM-AND-OBJ: amoswap.y ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x08] +amoswap.y ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x08] +amoswap.c ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y.aq ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x0c] +amoswap.y.aq ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y.aq ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x0c] +amoswap.c.aq ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y.rl ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x0a] +amoswap.y.rl ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y.rl ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x0a] +amoswap.c.rl ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y.aqrl ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x0e] +amoswap.y.aqrl ct0, ct1, (t2) +# CHECK-ASM-AND-OBJ: amoswap.y.aqrl ct0, ct1, (t2) +# CHECK-ASM: encoding: [0xaf,0xc2,0x63,0x0e] +amoswap.c.aqrl ct0, ct1, (t2) diff --git a/llvm/test/MC/RISCV/cheri/rvy/rv32y-cap-mode-valid.s b/llvm/test/MC/RISCV/cheri/rvy/rv32y-cap-mode-valid.s new file mode 100644 index 0000000000000..0e1f88b72c8b2 --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/rv32y-cap-mode-valid.s @@ -0,0 
+1,50 @@ +# RUN: llvm-mc -triple=riscv32 -mattr=+y,+cap-mode -riscv-no-aliases -show-encoding < %s \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -triple=riscv64 -mattr=+y,+cap-mode -riscv-no-aliases -show-encoding < %s \ +# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+y,+cap-mode < %s \ +# RUN: | llvm-objdump -M no-aliases --mattr=+y,+cap-mode -d - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+y,+cap-mode < %s \ +# RUN: | llvm-objdump -M no-aliases --mattr=+y,+cap-mode -d - \ +# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: ly ca2, 17(ca0) +# CHECK-ASM-SAME: encoding: [0x0f,0x46,0x15,0x01] +ly ca2, 17(ca0) +# CHECK-ASM-AND-OBJ: ly ca2, 17(ca0) +# CHECK-ASM-SAME: encoding: [0x0f,0x46,0x15,0x01] +lc ca2, 17(ca0) +# CHECK-ASM-AND-OBJ: ly ca2, 17(ca0) +# CHECK-ASM-SAME: encoding: [0x0f,0x46,0x15,0x01] +clc ca2, 17(ca0) + +# CHECK-ASM-AND-OBJ: ly ca2, 0(ca0) +# CHECK-ASM-SAME: encoding: [0x0f,0x46,0x05,0x00] +ly ca2, (ca0) +# CHECK-ASM-AND-OBJ: ly ca2, 0(ca0) +# CHECK-ASM-SAME: encoding: [0x0f,0x46,0x05,0x00] +lc ca2, (ca0) +# CHECK-ASM-AND-OBJ: ly ca2, 0(ca0) +# CHECK-ASM-SAME: encoding: [0x0f,0x46,0x05,0x00] +clc ca2, (ca0) + +# CHECK-ASM-AND-OBJ: sy ca5, 25(ca3) +# CHECK-ASM-SAME: encoding: [0xa3,0xcc,0xf6,0x00] +sy ca5, 25(ca3) +# CHECK-ASM-AND-OBJ: sy ca5, 25(ca3) +# CHECK-ASM-SAME: encoding: [0xa3,0xcc,0xf6,0x00] +sc ca5, 25(ca3) +# CHECK-ASM-AND-OBJ: sy ca5, 25(ca3) +# CHECK-ASM-SAME: encoding: [0xa3,0xcc,0xf6,0x00] +csc ca5, 25(ca3) + +# CHECK-ASM-AND-OBJ: sy ca5, 0(ca3) +# CHECK-ASM-SAME: encoding: [0x23,0xc0,0xf6,0x00] +sy ca5, (ca3) +# CHECK-ASM-AND-OBJ: sy ca5, 0(ca3) +# CHECK-ASM-SAME: encoding: [0x23,0xc0,0xf6,0x00] +sc ca5, (ca3) +# CHECK-ASM-AND-OBJ: sy ca5, 0(ca3) +# CHECK-ASM-SAME: encoding: [0x23,0xc0,0xf6,0x00] +csc ca5, (ca3) diff --git a/llvm/test/MC/RISCV/cheri/rvy/rv32y-valid.s b/llvm/test/MC/RISCV/cheri/rvy/rv32y-valid.s new file mode 100644 index 0000000000000..27fb21a97dd01 --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/rv32y-valid.s @@ -0,0 +1,258 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+y -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+y < %s \ +# RUN: | llvm-objdump --mattr=+y -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# +# RUN: llvm-mc %s -triple=riscv64 -mattr=+y -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+y < %s \ +# RUN: | llvm-objdump --mattr=+y -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: ytagr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x10] +ytagr a0, ca0 +# CHECK-ASM-AND-OBJ: ytagr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x10] +gctag a0, ca0 +# CHECK-ASM-AND-OBJ: ytagr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x10] +cgettag a0, ca0 + +# CHECK-ASM-AND-OBJ: ypermr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x15,0x10] +ypermr a0, ca0 +# CHECK-ASM-AND-OBJ: ypermr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x15,0x10] +gcperm a0, ca0 +# CHECK-ASM-AND-OBJ: ypermr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x15,0x10] +cgetperm a0, ca0 + +# CHECK-ASM-AND-OBJ: yhir a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x45,0x10] 
+yhir a0, ca0 +# CHECK-ASM-AND-OBJ: yhir a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x45,0x10] +gchi a0, ca0 +# CHECK-ASM-AND-OBJ: yhir a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x45,0x10] +cgethigh a0, ca0 + +# CHECK-ASM-AND-OBJ: ybaser a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x55,0x10] +ybaser a0, ca0 +# CHECK-ASM-AND-OBJ: ybaser a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x55,0x10] +gcbase a0, ca0 +# CHECK-ASM-AND-OBJ: ybaser a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x55,0x10] +cgetbase a0, ca0 + +# CHECK-ASM-AND-OBJ: ylenr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x65,0x10] +ylenr a0, ca0 +# CHECK-ASM-AND-OBJ: ylenr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x65,0x10] +gclen a0, ca0 +# CHECK-ASM-AND-OBJ: ylenr a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x65,0x10] +cgetlen a0, ca0 + +# CHECK-ASM-AND-OBJ: ytyper a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x25,0x10] +ytyper a0, ca0 +# CHECK-ASM-AND-OBJ: ytyper a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x25,0x10] +gctype a0, ca0 +# CHECK-ASM-AND-OBJ: ytyper a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x25,0x10] +cgettype a0, ca0 + +# CHECK-ASM-AND-OBJ: yaddrw ca0, ca0, a1 +# CHECK-ASM: encoding: [0x33,0x15,0xb5,0x0c] +yaddrw ca0, ca0, a1 +# CHECK-ASM-AND-OBJ: yaddrw ca0, ca0, a1 +# CHECK-ASM: encoding: [0x33,0x15,0xb5,0x0c] +scaddr ca0, ca0, a1 +# CHECK-ASM-AND-OBJ: yaddrw ca0, ca0, a1 +# CHECK-ASM: encoding: [0x33,0x15,0xb5,0x0c] +csetaddr ca0, ca0, a1 + +# CHECK-ASM-AND-OBJ: ypermc ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x25,0xa5,0x0c] +ypermc ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ypermc ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x25,0xa5,0x0c] +acperm ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ypermc ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x25,0xa5,0x0c] +candperm ca0, ca0, a0 + +# CHECK-ASM-AND-OBJ: yhiw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x35,0xa5,0x0c] +yhiw ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: yhiw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x35,0xa5,0x0c] +schi ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: yhiw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x35,0xa5,0x0c] +csethigh ca0, ca0, a0 + +# CHECK-ASM-AND-OBJ: addy ca0, ca0, a1 +# CHECK-ASM: encoding: [0x33,0x05,0xb5,0x0c] +addy ca0, ca0, a1 +# CHECK-ASM-AND-OBJ: addy ca0, ca0, a1 +# CHECK-ASM: encoding: [0x33,0x05,0xb5,0x0c] +cadd ca0, ca0, a1 +# CHECK-ASM-AND-OBJ: addy ca0, ca0, a1 +# CHECK-ASM: encoding: [0x33,0x05,0xb5,0x0c] +cincoffset ca0, ca0, a1 + +# CHECK-ASM-AND-OBJ: addiy ca0, ca0, 12 +# CHECK-ASM: encoding: [0x1b,0x25,0xc5,0x00] +addiy ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: addiy ca0, ca0, 12 +# CHECK-ASM: encoding: [0x1b,0x25,0xc5,0x00] +add ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: addiy ca0, ca0, 12 +# CHECK-ASM: encoding: [0x1b,0x25,0xc5,0x00] +cadd ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: addiy ca0, ca0, 12 +# CHECK-ASM: encoding: [0x1b,0x25,0xc5,0x00] +caddi ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: addiy ca0, ca0, 12 +# CHECK-ASM: encoding: [0x1b,0x25,0xc5,0x00] +cincoffset ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: addiy ca0, ca0, 12 +# CHECK-ASM: encoding: [0x1b,0x25,0xc5,0x00] +cincoffsetimm ca0, ca0, 12 + +# CHECK-ASM-AND-OBJ: ysentry ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x85,0x10] +ysentry ca0, ca0 +# CHECK-ASM-AND-OBJ: ysentry ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x85,0x10] +sentry ca0, ca0 +# CHECK-ASM-AND-OBJ: ysentry ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x85,0x10] +csealentry ca0, ca0 + +# CHECK-ASM-AND-OBJ: ybld ca0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x55,0xa5,0x0c] +ybld ca0, ca0, ca0 +# CHECK-ASM-AND-OBJ: ybld ca0, ca0, ca0 +# CHECK-ASM: encoding: 
[0x33,0x55,0xa5,0x0c] +cbld ca0, ca0, ca0 +# CHECK-ASM-AND-OBJ: ybld ca0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x55,0xa5,0x0c] +cbuildcap ca0, ca0, ca0 + +# CHECK-ASM-AND-OBJ: ybndsw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x05,0xa5,0x0e] +ybndsw ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ybndsw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x05,0xa5,0x0e] +scbnds ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ybndsw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x05,0xa5,0x0e] +csetboundsexact ca0, ca0, a0 + +# CHECK-ASM-AND-OBJ: ybndsrw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x15,0xa5,0x0e] +ybndsrw ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ybndsrw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x15,0xa5,0x0e] +scbndsr ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ybndsrw ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x15,0xa5,0x0e] +csetbounds ca0, ca0, a0 + +# CHECK-ASM-AND-OBJ: ybndsiw ca0, ca0, 12 +# CHECK-ASM: encoding: [0x13,0x55,0xc5,0x04] +ybndsiw ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: ybndsiw ca0, ca0, 12 +# CHECK-ASM: encoding: [0x13,0x55,0xc5,0x04] +ybndsw ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: ybndsiw ca0, ca0, 12 +# CHECK-ASM: encoding: [0x13,0x55,0xc5,0x04] +scbndsi ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: ybndsiw ca0, ca0, 12 +# CHECK-ASM: encoding: [0x13,0x55,0xc5,0x04] +scbnds ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: ybndsiw ca0, ca0, 12 +# CHECK-ASM: encoding: [0x13,0x55,0xc5,0x04] +csetbounds ca0, ca0, 12 +# CHECK-ASM-AND-OBJ: ybndsiw ca0, ca0, 12 +# CHECK-ASM: encoding: [0x13,0x55,0xc5,0x04] +csetboundsimm ca0, ca0, 12 + +# CHECK-ASM-AND-OBJ: ymv ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x0c] +ymv ca0, ca0 +# CHECK-ASM-AND-OBJ: ymv ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x0c] +mv ca0, ca0 +# CHECK-ASM-AND-OBJ: ymv ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x0c] +cmv ca0, ca0 +# CHECK-ASM-AND-OBJ: ymv ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x05,0x0c] +cmove ca0, ca0 + +# CHECK-ASM-AND-OBJ: yamask a0, a0 +# CHECK-ASM: encoding: [0x33,0x05,0x75,0x10] +yamask a0, a0 +# CHECK-ASM-AND-OBJ: yamask a0, a0 +# CHECK-ASM: encoding: [0x33,0x05,0x75,0x10] +cram a0, a0 +# CHECK-ASM-AND-OBJ: yamask a0, a0 +# CHECK-ASM: encoding: [0x33,0x05,0x75,0x10] +crepresentablealignmentmask a0, a0 + +# CHECK-ASM-AND-OBJ: ylt a0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x65,0xa5,0x0c] +ylt a0, ca0, ca0 +# CHECK-ASM-AND-OBJ: ylt a0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x65,0xa5,0x0c] +scss a0, ca0, ca0 +# CHECK-ASM-AND-OBJ: ylt a0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x65,0xa5,0x0c] +ctestsubset a0, ca0, ca0 + +# CHECK-ASM-AND-OBJ: syeq a0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x45,0xa5,0x0c] +syeq a0, ca0, ca0 +# CHECK-ASM-AND-OBJ: syeq a0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x45,0xa5,0x0c] +sceq a0, ca0, ca0 +# CHECK-ASM-AND-OBJ: syeq a0, ca0, ca0 +# CHECK-ASM: encoding: [0x33,0x45,0xa5,0x0c] +csetequalexact a0, ca0, ca0 + +# CHECK-ASM-AND-OBJ: ly ca0, 0(a0) +# CHECK-ASM: encoding: [0x0f,0x45,0x05,0x00] +ly ca0, 0(a0) +# CHECK-ASM-AND-OBJ: ly ca0, 0(a0) +# CHECK-ASM: encoding: [0x0f,0x45,0x05,0x00] +lc ca0, 0(a0) +# CHECK-ASM-AND-OBJ: ly ca0, 0(a0) +# CHECK-ASM: encoding: [0x0f,0x45,0x05,0x00] +ly ca0, (a0) +# CHECK-ASM-AND-OBJ: ly ca0, 0(a0) +# CHECK-ASM: encoding: [0x0f,0x45,0x05,0x00] +lc ca0, (a0) + +# CHECK-ASM-AND-OBJ: sy ca0, 0(a0) +# CHECK-ASM: encoding: [0x23,0x40,0xa5,0x00] +sy ca0, 0(a0) +# CHECK-ASM-AND-OBJ: sy ca0, 0(a0) +# CHECK-ASM: encoding: [0x23,0x40,0xa5,0x00] +sc ca0, 0(a0) +# CHECK-ASM-AND-OBJ: sy ca0, 0(a0) +# CHECK-ASM: encoding: [0x23,0x40,0xa5,0x00] +sy ca0, (a0) +# CHECK-ASM-AND-OBJ: sy 
ca0, 0(a0) +# CHECK-ASM: encoding: [0x23,0x40,0xa5,0x00] +sc ca0, (a0) diff --git a/llvm/test/MC/RISCV/cheri/rvy/rv32zyhybrid-valid.s b/llvm/test/MC/RISCV/cheri/rvy/rv32zyhybrid-valid.s new file mode 100644 index 0000000000000..5b35f576221f1 --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/rv32zyhybrid-valid.s @@ -0,0 +1,49 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+y,+zyhybrid,+cap-mode -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+y,+zyhybrid,+cap-mode < %s \ +# RUN: | llvm-objdump --mattr=+y,+zyhybrid,+cap-mode -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+y,+zyhybrid -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+y,+zyhybrid < %s \ +# RUN: | llvm-objdump --mattr=+y,+zyhybrid -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# +# RUN: llvm-mc %s -triple=riscv64 -mattr=+y,+zyhybrid,+cap-mode -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+y,+zyhybrid,+cap-mode < %s \ +# RUN: | llvm-objdump --mattr=+y,+zyhybrid,+cap-mode -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+y,+zyhybrid -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+y,+zyhybrid < %s \ +# RUN: | llvm-objdump --mattr=+y,+zyhybrid -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: ymoder a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x35,0x10] +ymoder a0, ca0 +# CHECK-ASM-AND-OBJ: ymoder a0, ca0 +# CHECK-ASM: encoding: [0x33,0x05,0x35,0x10] +gcmode a0, ca0 + +# CHECK-ASM-AND-OBJ: ymodew ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x75,0xa5,0x0c] +ymodew ca0, ca0, a0 +# CHECK-ASM-AND-OBJ: ymodew ca0, ca0, a0 +# CHECK-ASM: encoding: [0x33,0x75,0xa5,0x0c] +scmode ca0, ca0, a0 + +# CHECK-ASM-AND-OBJ: ymodeswy +# CHECK-ASM: encoding: [0x33,0x10,0x00,0x12] +ymodeswy +# CHECK-ASM-AND-OBJ: ymodeswy +# CHECK-ASM: encoding: [0x33,0x10,0x00,0x12] +modesw.cap + +# CHECK-ASM-AND-OBJ: ymodeswi +# CHECK-ASM: encoding: [0x33,0x10,0x00,0x14] +ymodeswi +# CHECK-ASM-AND-OBJ: ymodeswi +# CHECK-ASM: encoding: [0x33,0x10,0x00,0x14] +modesw.int diff --git a/llvm/test/MC/RISCV/cheri/rvy/rv64ya-cap-mode-valid.s b/llvm/test/MC/RISCV/cheri/rvy/rv64ya-cap-mode-valid.s new file mode 100644 index 0000000000000..f512b1a60d872 --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/rv64ya-cap-mode-valid.s @@ -0,0 +1,191 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+y,+zyhybrid -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+y,+zyhybrid < %s \ +# RUN: | llvm-objdump --mattr=+a,+y,+zyhybrid,+cap-mode -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# Tests instructions available in purecap + A, rv64. 
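+# The llvm-mc RUN lines above assemble without +cap-mode; the `.option capmode`
+# directive below switches the assembler into capability mode instead, which is
+# why the memory operands below take capability base registers such as (ca3).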
+ +.option capmode + +# CHECK-ASM-AND-OBJ: amoswap.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x08] +amoswap.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoswap.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x0c] +amoswap.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoswap.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x0a] +amoswap.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoswap.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x0e] +amoswap.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amoadd.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x00] +amoadd.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoadd.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x04] +amoadd.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoadd.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x02] +amoadd.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoadd.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x06] +amoadd.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amoxor.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x20] +amoxor.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoxor.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x24] +amoxor.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoxor.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x22] +amoxor.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoxor.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x26] +amoxor.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amoadd.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x00] +amoadd.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoadd.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x04] +amoadd.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoadd.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x02] +amoadd.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoadd.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x06] +amoadd.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amoor.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x40] +amoor.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoor.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x44] +amoor.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoor.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x42] +amoor.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoor.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x46] +amoor.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amomin.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x80] +amomin.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomin.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x84] +amomin.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomin.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x82] +amomin.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomin.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0x86] +amomin.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amomax.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xa0] +amomax.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomax.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xa4] +amomax.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomax.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xa2] +amomax.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomax.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xa6] +amomax.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amominu.d a1, a2, (ca3) +# CHECK-ASM: # 
encoding: [0xaf,0xb5,0xc6,0xc0] +amominu.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amominu.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xc4] +amominu.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amominu.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xc2] +amominu.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amominu.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xc6] +amominu.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: amomaxu.d a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xe0] +amomaxu.d a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomaxu.d.aq a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xe4] +amomaxu.d.aq a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomaxu.d.rl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xe2] +amomaxu.d.rl a1, a2, 0(ca3) +# CHECK-ASM-AND-OBJ: amomaxu.d.aqrl a1, a2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xb5,0xc6,0xe6] +amomaxu.d.aqrl a1, a2, 0(ca3) + +# CHECK-ASM-AND-OBJ: lr.d a1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0x05,0x10] +lr.d a1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.d.aq a1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0x05,0x14] +lr.d.aq a1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.d.rl a1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0x05,0x12] +lr.d.rl a1, 0(ca0) +# CHECK-ASM-AND-OBJ: lr.d.aqrl a1, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0x05,0x16] +lr.d.aqrl a1, 0(ca0) + +# CHECK-ASM-AND-OBJ: sc.d a1, a2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0xc5,0x18] +sc.d a1, a2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.d.aq a1, a2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0xc5,0x1c] +sc.d.aq a1, a2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.d.rl a1, a2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0xc5,0x1a] +sc.d.rl a1, a2, 0(ca0) +# CHECK-ASM-AND-OBJ: sc.d.aqrl a1, a2, (ca0) +# CHECK-ASM: # encoding: [0xaf,0x35,0xc5,0x1e] +sc.d.aqrl a1, a2, 0(ca0) + +# CHECK-ASM-AND-OBJ: lr.y ca1, (ca2) +# CHECK-ASM: # encoding: [0xaf,0x45,0x06,0x10] +lr.y ca1, 0(ca2) +# CHECK-ASM-AND-OBJ: lr.y.aq ca1, (ca2) +# CHECK-ASM: # encoding: [0xaf,0x45,0x06,0x14] +lr.y.aq ca1, 0(ca2) +# CHECK-ASM-AND-OBJ: lr.y.rl ca1, (ca2) +# CHECK-ASM: # encoding: [0xaf,0x45,0x06,0x12] +lr.y.rl ca1, 0(ca2) +# CHECK-ASM-AND-OBJ: lr.y.aqrl ca1, (ca2) +# CHECK-ASM: # encoding: [0xaf,0x45,0x06,0x16] +lr.y.aqrl ca1, 0(ca2) + +# CHECK-ASM-AND-OBJ: amoswap.y ca1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x08] +amoswap.y ca1, ca2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoswap.y.aq ca1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x0c] +amoswap.y.aq ca1, ca2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoswap.y.rl ca1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x0a] +amoswap.y.rl ca1, ca2, 0(ca3) +# CHECK-ASM-AND-OBJ: amoswap.y.aqrl ca1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x0e] +amoswap.y.aqrl ca1, ca2, 0(ca3) + +# CHECK-ASM-AND-OBJ: sc.y a1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x18] +sc.y a1, ca2, 0(ca3) +# CHECK-ASM-AND-OBJ: sc.y.aq a1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x1c] +sc.y.aq a1, ca2, 0(ca3) +# CHECK-ASM-AND-OBJ: sc.y.rl a1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x1a] +sc.y.rl a1, ca2, 0(ca3) +# CHECK-ASM-AND-OBJ: sc.y.aqrl a1, ca2, (ca3) +# CHECK-ASM: # encoding: [0xaf,0xc5,0xc6,0x1e] +sc.y.aqrl a1, ca2, 0(ca3) diff --git a/llvm/test/MC/RISCV/cheri/rvy/zyhybrid-invalid.s b/llvm/test/MC/RISCV/cheri/rvy/zyhybrid-invalid.s new file mode 100644 index 0000000000000..62e44c525f432 --- /dev/null +++ b/llvm/test/MC/RISCV/cheri/rvy/zyhybrid-invalid.s @@ -0,0 +1,29 @@ +# RUN: not llvm-mc -triple 
riscv32 -mattr=+y,+zyhybrid <%s 2>&1 \ +# RUN: | FileCheck %s -check-prefixes=CHECK +# RUN: not llvm-mc -triple riscv64 -mattr=+y,+zyhybrid <%s 2>&1 \ +# RUN: | FileCheck %s -check-prefixes=CHECK + +cgetsealed a0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +cgetflags a0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +cunseal ca0, ca0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +csetoffset ca0, ca0, a0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +ccleartag ca0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +ccopytype ca0, ca0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +ccseal ca0, ca0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +cseal ca0, ca0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +cinvoke ca0, ca0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +cclear 1, 0x42 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +fpclear 1, 0x42 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +crrl a0, a0 # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +cloadtags a0, (ca0) # CHECK: :[[#@LINE]]:1: error: instruction requires the following: CHERI Extension +jalr ca0, 42(ca0) # CHECK: :[[#@LINE]]:1: error: instruction requires the following: Capability Mode + +ybndsiw ca0, ca0, 33 # CHECK: :[[#@LINE]]:23: error: immediate must be an integer in range [0, 31] or be a multiple of 16 in the range [0, 496] +ybndsiw ca0, ca0, 104 # CHECK: :[[#@LINE]]:23: error: immediate must be an integer in range [0, 31] or be a multiple of 16 in the range [0, 496] +ybndsiw ca0, ca0, 512 # CHECK: :[[#@LINE]]:23: error: immediate must be an integer in range [0, 31] or be a multiple of 16 in the range [0, 496] +ybndsiw ca0, ca0, sz # CHECK: :[[#@LINE]]:23: error: immediate must be an integer in range [0, 31] or be a multiple of 16 in the range [0, 496] +cmv a0 , a0 # CHECK: :[[#@LINE]]:13: error: invalid operand for instruction +cbld ca0, c0, ca0 # CHECK: :[[#@LINE]]:18: error: invalid operand for instruction +scss a0 , c0, ca0 # CHECK: :[[#@LINE]]:18: error: invalid operand for instruction +gctype ca0, ca0 # CHECK: :[[#@LINE]]:13: error: invalid operand for instruction +gctype a0, a0 # CHECK: :[[#@LINE]]:17: error: invalid operand for instruction From f57e0e95df71cee1dd1b1d216c186936c3637736 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Wed, 13 Aug 2025 14:07:18 +0100 Subject: [PATCH 12/13] [CHERI] Generate CodeGen tests for RVY32/64 --- .../Inputs/global-capinit-hybrid.ll | 4 + .../CHERI-Generic/Inputs/hoist-alloca.ll | 50 +- .../RISCV32Y/atomic-rmw-cap-ptr-arg.ll | 1043 +++++++++++++ .../RISCV32Y/atomic-rmw-cap-ptr.ll | 755 ++++++++++ .../RISCV32Y/bounded-allocas-lifetimes.ll | 71 + .../CHERI-Generic/RISCV32Y/cap-from-ptr.ll | 204 +++ .../CHERI-Generic/RISCV32Y/cheri-csub.ll | 20 + ...insics-folding-broken-module-regression.ll | 66 + .../RISCV32Y/cheri-memfn-call.ll | 94 ++ .../RISCV32Y/cheri-pointer-comparison.ll | 1324 +++++++++++++++++ .../CHERI-Generic/RISCV32Y/cmpxchg-cap-ptr.ll | 670 +++++++++ .../dagcombine-ptradd-deleted-regression.ll | 46 + .../RISCV32Y/frameindex-arith.ll | 31 + .../RISCV32Y/function-alias-size.ll | 54 + 
.../RISCV32Y/global-capinit-hybrid.ll | 163 ++ .../gvn-capability-store-to-load-fwd.ll | 111 ++ .../CHERI-Generic/RISCV32Y/hoist-alloca.ll | 188 +++ .../RISCV32Y/intrinsics-purecap-only.ll | 18 + .../CHERI-Generic/RISCV32Y/intrinsics.ll | 563 +++++++ .../RISCV32Y/landingpad-non-preemptible.ll | 167 +++ .../RISCV32Y/machinelicm-hoist-csetbounds.ll | 113 ++ .../RISCV32Y/memcpy-from-constant.ll | 165 ++ .../RISCV32Y/memcpy-no-preserve-tags-attr.ll | 125 ++ .../memcpy-preserve-tags-assume-aligned.ll | 53 + .../memcpy-preserve-tags-size-not-multiple.ll | 61 + .../CHERI-Generic/RISCV32Y/memcpy-zeroinit.ll | 50 + .../optsize-preserve-tags-memcpy-crash.ll | 124 ++ .../RISCV32Y/ptradd-immediate.ll | 165 ++ .../CHERI-Generic/RISCV32Y/ptrtoint.ll | 109 ++ .../RISCV32Y/purecap-jumptable.ll | 184 +++ .../RISCV32Y/setoffset-multiple-uses.ll | 96 ++ .../RISCV32Y/stack-bounds-dynamic-alloca.ll | 311 ++++ .../stack-bounds-opaque-spill-too-early.ll | 72 + .../RISCV32Y/stack-bounds-pass-phi.ll | 169 +++ .../RISCV32Y/stack-spill-unnecessary.c.ll | 150 ++ .../RISCV32Y/stackframe-intrinsics.ll | 69 + .../RISCV32Y/strcpy-to-memcpy-no-tags.ll | 166 +++ .../subobject-bounds-redundant-setbounds.c.ll | 317 ++++ .../CHERI-Generic/RISCV32Y/trunc-load.ll | 126 ++ .../RISCV32Y/unaligned-loads-stores-hybrid.ll | 368 +++++ .../unaligned-loads-stores-purecap.ll | 152 ++ .../RISCV64Y/atomic-rmw-cap-ptr-arg.ll | 1043 +++++++++++++ .../RISCV64Y/atomic-rmw-cap-ptr.ll | 755 ++++++++++ .../RISCV64Y/bounded-allocas-lifetimes.ll | 72 + .../CHERI-Generic/RISCV64Y/cap-from-ptr.ll | 204 +++ .../CHERI-Generic/RISCV64Y/cheri-csub.ll | 20 + ...insics-folding-broken-module-regression.ll | 66 + .../RISCV64Y/cheri-memfn-call.ll | 88 ++ .../RISCV64Y/cheri-pointer-comparison.ll | 1324 +++++++++++++++++ .../CHERI-Generic/RISCV64Y/cmpxchg-cap-ptr.ll | 656 ++++++++ .../dagcombine-ptradd-deleted-regression.ll | 46 + .../RISCV64Y/frameindex-arith.ll | 31 + .../RISCV64Y/function-alias-size.ll | 54 + .../RISCV64Y/global-capinit-hybrid.ll | 163 ++ .../gvn-capability-store-to-load-fwd.ll | 113 ++ .../CHERI-Generic/RISCV64Y/hoist-alloca.ll | 176 +++ .../RISCV64Y/intrinsics-purecap-only.ll | 18 + .../CHERI-Generic/RISCV64Y/intrinsics.ll | 563 +++++++ .../RISCV64Y/landingpad-non-preemptible.ll | 167 +++ .../RISCV64Y/machinelicm-hoist-csetbounds.ll | 113 ++ .../RISCV64Y/memcpy-from-constant.ll | 165 ++ .../RISCV64Y/memcpy-no-preserve-tags-attr.ll | 113 ++ .../memcpy-preserve-tags-assume-aligned.ll | 45 + .../memcpy-preserve-tags-size-not-multiple.ll | 53 + .../CHERI-Generic/RISCV64Y/memcpy-zeroinit.ll | 41 + .../optsize-preserve-tags-memcpy-crash.ll | 114 ++ .../RISCV64Y/ptradd-immediate.ll | 165 ++ .../CHERI-Generic/RISCV64Y/ptrtoint.ll | 109 ++ .../RISCV64Y/purecap-jumptable.ll | 187 +++ .../RISCV64Y/setoffset-multiple-uses.ll | 96 ++ .../RISCV64Y/stack-bounds-dynamic-alloca.ll | 305 ++++ .../stack-bounds-opaque-spill-too-early.ll | 72 + .../RISCV64Y/stack-bounds-pass-phi.ll | 169 +++ .../RISCV64Y/stack-spill-unnecessary.c.ll | 150 ++ .../RISCV64Y/stackframe-intrinsics.ll | 69 + .../RISCV64Y/strcpy-to-memcpy-no-tags.ll | 170 +++ .../subobject-bounds-redundant-setbounds.c.ll | 317 ++++ .../CHERI-Generic/RISCV64Y/trunc-load.ll | 126 ++ .../RISCV64Y/unaligned-loads-stores-hybrid.ll | 371 +++++ .../unaligned-loads-stores-purecap.ll | 159 ++ .../CodeGen/CHERI-Generic/regenerate-all.py | 19 +- 81 files changed, 17467 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr-arg.ll create mode 100644 
llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/bounded-allocas-lifetimes.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cap-from-ptr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-csub.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-intrinsics-folding-broken-module-regression.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-memfn-call.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-pointer-comparison.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cmpxchg-cap-ptr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/dagcombine-ptradd-deleted-regression.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/frameindex-arith.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/function-alias-size.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/global-capinit-hybrid.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/gvn-capability-store-to-load-fwd.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/hoist-alloca.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics-purecap-only.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/landingpad-non-preemptible.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/machinelicm-hoist-csetbounds.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-from-constant.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-no-preserve-tags-attr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-assume-aligned.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-size-not-multiple.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-zeroinit.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/optsize-preserve-tags-memcpy-crash.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptradd-immediate.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptrtoint.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/purecap-jumptable.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/setoffset-multiple-uses.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-dynamic-alloca.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-opaque-spill-too-early.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-pass-phi.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-spill-unnecessary.c.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stackframe-intrinsics.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/strcpy-to-memcpy-no-tags.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/subobject-bounds-redundant-setbounds.c.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/trunc-load.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-hybrid.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-purecap.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr-arg.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr.ll create mode 100644 
llvm/test/CodeGen/CHERI-Generic/RISCV64Y/bounded-allocas-lifetimes.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cap-from-ptr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-csub.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-intrinsics-folding-broken-module-regression.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-memfn-call.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-pointer-comparison.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cmpxchg-cap-ptr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/dagcombine-ptradd-deleted-regression.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/frameindex-arith.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/function-alias-size.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/global-capinit-hybrid.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/gvn-capability-store-to-load-fwd.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/hoist-alloca.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics-purecap-only.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/landingpad-non-preemptible.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/machinelicm-hoist-csetbounds.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-from-constant.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-no-preserve-tags-attr.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-assume-aligned.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-size-not-multiple.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-zeroinit.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/optsize-preserve-tags-memcpy-crash.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptradd-immediate.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptrtoint.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/purecap-jumptable.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/setoffset-multiple-uses.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-dynamic-alloca.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-opaque-spill-too-early.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-pass-phi.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-spill-unnecessary.c.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stackframe-intrinsics.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/strcpy-to-memcpy-no-tags.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/subobject-bounds-redundant-setbounds.c.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/trunc-load.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-hybrid.ll create mode 100644 llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-purecap.ll diff --git a/llvm/test/CodeGen/CHERI-Generic/Inputs/global-capinit-hybrid.ll b/llvm/test/CodeGen/CHERI-Generic/Inputs/global-capinit-hybrid.ll index 3c2c2fbec5206..f7c52bf67dd86 100644 --- a/llvm/test/CodeGen/CHERI-Generic/Inputs/global-capinit-hybrid.ll +++ b/llvm/test/CodeGen/CHERI-Generic/Inputs/global-capinit-hybrid.ll @@ -3,10 
+3,14 @@ @IF-MIPS@; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.8byte @IF-RISCV64@; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.quad @IF-RISCV32@; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.word +@IF-RISCV64Y@; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.quad +@IF-RISCV32Y@; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.word ; RUN: llc @HYBRID_HARDFLOAT_ARGS@ %s -filetype=obj -o - | llvm-objdump -r -t - | \ @IF-MIPS@; RUN: FileCheck %s --check-prefix=RELOCS '-DINTEGER_RELOC=R_MIPS_64/R_MIPS_NONE/R_MIPS_NONE' '-DCAPABILITY_RELOC=R_MIPS_CHERI_CAPABILITY/R_MIPS_NONE/R_MIPS_NONE' @IF-RISCV64@; RUN: FileCheck %s --check-prefix=RELOCS -DINTEGER_RELOC=R_RISCV_64 '-DCAPABILITY_RELOC=R_RISCV_CHERI_CAPABILITY' @IF-RISCV32@; RUN: FileCheck %s --check-prefix=RELOCS -DINTEGER_RELOC=R_RISCV_32 '-DCAPABILITY_RELOC=R_RISCV_CHERI_CAPABILITY' +@IF-RISCV64Y@; RUN: FileCheck %s --check-prefix=RELOCS -DINTEGER_RELOC=R_RISCV_64 '-DCAPABILITY_RELOC=R_RISCV_CHERI_CAPABILITY' +@IF-RISCV32Y@; RUN: FileCheck %s --check-prefix=RELOCS -DINTEGER_RELOC=R_RISCV_32 '-DCAPABILITY_RELOC=R_RISCV_CHERI_CAPABILITY' target datalayout = "@HYBRID_DATALAYOUT@" declare void @extern_fn() diff --git a/llvm/test/CodeGen/CHERI-Generic/Inputs/hoist-alloca.ll b/llvm/test/CodeGen/CHERI-Generic/Inputs/hoist-alloca.ll index 3c1e23a775561..db11e1b0ee76c 100644 --- a/llvm/test/CodeGen/CHERI-Generic/Inputs/hoist-alloca.ll +++ b/llvm/test/CodeGen/CHERI-Generic/Inputs/hoist-alloca.ll @@ -32,28 +32,66 @@ ; RUN: FileCheck --input-file=%t.dbg --check-prefix=MACHINELICM-DBG %s ; Check that MachineLICM hoists the CheriBoundedStackPseudoImm (MIPS) / IncOffset+SetBoundsImm (RISCV) instructions ; MACHINELICM-DBG-LABEL: ******** Pre-regalloc Machine LICM: hoist_alloca_uncond +@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 512 +@IF-RISCV32Y@; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0 +@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 492 +@IF-RISCV64Y@; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0 @IF-MIPS@; MACHINELICM-DBG: Hoisting %{{[0-9]+}}:cherigpr = CheriBoundedStackPseudoImm %stack.0.buf1, 0, 492 -@IF-RISCV@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.0.buf1, 0 +@IF-RISCV32@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.0.buf1, 0 +@IF-RISCV64@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.0.buf1, 0 +@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0 +@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0 ; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0 @IF-RISCV32@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 512 @IF-RISCV64@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 492 +@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr +@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr @IF-RISCV@; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0 +@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88 +@IF-RISCV32Y@; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0 +@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88 +@IF-RISCV64Y@; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0 @IF-MIPS@; MACHINELICM-DBG: Hoisting %{{[0-9]+}}:cherigpr = CheriBoundedStackPseudoImm %stack.1.buf2, 0, 88 -@IF-RISCV@; MACHINELICM-DBG: Hoisting 
[[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.1.buf2, 0
+@IF-RISCV32@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.1.buf2, 0
+@IF-RISCV64@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.1.buf2, 0
+@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.1.buf2, 0
+@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.1.buf2, 0
 ; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
-@IF-RISCV@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 88
+@IF-RISCV64@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 88
+@IF-RISCV32@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 88
+@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
 @IF-RISCV@; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
 ; MACHINELICM-DBG-LABEL: ******** Pre-regalloc Machine LICM: hoist_alloca_cond
 @IF-MIPS@; MACHINELICM-DBG: Hoisting %{{[0-9]+}}:cherigpr = CheriBoundedStackPseudoImm %stack.0.buf1, 0, 492
-@IF-RISCV@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.0.buf1, 0
+@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 512
+@IF-RISCV32Y@; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 492
+@IF-RISCV64Y@; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+@IF-RISCV32@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.0.buf1, 0
+@IF-RISCV64@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = CIncOffsetImm %stack.0.buf1, 0
+@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0
+@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0
 ; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
 @IF-RISCV32@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 512
 @IF-RISCV64@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = CSetBoundsImm [[INC]]:gpcr, 492
+@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
 @IF-RISCV@; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88
+@IF-RISCV32Y@; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88
+@IF-RISCV64Y@; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
 @IF-MIPS@; MACHINELICM-DBG: Hoisting %{{[0-9]+}}:cherigpr = CheriBoundedStackPseudoImm %stack.1.buf2, 0, 88
-@IF-RISCV@; MACHINELICM-DBG: Hoisting
[[INC]]:gpcr, 88 +@IF-RISCV32Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr +@IF-RISCV64Y@; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr @IF-RISCV@; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0 ; RUN: llc @PURECAP_HARDFLOAT_ARGS@ -O1 -o - < %s | FileCheck %s diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr-arg.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr-arg.ll new file mode 100644 index 0000000000000..66db256f37fcd --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr-arg.ll @@ -0,0 +1,1043 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/atomic-rmw-cap-ptr-arg.ll +; Check that we can generate sensible code for atomic operations using capability pointers on capabilities +; See https://github.com/CTSRD-CHERI/llvm-project/issues/470 +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -mattr=+a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -mattr=-a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-LIBCALLS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -mattr=+a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -mattr=-a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-LIBCALLS --allow-unused-prefixes + +define i32 addrspace(200)* @atomic_cap_ptr_xchg_sc(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_sc: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.y.aqrl ca0, ca1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_sc: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg_sc: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_exchange_cap_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_xchg_relaxed(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_relaxed: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.y ca0, ca1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_relaxed: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 0 +; PURECAP-LIBCALLS-NEXT: call 
__atomic_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg_relaxed: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 0 +; HYBRID-NEXT: call __atomic_exchange_cap_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val monotonic + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_xchg_acquire(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_acquire: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.y.aq ca0, ca1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_acquire: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg_acquire: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 2 +; HYBRID-NEXT: call __atomic_exchange_cap_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val acquire + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_xchg_rel(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_rel: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.y.rl ca0, ca1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_rel: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 3 +; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg_rel: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 3 +; HYBRID-NEXT: call __atomic_exchange_cap_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val release + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_xchg_acq_rel(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_acq_rel: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.y.aqrl ca0, ca1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_acq_rel: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 4 +; 
PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg_acq_rel: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 4 +; HYBRID-NEXT: call __atomic_exchange_cap_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val acq_rel + ret i32 addrspace(200)* %tmp +} + +; Also check non-i8* xchg: +define i32 addrspace(200)* @atomic_cap_ptr_xchg_i32ptr(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_i32ptr: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.y.aqrl ca0, ca1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_i32ptr: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 4 +; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg_i32ptr: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 4 +; HYBRID-NEXT: call __atomic_exchange_cap_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val acq_rel + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_add(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_add: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: addy ca3, ca2, a1 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB6_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_add: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: .LBB6_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: add a0, a3, s1 +; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, 
.LBB6_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_add: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .LBB6_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: add a0, a3, a0 +; HYBRID-NEXT: yaddrw ca2, ca3, a0 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: beqz a0, .LBB6_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw add i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_sub(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_sub: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: sub a3, a2, a1 +; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB7_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_sub: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: .LBB7_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: sub a0, a3, s1 +; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB7_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: 
ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_sub: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .LBB7_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: sub a0, a3, a0 +; HYBRID-NEXT: yaddrw ca2, ca3, a0 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: beqz a0, .LBB7_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw sub i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_and(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_and: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: and a3, a2, a1 +; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB8_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_and: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: .LBB8_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: and a0, a3, s1 +; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB8_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; 
PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_and: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .LBB8_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: and a0, a3, a0 +; HYBRID-NEXT: yaddrw ca2, ca3, a0 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: beqz a0, .LBB8_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw and i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_nand(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_nand: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: and a3, a2, a1 +; PURECAP-ATOMICS-NEXT: not a3, a3 +; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB9_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_nand: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: .LBB9_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: and a0, a3, s1 +; PURECAP-LIBCALLS-NEXT: not a0, a0 +; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB9_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_nand: +; 
HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .LBB9_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: and a0, a3, a0 +; HYBRID-NEXT: not a0, a0 +; HYBRID-NEXT: yaddrw ca2, ca3, a0 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: beqz a0, .LBB9_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw nand i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_or(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_or: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: or a3, a2, a1 +; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB10_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_or: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: .LBB10_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: or a0, a3, s1 +; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB10_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_or: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; 
HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .LBB10_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: or a0, a3, a0 +; HYBRID-NEXT: yaddrw ca2, ca3, a0 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: beqz a0, .LBB10_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw or i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_xor(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xor: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: xor a3, a2, a1 +; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB11_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xor: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: .LBB11_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: xor a0, a3, s1 +; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB11_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xor: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 0(sp) # 8-byte 
Folded Spill +; HYBRID-NEXT: .LBB11_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: xor a0, a3, a0 +; HYBRID-NEXT: yaddrw ca2, ca3, a0 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: beqz a0, .LBB11_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw xor i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_max(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_max: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: bge a3, a1, .LBB12_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB12_1 +; PURECAP-ATOMICS-NEXT: # %bb.4: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_max: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: j .LBB12_2 +; PURECAP-LIBCALLS-NEXT: .LBB12_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB12_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB12_4 +; PURECAP-LIBCALLS-NEXT: .LBB12_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3 +; PURECAP-LIBCALLS-NEXT: blt s1, a3, .LBB12_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB12_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1 +; PURECAP-LIBCALLS-NEXT: j .LBB12_1 +; PURECAP-LIBCALLS-NEXT: .LBB12_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_max: +; HYBRID: # %bb.0: 
+; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: j .LBB12_2 +; HYBRID-NEXT: .LBB12_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB12_2 Depth=1 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: bnez a0, .LBB12_4 +; HYBRID-NEXT: .LBB12_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ymv ca2, ca3 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: blt a0, a3, .LBB12_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB12_2 Depth=1 +; HYBRID-NEXT: ly ca2, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: j .LBB12_1 +; HYBRID-NEXT: .LBB12_4: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw max i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_min(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_min: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: bge a1, a3, .LBB13_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB13_1 +; PURECAP-ATOMICS-NEXT: # %bb.4: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_min: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: j .LBB13_2 +; PURECAP-LIBCALLS-NEXT: .LBB13_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB13_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB13_4 +; PURECAP-LIBCALLS-NEXT: .LBB13_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3 +; PURECAP-LIBCALLS-NEXT: bge s1, a3, .LBB13_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start 
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB13_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1 +; PURECAP-LIBCALLS-NEXT: j .LBB13_1 +; PURECAP-LIBCALLS-NEXT: .LBB13_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_min: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: j .LBB13_2 +; HYBRID-NEXT: .LBB13_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB13_2 Depth=1 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: bnez a0, .LBB13_4 +; HYBRID-NEXT: .LBB13_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ymv ca2, ca3 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: bge a0, a3, .LBB13_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB13_2 Depth=1 +; HYBRID-NEXT: ly ca2, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: j .LBB13_1 +; HYBRID-NEXT: .LBB13_4: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw min i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_umax(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umax: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: bgeu a3, a1, .LBB14_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: .LBB14_3: # in Loop: Header=BB14_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB14_1 +; PURECAP-ATOMICS-NEXT: # %bb.4: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umax: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: j .LBB14_2 +; PURECAP-LIBCALLS-NEXT: .LBB14_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: 
# in Loop: Header=BB14_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB14_4 +; PURECAP-LIBCALLS-NEXT: .LBB14_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3 +; PURECAP-LIBCALLS-NEXT: bltu s1, a3, .LBB14_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB14_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1 +; PURECAP-LIBCALLS-NEXT: j .LBB14_1 +; PURECAP-LIBCALLS-NEXT: .LBB14_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_umax: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: j .LBB14_2 +; HYBRID-NEXT: .LBB14_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB14_2 Depth=1 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: bnez a0, .LBB14_4 +; HYBRID-NEXT: .LBB14_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ymv ca2, ca3 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: bltu a0, a3, .LBB14_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB14_2 Depth=1 +; HYBRID-NEXT: ly ca2, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: j .LBB14_1 +; HYBRID-NEXT: .LBB14_4: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw umax i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} + +define i32 addrspace(200)* @atomic_cap_ptr_umin(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umin: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0) +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: bgeu a1, a3, .LBB15_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; PURECAP-ATOMICS-NEXT: ymv ca3, ca2 +; PURECAP-ATOMICS-NEXT: .LBB15_3: # in Loop: Header=BB15_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB15_1 +; PURECAP-ATOMICS-NEXT: # %bb.4: +; PURECAP-ATOMICS-NEXT: ymv ca0, ca2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umin: +; PURECAP-LIBCALLS: # %bb.0: 
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: j .LBB15_2 +; PURECAP-LIBCALLS-NEXT: .LBB15_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB15_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sy ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca3, 8(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB15_4 +; PURECAP-LIBCALLS-NEXT: .LBB15_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3 +; PURECAP-LIBCALLS-NEXT: bgeu s1, a3, .LBB15_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB15_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1 +; PURECAP-LIBCALLS-NEXT: j .LBB15_1 +; PURECAP-LIBCALLS-NEXT: .LBB15_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_umin: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ly ca3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: sy ca1, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: j .LBB15_2 +; HYBRID-NEXT: .LBB15_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB15_2 Depth=1 +; HYBRID-NEXT: sy ca3, 16(sp) +; HYBRID-NEXT: addi a1, sp, 16 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca3, 16(sp) +; HYBRID-NEXT: bnez a0, .LBB15_4 +; HYBRID-NEXT: .LBB15_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ymv ca2, ca3 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: bgeu a0, a3, .LBB15_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB15_2 Depth=1 +; HYBRID-NEXT: ly ca2, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: j .LBB15_1 +; HYBRID-NEXT: .LBB15_4: # %atomicrmw.end +; HYBRID-NEXT: ymv ca0, ca3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw umin i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst + ret i32 addrspace(200)* %tmp +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr.ll new file mode 100644 index 0000000000000..7bd1fde6676de --- /dev/null +++ 
b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/atomic-rmw-cap-ptr.ll @@ -0,0 +1,755 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/atomic-rmw-cap-ptr.ll +; Check that we can generate sensible code for atomic operations using capability pointers +; https://github.com/CTSRD-CHERI/llvm-project/issues/470 +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -mattr=+a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -mattr=-a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-LIBCALLS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -mattr=+a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -mattr=-a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-LIBCALLS --allow-unused-prefixes + +define i32 @atomic_cap_ptr_xchg(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoswap.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xchg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_exchange_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xchg i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_add(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_add: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoadd.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_add: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_add_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_add: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_fetch_add_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw add i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_sub(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_sub: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: neg a1, a1 +; PURECAP-ATOMICS-NEXT: 
amoadd.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_sub: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_sub_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_sub: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_fetch_sub_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw sub i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_and(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_and: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoand.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_and: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_and_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_and: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_fetch_and_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw and i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_nand(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_nand: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.w.aqrl a2, (ca0) +; PURECAP-ATOMICS-NEXT: and a3, a2, a1 +; PURECAP-ATOMICS-NEXT: not a3, a3 +; PURECAP-ATOMICS-NEXT: sc.w.rl a3, a3, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a3, .LBB4_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: +; PURECAP-ATOMICS-NEXT: mv a0, a2 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_nand: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_nand_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_nand: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_fetch_nand_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw nand i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_or(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_or: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoor.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; 
PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_or: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_or_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_or: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_fetch_or_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw or i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_xor(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xor: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amoxor.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xor: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: li a2, 5 +; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_xor_4 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_xor: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 5 +; HYBRID-NEXT: call __atomic_fetch_xor_4_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %tmp = atomicrmw xor i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_max(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_max: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amomax.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_max: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: lw a3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: mv s1, a1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 12 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 4 +; PURECAP-LIBCALLS-NEXT: j .LBB7_2 +; PURECAP-LIBCALLS-NEXT: .LBB7_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB7_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB7_4 +; PURECAP-LIBCALLS-NEXT: .LBB7_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, a3 +; PURECAP-LIBCALLS-NEXT: blt s1, a3, .LBB7_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB7_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, s1 +; PURECAP-LIBCALLS-NEXT: j 
.LBB7_1 +; PURECAP-LIBCALLS-NEXT: .LBB7_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: mv a0, a3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_max: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: mv s0, a1 +; HYBRID-NEXT: j .LBB7_2 +; HYBRID-NEXT: .LBB7_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB7_2 Depth=1 +; HYBRID-NEXT: sw a3, 20(sp) +; HYBRID-NEXT: addi a1, sp, 20 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a3, 20(sp) +; HYBRID-NEXT: bnez a0, .LBB7_4 +; HYBRID-NEXT: .LBB7_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: blt s0, a3, .LBB7_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB7_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB7_1 +; HYBRID-NEXT: .LBB7_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw max i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_min(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_min: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amomin.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_min: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: lw a3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: mv s1, a1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 12 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 4 +; PURECAP-LIBCALLS-NEXT: j .LBB8_2 +; PURECAP-LIBCALLS-NEXT: .LBB8_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB8_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB8_4 +; PURECAP-LIBCALLS-NEXT: .LBB8_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, a3 +; PURECAP-LIBCALLS-NEXT: bge s1, a3, .LBB8_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB8_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, s1 +; PURECAP-LIBCALLS-NEXT: j 
.LBB8_1 +; PURECAP-LIBCALLS-NEXT: .LBB8_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: mv a0, a3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_min: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: mv s0, a1 +; HYBRID-NEXT: j .LBB8_2 +; HYBRID-NEXT: .LBB8_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB8_2 Depth=1 +; HYBRID-NEXT: sw a3, 20(sp) +; HYBRID-NEXT: addi a1, sp, 20 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a3, 20(sp) +; HYBRID-NEXT: bnez a0, .LBB8_4 +; HYBRID-NEXT: .LBB8_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: bge s0, a3, .LBB8_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB8_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB8_1 +; HYBRID-NEXT: .LBB8_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw min i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_umax(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umax: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amomaxu.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umax: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: lw a3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: mv s1, a1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 12 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 4 +; PURECAP-LIBCALLS-NEXT: j .LBB9_2 +; PURECAP-LIBCALLS-NEXT: .LBB9_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB9_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB9_4 +; PURECAP-LIBCALLS-NEXT: .LBB9_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, a3 +; PURECAP-LIBCALLS-NEXT: bltu s1, a3, .LBB9_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB9_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, s1 +; PURECAP-LIBCALLS-NEXT: j 
.LBB9_1 +; PURECAP-LIBCALLS-NEXT: .LBB9_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: mv a0, a3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_umax: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: mv s0, a1 +; HYBRID-NEXT: j .LBB9_2 +; HYBRID-NEXT: .LBB9_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB9_2 Depth=1 +; HYBRID-NEXT: sw a3, 20(sp) +; HYBRID-NEXT: addi a1, sp, 20 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a3, 20(sp) +; HYBRID-NEXT: bnez a0, .LBB9_4 +; HYBRID-NEXT: .LBB9_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: bltu s0, a3, .LBB9_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB9_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB9_1 +; HYBRID-NEXT: .LBB9_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw umax i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define i32 @atomic_cap_ptr_umin(i32 addrspace(200)* %ptr, i32 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umin: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amominu.w.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umin: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -48 +; PURECAP-LIBCALLS-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: lw a3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: mv s1, a1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 12 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 4 +; PURECAP-LIBCALLS-NEXT: j .LBB10_2 +; PURECAP-LIBCALLS-NEXT: .LBB10_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB10_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a3, 12(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB10_4 +; PURECAP-LIBCALLS-NEXT: .LBB10_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, a3 +; PURECAP-LIBCALLS-NEXT: bgeu s1, a3, .LBB10_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB10_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, s1 +; 
PURECAP-LIBCALLS-NEXT: j .LBB10_1 +; PURECAP-LIBCALLS-NEXT: .LBB10_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: mv a0, a3 +; PURECAP-LIBCALLS-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 48 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_umin: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: mv s0, a1 +; HYBRID-NEXT: j .LBB10_2 +; HYBRID-NEXT: .LBB10_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB10_2 Depth=1 +; HYBRID-NEXT: sw a3, 20(sp) +; HYBRID-NEXT: addi a1, sp, 20 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a3, 20(sp) +; HYBRID-NEXT: bnez a0, .LBB10_4 +; HYBRID-NEXT: .LBB10_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: bgeu s0, a3, .LBB10_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB10_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB10_1 +; HYBRID-NEXT: .LBB10_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw umin i32 addrspace(200)* %ptr, i32 %val seq_cst + ret i32 %tmp +} + +define float @atomic_cap_ptr_fadd(float addrspace(200)* %ptr, float %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_fadd: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: flw fa5, 0(ca0) +; PURECAP-ATOMICS-NEXT: .LBB11_1: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # =>This Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: # Child Loop BB11_3 Depth 2 +; PURECAP-ATOMICS-NEXT: fadd.s fa4, fa5, fa0 +; PURECAP-ATOMICS-NEXT: fmv.x.w a1, fa4 +; PURECAP-ATOMICS-NEXT: fmv.x.w a2, fa5 +; PURECAP-ATOMICS-NEXT: .LBB11_3: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # Parent Loop BB11_1 Depth=1 +; PURECAP-ATOMICS-NEXT: # => This Inner Loop Header: Depth=2 +; PURECAP-ATOMICS-NEXT: lr.w.aqrl a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB11_5 +; PURECAP-ATOMICS-NEXT: # %bb.4: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB11_3 Depth=2 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a1, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB11_3 +; PURECAP-ATOMICS-NEXT: .LBB11_5: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB11_1 Depth=1 +; PURECAP-ATOMICS-NEXT: fmv.w.x fa5, a3 +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB11_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-ATOMICS-NEXT: fmv.s fa0, fa5 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_fadd: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: fsw fs0, 4(csp) # 4-byte 
Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: flw fa5, 0(ca0) +; PURECAP-LIBCALLS-NEXT: fmv.s fs0, fa0 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs1, ca0, 4 +; PURECAP-LIBCALLS-NEXT: .LBB11_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: fadd.s fa4, fa5, fs0 +; PURECAP-LIBCALLS-NEXT: fsw fa5, 0(csp) +; PURECAP-LIBCALLS-NEXT: fmv.x.w a2, fa4 +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs1 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: flw fa5, 0(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB11_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: fmv.s fa0, fa5 +; PURECAP-LIBCALLS-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: flw fs0, 4(csp) # 4-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_fadd: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: fsw fs0, 24(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: fmv.s fs0, fa0 +; HYBRID-NEXT: fmv.w.x fa0, a0 +; HYBRID-NEXT: .LBB11_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: fadd.s fa5, fa0, fs0 +; HYBRID-NEXT: fsw fa0, 20(sp) +; HYBRID-NEXT: fmv.x.w a2, fa5 +; HYBRID-NEXT: addi a1, sp, 20 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: flw fa0, 20(sp) +; HYBRID-NEXT: beqz a0, .LBB11_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: flw fs0, 24(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw fadd float addrspace(200)* %ptr, float %val seq_cst + ret float %tmp +} + +define float @atomic_cap_ptr_fsub(float addrspace(200)* %ptr, float %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_fsub: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: flw fa5, 0(ca0) +; PURECAP-ATOMICS-NEXT: .LBB12_1: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # =>This Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: # Child Loop BB12_3 Depth 2 +; PURECAP-ATOMICS-NEXT: fsub.s fa4, fa5, fa0 +; PURECAP-ATOMICS-NEXT: fmv.x.w a1, fa4 +; PURECAP-ATOMICS-NEXT: fmv.x.w a2, fa5 +; PURECAP-ATOMICS-NEXT: .LBB12_3: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # Parent Loop BB12_1 Depth=1 +; PURECAP-ATOMICS-NEXT: # => This Inner Loop Header: Depth=2 +; PURECAP-ATOMICS-NEXT: lr.w.aqrl a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB12_5 +; PURECAP-ATOMICS-NEXT: # %bb.4: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB12_3 Depth=2 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a1, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB12_3 +; PURECAP-ATOMICS-NEXT: .LBB12_5: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB12_1 Depth=1 +; PURECAP-ATOMICS-NEXT: fmv.w.x fa5, a3 +; PURECAP-ATOMICS-NEXT: bne a3, a2, 
.LBB12_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-ATOMICS-NEXT: fmv.s fa0, fa5 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_fsub: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: fsw fs0, 4(csp) # 4-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: flw fa5, 0(ca0) +; PURECAP-LIBCALLS-NEXT: fmv.s fs0, fa0 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs1, ca0, 4 +; PURECAP-LIBCALLS-NEXT: .LBB12_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: fsub.s fa4, fa5, fs0 +; PURECAP-LIBCALLS-NEXT: fsw fa5, 0(csp) +; PURECAP-LIBCALLS-NEXT: fmv.x.w a2, fa4 +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs1 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: flw fa5, 0(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB12_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: fmv.s fa0, fa5 +; PURECAP-LIBCALLS-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: flw fs0, 4(csp) # 4-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_fsub: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; HYBRID-NEXT: fsw fs0, 24(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca0, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: fmv.s fs0, fa0 +; HYBRID-NEXT: fmv.w.x fa0, a0 +; HYBRID-NEXT: .LBB12_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: fsub.s fa5, fa0, fs0 +; HYBRID-NEXT: fsw fa0, 20(sp) +; HYBRID-NEXT: fmv.x.w a2, fa5 +; HYBRID-NEXT: addi a1, sp, 20 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: flw fa0, 20(sp) +; HYBRID-NEXT: beqz a0, .LBB12_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; HYBRID-NEXT: flw fs0, 24(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %tmp = atomicrmw fsub float addrspace(200)* %ptr, float %val seq_cst + ret float %tmp +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/bounded-allocas-lifetimes.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/bounded-allocas-lifetimes.ll new file mode 100644 index 0000000000000..dc30ed44fc542 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/bounded-allocas-lifetimes.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/bounded-allocas-lifetimes.ll +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; CHERI-GENERIC-UTC: mir +; RUN: llc 
-mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - --stop-after=finalize-isel | FileCheck %s + +; Check that lifetime markers don't get lost due to CheriBoundAllocas, as we'd +; risk StackSlotColoring reusing the slot. + +declare void @use(i8 addrspace(200)*) + +define void @static_alloca() { + ; CHECK-LABEL: name: static_alloca + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: [[ADDIY:%[0-9]+]]:gpcr = ADDIY %stack.0, 0 + ; CHECK-NEXT: [[YBNDSIW:%[0-9]+]]:gpcr = YBNDSIW killed [[ADDIY]], 4 + ; CHECK-NEXT: LIFETIME_START %stack.0 + ; CHECK-NEXT: ADJCALLSTACKDOWNCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: $c10 = COPY [[YBNDSIW]] + ; CHECK-NEXT: PseudoCCALL target-flags(riscv-call) @use, csr_il32pc64f_l64pc128f, implicit-def dead $c1, implicit $c10, implicit-def $c2 + ; CHECK-NEXT: ADJCALLSTACKUPCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: LIFETIME_END %stack.0 + ; CHECK-NEXT: PseudoCRET + %1 = alloca i32, align 4, addrspace(200) + %2 = bitcast i32 addrspace(200)* %1 to i8 addrspace(200)* + call void @llvm.lifetime.start.p200i8(i64 4, i8 addrspace(200)* %2) + call void @use(i8 addrspace(200)* %2) + call void @llvm.lifetime.end.p200i8(i64 4, i8 addrspace(200)* %2) + ret void +} + +; LIFETIME_START/LIFETIME_END only apply to static allocas, so we can't verify +; that the analysis works correctly, but the IR is here for completeness. +define void @dynamic_alloca(i64 zeroext %n) { + ; CHECK-LABEL: name: dynamic_alloca + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 2 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[SLLI]], 15 + ; CHECK-NEXT: [[ANDI:%[0-9]+]]:gpr = ANDI killed [[ADDI]], -16 + ; CHECK-NEXT: [[YAMASK:%[0-9]+]]:gpr = YAMASK [[ANDI]] + ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[YAMASK]], -1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[ANDI]], killed [[XORI]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:gpr = AND killed [[ADD]], [[YAMASK]] + ; CHECK-NEXT: ADJCALLSTACKDOWNCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpcr = COPY $c2 + ; CHECK-NEXT: [[PseudoCGetAddr:%[0-9]+]]:gpr = PseudoCGetAddr [[COPY1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB killed [[PseudoCGetAddr]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:gpr = AND killed [[SUB]], [[YAMASK]] + ; CHECK-NEXT: [[YADDRW:%[0-9]+]]:gpcr = YADDRW [[COPY1]], killed [[AND1]] + ; CHECK-NEXT: [[YBNDSRW:%[0-9]+]]:gpcr = YBNDSRW [[YADDRW]], [[AND]] + ; CHECK-NEXT: $c2 = COPY [[YADDRW]] + ; CHECK-NEXT: ADJCALLSTACKUPCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: [[YBNDSRW1:%[0-9]+]]:gpcr = YBNDSRW killed [[YBNDSRW]], [[SLLI]] + ; CHECK-NEXT: ADJCALLSTACKDOWNCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: $c10 = COPY [[YBNDSRW1]] + ; CHECK-NEXT: PseudoCCALL target-flags(riscv-call) @use, csr_il32pc64f_l64pc128f, implicit-def dead $c1, implicit $c10, implicit-def $c2 + ; CHECK-NEXT: ADJCALLSTACKUPCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: PseudoCRET + %1 = alloca i32, i64 %n, align 4, addrspace(200) + %2 = bitcast i32 addrspace(200)* %1 to i8 addrspace(200)* + call void @llvm.lifetime.start.p200i8(i64 -1, i8 addrspace(200)* %2) + call void @use(i8 addrspace(200)* %2) + call void @llvm.lifetime.end.p200i8(i64 -1, i8 addrspace(200)* %2) + ret void +} + +declare void @llvm.lifetime.start.p200i8(i64, i8 addrspace(200)*) +declare void @llvm.lifetime.end.p200i8(i64, i8 
addrspace(200)*) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cap-from-ptr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cap-from-ptr.ll new file mode 100644 index 0000000000000..29ea71b0ecd0e --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cap-from-ptr.ll @@ -0,0 +1,204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cap-from-ptr.ll +;; Check that we can correctly generate code for llvm.cheri.cap.from.pointer() +;; This previously asserted on RISC-V due to a broken ISel pattern. +;; We pipe this input through instcombine first to ensure SelectionDAG sees canonical IR. +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -passes=instcombine -S < %s | FileCheck %s --check-prefix=CHECK-IR +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -passes=instcombine -S < %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f | FileCheck %s --check-prefix=PURECAP +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -passes=instcombine -S < %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f | FileCheck %s --check-prefix=HYBRID + +define internal ptr addrspace(200) @test(ptr addrspace(200) %ptr, ptr addrspace(200) %cap, i32 %offset) nounwind { +; PURECAP-LABEL: test: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bnez a2, .LBB0_2 +; PURECAP-NEXT: # %bb.1: # %entry +; PURECAP-NEXT: ymv ca1, cnull +; PURECAP-NEXT: j .LBB0_3 +; PURECAP-NEXT: .LBB0_2: +; PURECAP-NEXT: yaddrw ca1, ca1, a2 +; PURECAP-NEXT: .LBB0_3: # %entry +; PURECAP-NEXT: sy ca1, 0(ca0) +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: test: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bnez a2, .LBB0_2 +; HYBRID-NEXT: # %bb.1: # %entry +; HYBRID-NEXT: ymv ca1, cnull +; HYBRID-NEXT: j .LBB0_3 +; HYBRID-NEXT: .LBB0_2: +; HYBRID-NEXT: yaddrw ca1, ca1, a2 +; HYBRID-NEXT: .LBB0_3: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(ca1) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @test +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], ptr addrspace(200) [[CAP:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[NEW:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) [[CAP]], i32 [[OFFSET]]) +; CHECK-IR-NEXT: store ptr addrspace(200) [[NEW]], ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) [[NEW]] +; +entry: + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) %cap, i32 %offset) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; (int_cheri_cap_from_ptr x, 0) -> null +define internal ptr addrspace(200) @cap_from_ptr_zero(ptr addrspace(200) %ptr, ptr addrspace(200) %cap) nounwind { +; PURECAP-LABEL: cap_from_ptr_zero: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: sy cnull, 0(ca0) +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_zero: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(cnull) +; HYBRID-NEXT: 
.option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_zero +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], ptr addrspace(200) [[CAP:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: store ptr addrspace(200) null, ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) null +; +entry: + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) %cap, i32 0) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; Check that (int_cheri_cap_from_ptr ddc, x) can use the DDC register directly +define internal ptr addrspace(200) @cap_from_ptr_ddc(ptr addrspace(200) %ptr, i32 %offset) nounwind { +; PURECAP-LABEL: cap_from_ptr_ddc: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bnez a1, .LBB2_2 +; PURECAP-NEXT: # %bb.1: # %entry +; PURECAP-NEXT: ymv ca1, cnull +; PURECAP-NEXT: j .LBB2_3 +; PURECAP-NEXT: .LBB2_2: +; PURECAP-NEXT: yaddrw ca1, cnull, a1 +; PURECAP-NEXT: .LBB2_3: # %entry +; PURECAP-NEXT: sy ca1, 0(ca0) +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_ddc: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: csrrc ca2, ddc, zero +; HYBRID-NEXT: bnez a1, .LBB2_2 +; HYBRID-NEXT: # %bb.1: # %entry +; HYBRID-NEXT: ymv ca1, cnull +; HYBRID-NEXT: j .LBB2_3 +; HYBRID-NEXT: .LBB2_2: +; HYBRID-NEXT: yaddrw ca1, ca2, a1 +; HYBRID-NEXT: .LBB2_3: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(ca1) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_ddc +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[DDC:%.*]] = call ptr addrspace(200) @llvm.cheri.ddc.get() +; CHECK-IR-NEXT: [[NEW:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) [[DDC]], i32 [[OFFSET]]) +; CHECK-IR-NEXT: store ptr addrspace(200) [[NEW]], ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) [[NEW]] +; +entry: + %ddc = call ptr addrspace(200) @llvm.cheri.ddc.get() + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) %ddc, i32 %offset) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; Check that (int_cheri_cap_from_ptr x, 0) -> null has priority over direct DDC usage +define internal ptr addrspace(200) @cap_from_ptr_ddc_zero(ptr addrspace(200) %ptr) nounwind { +; PURECAP-LABEL: cap_from_ptr_ddc_zero: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: sy cnull, 0(ca0) +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_ddc_zero: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(cnull) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_ddc_zero +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: store ptr addrspace(200) null, ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) null +; +entry: + %ddc = call ptr addrspace(200) @llvm.cheri.ddc.get() + %new = call ptr addrspace(200) 
@llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) %ddc, i32 0) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; Check that (int_cheri_cap_from_ptr null, x) does not use register zero (since that is DDC) +define internal ptr addrspace(200) @cap_from_ptr_null(ptr addrspace(200) %ptr, i32 %offset) nounwind { +; PURECAP-LABEL: cap_from_ptr_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bnez a1, .LBB4_2 +; PURECAP-NEXT: # %bb.1: # %entry +; PURECAP-NEXT: ymv ca1, cnull +; PURECAP-NEXT: j .LBB4_3 +; PURECAP-NEXT: .LBB4_2: +; PURECAP-NEXT: yaddrw ca1, cnull, a1 +; PURECAP-NEXT: .LBB4_3: # %entry +; PURECAP-NEXT: sy ca1, 0(ca0) +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bnez a1, .LBB4_2 +; HYBRID-NEXT: # %bb.1: # %entry +; HYBRID-NEXT: ymv ca1, cnull +; HYBRID-NEXT: j .LBB4_3 +; HYBRID-NEXT: .LBB4_2: +; HYBRID-NEXT: yaddrw ca1, cnull, a1 +; HYBRID-NEXT: .LBB4_3: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(ca1) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_null +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[NEW:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) null, i32 [[OFFSET]]) +; CHECK-IR-NEXT: store ptr addrspace(200) [[NEW]], ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) [[NEW]] +; +entry: + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200) null, i32 %offset) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +declare ptr addrspace(200) @llvm.cheri.cap.from.pointer.i32(ptr addrspace(200), i32) +declare ptr addrspace(200) @llvm.cheri.ddc.get() diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-csub.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-csub.ll new file mode 100644 index 0000000000000..ca034d475a4e7 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-csub.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-csub.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s -o - | FileCheck %s --check-prefix=HYBRID +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s --check-prefix=PURECAP + +define i32 @subp(i8 addrspace(200)* readnone %a, i8 addrspace(200)* readnone %b) nounwind { +; HYBRID-LABEL: subp: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: subp: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sub a0, a0, a1 +; PURECAP-NEXT: ret + %1 = tail call i32 @llvm.cheri.cap.diff.i32(i8 addrspace(200)* %a, i8 addrspace(200)* %b) + ret i32 %1 +} + +declare i32 @llvm.cheri.cap.diff.i32(i8 addrspace(200)*, i8 addrspace(200)*) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-intrinsics-folding-broken-module-regression.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-intrinsics-folding-broken-module-regression.ll new file mode 100644 index 0000000000000..2ab5daba4a0b0 --- /dev/null 
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-intrinsics-folding-broken-module-regression.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-intrinsics-folding-broken-module-regression.ll +; This used to create a broken function. +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -S -passes=instcombine %s -o - | FileCheck %s +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -S '-passes=default' %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O3 -o - | FileCheck %s --check-prefix ASM +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" + +@d = common addrspace(200) global i32 0, align 4 +@e = common addrspace(200) global ptr addrspace(200) null, align 32 + +; C Source code: +;int d; +;void* e; +;void g(int x, int y) { +; e = (__uintcap_t)&d + x + y; +;} + +define void @g(i32 %x, i32 %y) addrspace(200) nounwind { +; ASM-LABEL: g: +; ASM: # %bb.0: +; ASM-NEXT: .LBB0_1: # Label of block must be emitted +; ASM-NEXT: auipcc ca2, %got_pcrel_hi(d) +; ASM-NEXT: ly ca2, %pcrel_lo(.LBB0_1)(ca2) +; ASM-NEXT: .LBB0_2: # Label of block must be emitted +; ASM-NEXT: auipcc ca3, %got_pcrel_hi(e) +; ASM-NEXT: ly ca3, %pcrel_lo(.LBB0_2)(ca3) +; ASM-NEXT: addy ca0, ca2, a0 +; ASM-NEXT: addy ca0, ca0, a1 +; ASM-NEXT: sy ca0, 0(ca3) +; ASM-NEXT: ret +; CHECK-LABEL: define void @g +; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) addrspace(200) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(200) @d, i32 [[X]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(200) [[TMP5]], i32 [[Y]] +; CHECK-NEXT: store ptr addrspace(200) [[TMP11]], ptr addrspace(200) @e, align 32 +; CHECK-NEXT: ret void +; + %x.addr = alloca i32, align 4, addrspace(200) + %y.addr = alloca i32, align 4, addrspace(200) + store i32 %x, ptr addrspace(200) %x.addr, align 4 + store i32 %y, ptr addrspace(200) %y.addr, align 4 + %tmp1 = load i32, ptr addrspace(200) %x.addr, align 4 + %tmp2 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) null, i32 %tmp1) + %tmp3 = call i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200) @d) + %tmp4 = call i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200) %tmp2) + %add = add i32 %tmp3, %tmp4 + %tmp5 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) @d, i32 %add) + %tmp7 = load i32, ptr addrspace(200) %y.addr, align 4 + %tmp8 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) null, i32 %tmp7) + %tmp9 = call i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200) %tmp5) + %tmp10 = call i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200) %tmp8) + %add1 = add i32 %tmp9, %tmp10 + %tmp11 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) %tmp5, i32 %add1) + store ptr addrspace(200) %tmp11, ptr addrspace(200) @e, align 32 + ret void +} + +; define void @g(i32 %x, i32 %y) nounwind { +; %tmp1 = tail call i8 addrspace(200)* @llvm.cheri.cap.offset.increment.i32(i8 addrspace(200)* bitcast (i32 addrspace(200)* @d to i8 addrspace(200)*), i32 %x) +; %tmp3 = tail call i8 addrspace(200)* @llvm.cheri.cap.offset.increment.i32(i8 addrspace(200)* %tmp1, i32 %y) +; store i8 addrspace(200)* %tmp3, i8 addrspace(200)* addrspace(200)* @e, align 32 +; 
ret void +; } +; +declare ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200), i32) addrspace(200) +declare i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200)) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-memfn-call.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-memfn-call.ll new file mode 100644 index 0000000000000..870c8c405f0a7 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-memfn-call.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-memfn-call.ll +; Check that we call memset_c/memmove_c/memcpy_c in hybrid mode. +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s --check-prefix=PURECAP +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s -o - | FileCheck %s --check-prefix=HYBRID +%struct.x = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +declare void @llvm.memmove.p200.p200.i64(ptr addrspace(200) nocapture, ptr addrspace(200) nocapture readonly, i64, i1) +declare void @llvm.memset.p200.i64(ptr addrspace(200) nocapture, i8, i64, i1) +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) nocapture, ptr addrspace(200) nocapture readonly, i64, i1) + +define void @call_memset(ptr addrspace(200) align 4 %dst) nounwind { +; PURECAP-LABEL: call_memset: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-NEXT: li a2, 40 +; PURECAP-NEXT: li a1, 0 +; PURECAP-NEXT: li a3, 0 +; PURECAP-NEXT: call memset +; PURECAP-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 16 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: call_memset: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 40 +; HYBRID-NEXT: li a1, 0 +; HYBRID-NEXT: li a3, 0 +; HYBRID-NEXT: call memset_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + call void @llvm.memset.p200.i64(ptr addrspace(200) align 4 %dst, i8 0, i64 40, i1 false) + ret void +} + +define void @call_memcpy(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src) nounwind { +; PURECAP-LABEL: call_memcpy: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-NEXT: li a2, 40 +; PURECAP-NEXT: li a3, 0 +; PURECAP-NEXT: call memcpy +; PURECAP-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 16 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: call_memcpy: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a2, 40 +; HYBRID-NEXT: li a3, 0 +; HYBRID-NEXT: call memcpy_c@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src, i64 40, i1 false) + ret void +} + +define void @call_memmove(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src) nounwind { +; PURECAP-LABEL: call_memmove: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 8(csp) # 8-byte Folded Spill 
+; PURECAP-NEXT: li a2, 40
+; PURECAP-NEXT: li a3, 0
+; PURECAP-NEXT: call memmove
+; PURECAP-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; PURECAP-NEXT: addiy csp, csp, 16
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: call_memmove:
+; HYBRID: # %bb.0: # %entry
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; HYBRID-NEXT: li a2, 40
+; HYBRID-NEXT: li a3, 0
+; HYBRID-NEXT: call memmove_c@plt
+; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+entry:
+ call void @llvm.memmove.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src, i64 40, i1 false)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-pointer-comparison.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-pointer-comparison.ll
new file mode 100644
index 0000000000000..accdb16277a5a
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cheri-pointer-comparison.ll
@@ -0,0 +1,1324 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-pointer-comparison.ll
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s -o - | FileCheck %s --check-prefix=HYBRID
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s --check-prefix=PURECAP
+; This series of tests serves two purposes.
+; The first purpose is to check that we generate efficient code for all
+; capability comparisons, conditional branches and conditional selects.
+; The second purpose is to check that we generate code that honours the
+; signedness of the IR (which is always unsigned when emitting IR for C
+; language-level pointers, whereas __intcap uses the signedness of the type).
+; NGINX has a loop with (void*)-1 as a sentinel value which was never entered
+; due to this bug.
+; Original issue: https://github.com/CTSRD-CHERI/llvm/issues/199
+; Fixed upstream in https://reviews.llvm.org/D70917
+; (be15dfa88fb1ed94d12f374797f98ede6808f809)
+;
+; Original source code showing this surprising behaviour (for CHERI-MIPS):
+; int
+; main(void)
+; {
+; void *a, *b;
+;
+; a = (void *)0x12033091e;
+; b = (void *)0xffffffffffffffff;
+;
+; if (a < b) {
+; printf("ok\n");
+; return (0);
+; }
+;
+; printf("surprising result\n");
+; return (1);
+; }
+;
+; Morello had a similar code generation issue for selects, where a less-than
+; comparison generated a csel instruction using a signed predicate instead of
+; the unsigned one:
+; void *select_lt(void *p1, void *p2) {
+; return p1 < p2 ? 
p1 : p2; +; } +; See https://git.morello-project.org/morello/llvm-project/-/issues/22 + +define i32 @eq(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: eq: +; HYBRID: # %bb.0: +; HYBRID-NEXT: xor a0, a0, a1 +; HYBRID-NEXT: seqz a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: eq: +; PURECAP: # %bb.0: +; PURECAP-NEXT: xor a0, a0, a1 +; PURECAP-NEXT: seqz a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ne(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ne: +; HYBRID: # %bb.0: +; HYBRID-NEXT: xor a0, a0, a1 +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ne: +; PURECAP: # %bb.0: +; PURECAP-NEXT: xor a0, a0, a1 +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ugt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ugt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a1, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ugt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a1, a0 +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @uge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: uge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a0, a1 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: uge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a0, a1 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ult(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ult: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a0, a1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ult: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a0, a1 +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ule(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ule: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a1, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ule: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a1, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sgt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: sgt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a1, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sgt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a1, a0 +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: sge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a0, a1 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a0, a1 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @slt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: slt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a0, a1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: slt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a0, a1 +; PURECAP-NEXT: ret 
+ %cmp = icmp slt i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sle(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: sle: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a1, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sle: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a1, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @eq_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: eq_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: seqz a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: eq_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: seqz a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ne_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ne_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ne_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ugt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ugt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ugt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @uge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: uge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a0, zero +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: uge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a0, zero +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ult_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ult_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltiu a0, a0, 0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ult_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltiu a0, a0, 0 +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ule_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ule_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ule_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sgt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: sgt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sgtz a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sgt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sgtz a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: sge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltz a0, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltz a0, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @slt_null(i8 
addrspace(200)* %a) nounwind { +; HYBRID-LABEL: slt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slti a0, a0, 0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: slt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slti a0, a0, 0 +; PURECAP-NEXT: ret + %cmp = icmp slt i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sle_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: sle_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sgtz a0, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sle_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sgtz a0, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i8 addrspace(200)* @select_eq(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_eq: +; HYBRID: # %bb.0: +; HYBRID-NEXT: beq a0, a1, .LBB20_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB20_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_eq: +; PURECAP: # %bb.0: +; PURECAP-NEXT: beq a0, a1, .LBB20_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB20_2: +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ne(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ne: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bne a0, a1, .LBB21_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB21_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ne: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bne a0, a1, .LBB21_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB21_2: +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ugt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ugt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu a1, a0, .LBB22_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB22_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ugt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu a1, a0, .LBB22_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB22_2: +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_uge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_uge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu a0, a1, .LBB23_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB23_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_uge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu a0, a1, .LBB23_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB23_2: +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ult(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ult: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu a0, a1, .LBB24_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB24_2: +; HYBRID-NEXT: ret +; +; 
PURECAP-LABEL: select_ult: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu a0, a1, .LBB24_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB24_2: +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ule(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ule: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu a1, a0, .LBB25_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB25_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ule: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu a1, a0, .LBB25_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB25_2: +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sgt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sgt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: blt a1, a0, .LBB26_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB26_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sgt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: blt a1, a0, .LBB26_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB26_2: +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bge a0, a1, .LBB27_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB27_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bge a0, a1, .LBB27_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB27_2: +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_slt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_slt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: blt a0, a1, .LBB28_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB28_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_slt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: blt a0, a1, .LBB28_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB28_2: +; PURECAP-NEXT: ret + %cmp = icmp slt i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sle(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sle: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bge a1, a0, .LBB29_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB29_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sle: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bge a1, a0, .LBB29_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB29_2: +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_eq_null(i8 addrspace(200)* %a, 
i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_eq_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: beqz a0, .LBB30_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB30_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_eq_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: beqz a0, .LBB30_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB30_2: +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ne_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ne_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bnez a0, .LBB31_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB31_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ne_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bnez a0, .LBB31_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB31_2: +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ugt_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ugt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu zero, a0, .LBB32_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB32_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ugt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu zero, a0, .LBB32_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB32_2: +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_uge_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_uge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu a0, zero, .LBB33_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB33_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_uge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu a0, zero, .LBB33_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB33_2: +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ult_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ult_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu a0, zero, .LBB34_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB34_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ult_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu a0, zero, .LBB34_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB34_2: +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ule_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ule_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu zero, a0, .LBB35_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB35_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ule_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu zero, a0, 
.LBB35_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB35_2: +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sgt_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sgt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgtz a0, .LBB36_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB36_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sgt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgtz a0, .LBB36_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB36_2: +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sge_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgez a0, .LBB37_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB37_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgez a0, .LBB37_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB37_2: +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_slt_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_slt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltz a0, .LBB38_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB38_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_slt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltz a0, .LBB38_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB38_2: +; PURECAP-NEXT: ret + %cmp = icmp slt i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sle_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sle_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: blez a0, .LBB39_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB39_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sle_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: blez a0, .LBB39_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB39_2: +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +declare i32 @func1() nounwind +declare i32 @func2() nounwind + +define i32 @branch_eq(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_eq: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beq a0, a1, .LBB40_2 +; HYBRID-NEXT: # %bb.1: # %if.end +; HYBRID-NEXT: tail func2@plt +; HYBRID-NEXT: .LBB40_2: # %if.then +; HYBRID-NEXT: tail func1@plt +; +; PURECAP-LABEL: branch_eq: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beq a0, a1, .LBB40_2 +; PURECAP-NEXT: # %bb.1: # %if.end +; PURECAP-NEXT: tail func2 +; PURECAP-NEXT: .LBB40_2: # %if.then +; PURECAP-NEXT: tail func1 +entry: + %cmp = icmp eq i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail 
call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ne(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ne: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beq a0, a1, .LBB41_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB41_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ne: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beq a0, a1, .LBB41_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB41_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ne i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ugt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ugt: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu a1, a0, .LBB42_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB42_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ugt: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu a1, a0, .LBB42_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB42_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ugt i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_uge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_uge: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu a0, a1, .LBB43_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB43_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_uge: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltu a0, a1, .LBB43_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB43_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp uge i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ult(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ult: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu a0, a1, .LBB44_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB44_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ult: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu a0, a1, .LBB44_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB44_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ult i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ule(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ule: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu a1, a0, .LBB45_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB45_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ule: +; PURECAP: # 
%bb.0: # %entry +; PURECAP-NEXT: bltu a1, a0, .LBB45_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB45_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ule i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sgt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_sgt: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bge a1, a0, .LBB46_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB46_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sgt: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bge a1, a0, .LBB46_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB46_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sgt i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_sge: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: blt a0, a1, .LBB47_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB47_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sge: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: blt a0, a1, .LBB47_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB47_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sge i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_slt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_slt: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bge a0, a1, .LBB48_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB48_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_slt: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bge a0, a1, .LBB48_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB48_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp slt i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sle(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_sle: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: blt a1, a0, .LBB49_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB49_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sle: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: blt a1, a0, .LBB49_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB49_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sle i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_eq_null(i8 addrspace(200)* %a) 
nounwind { +; HYBRID-LABEL: branch_eq_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beqz a0, .LBB50_2 +; HYBRID-NEXT: # %bb.1: # %if.end +; HYBRID-NEXT: tail func2@plt +; HYBRID-NEXT: .LBB50_2: # %if.then +; HYBRID-NEXT: tail func1@plt +; +; PURECAP-LABEL: branch_eq_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beqz a0, .LBB50_2 +; PURECAP-NEXT: # %bb.1: # %if.end +; PURECAP-NEXT: tail func2 +; PURECAP-NEXT: .LBB50_2: # %if.then +; PURECAP-NEXT: tail func1 +entry: + %cmp = icmp eq i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ne_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ne_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beqz a0, .LBB51_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB51_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ne_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beqz a0, .LBB51_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB51_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ne i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ugt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ugt_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu zero, a0, .LBB52_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB52_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ugt_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu zero, a0, .LBB52_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB52_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ugt i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_uge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_uge_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu a0, zero, .LBB53_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB53_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_uge_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltu a0, zero, .LBB53_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB53_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp uge i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ult_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ult_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu a0, zero, .LBB54_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB54_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ult_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu a0, zero, .LBB54_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB54_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp 
= icmp ult i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ule_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ule_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu zero, a0, .LBB55_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB55_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ule_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltu zero, a0, .LBB55_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB55_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ule i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sgt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_sgt_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: blez a0, .LBB56_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB56_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sgt_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: blez a0, .LBB56_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB56_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sgt i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_sge_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltz a0, .LBB57_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB57_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sge_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltz a0, .LBB57_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB57_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sge i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_slt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_slt_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgez a0, .LBB58_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB58_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_slt_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgez a0, .LBB58_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB58_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp slt i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sle_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_sle_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgtz a0, .LBB59_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB59_2: # %if.end +; HYBRID-NEXT: tail 
func2@plt +; +; PURECAP-LABEL: branch_sle_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgtz a0, .LBB59_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB59_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sle i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cmpxchg-cap-ptr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cmpxchg-cap-ptr.ll new file mode 100644 index 0000000000000..f017d6aafec34 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/cmpxchg-cap-ptr.ll @@ -0,0 +1,670 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cmpxchg-cap-ptr.ll +; Check that we can generate sensible code for atomic operations using capability pointers on capabilities +; in both hybrid and purecap mode. +; See https://github.com/CTSRD-CHERI/llvm-project/issues/470 +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -mattr=+a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -mattr=-a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-LIBCALLS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -mattr=+a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -mattr=-a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-LIBCALLS --allow-unused-prefixes + +define { i8, i1 } @test_cmpxchg_strong_i8(ptr addrspace(200) %ptr, i8 %exp, i8 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i8: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 24 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 24 +; PURECAP-ATOMICS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.b.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB0_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.b.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB0_1 +; PURECAP-ATOMICS-NEXT: .LBB0_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i8: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sb a1, 7(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 7 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 1 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_1 +; PURECAP-LIBCALLS-NEXT: lb a1, 7(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i8: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra,
12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sb a1, 11(sp) +; HYBRID-NEXT: addi a1, sp, 11 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_1_c@plt +; HYBRID-NEXT: lbu a1, 11(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i8 %exp, i8 %new acq_rel acquire + ret { i8, i1 } %1 +} + +define { i16, i1 } @test_cmpxchg_strong_i16(ptr addrspace(200) %ptr, i16 %exp, i16 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i16: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 16 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 16 +; PURECAP-ATOMICS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.h.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB1_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.h.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB1_1 +; PURECAP-ATOMICS-NEXT: .LBB1_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i16: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sh a1, 6(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 6 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 2 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_2 +; PURECAP-LIBCALLS-NEXT: lh a1, 6(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i16: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sh a1, 10(sp) +; HYBRID-NEXT: addi a1, sp, 10 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_2_c@plt +; HYBRID-NEXT: lh a1, 10(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i16 %exp, i16 %new acq_rel acquire + ret { i16, i1 } %1 +} + +define { i32, i1 } @test_cmpxchg_strong_i32(ptr addrspace(200) %ptr, i32 %exp, i32 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.w.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB2_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB2_1 +; PURECAP-ATOMICS-NEXT: .LBB2_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sw a1, 4(csp) +; 
PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 4 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 4 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a1, 4(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw a1, 8(sp) +; HYBRID-NEXT: addi a1, sp, 8 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a1, 8(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i32 %exp, i32 %new acq_rel acquire + ret { i32, i1 } %1 +} + +define { i64, i1 } @test_cmpxchg_strong_i64(ptr addrspace(200) %ptr, i64 %exp, i64 %new) nounwind { +; PURECAP-LABEL: test_cmpxchg_strong_i64: +; PURECAP: # %bb.0: +; PURECAP-NEXT: addiy csp, csp, -32 +; PURECAP-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; PURECAP-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; PURECAP-NEXT: mv a6, a5 +; PURECAP-NEXT: mv a7, a4 +; PURECAP-NEXT: ymv ct0, ca1 +; PURECAP-NEXT: ymv cs0, ca0 +; PURECAP-NEXT: sw a3, 12(csp) +; PURECAP-NEXT: sw a2, 8(csp) +; PURECAP-NEXT: addiy ca0, csp, 8 +; PURECAP-NEXT: ybndsiw ca1, ca0, 8 +; PURECAP-NEXT: li a4, 4 +; PURECAP-NEXT: li a5, 2 +; PURECAP-NEXT: ymv ca0, ct0 +; PURECAP-NEXT: mv a2, a7 +; PURECAP-NEXT: mv a3, a6 +; PURECAP-NEXT: call __atomic_compare_exchange_8 +; PURECAP-NEXT: lw a1, 12(csp) +; PURECAP-NEXT: lw a2, 8(csp) +; PURECAP-NEXT: sw a1, 4(cs0) +; PURECAP-NEXT: sw a2, 0(cs0) +; PURECAP-NEXT: sb a0, 8(cs0) +; PURECAP-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; PURECAP-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 32 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i64: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; HYBRID-NEXT: mv a6, a5 +; HYBRID-NEXT: mv a7, a4 +; HYBRID-NEXT: ymv ct0, ca1 +; HYBRID-NEXT: mv s0, a0 +; HYBRID-NEXT: sw a3, 4(sp) +; HYBRID-NEXT: sw a2, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a4, 4 +; HYBRID-NEXT: li a5, 2 +; HYBRID-NEXT: ymv ca0, ct0 +; HYBRID-NEXT: mv a2, a7 +; HYBRID-NEXT: mv a3, a6 +; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt +; HYBRID-NEXT: lw a1, 4(sp) +; HYBRID-NEXT: lw a2, 0(sp) +; HYBRID-NEXT: sw a1, 4(s0) +; HYBRID-NEXT: sw a2, 0(s0) +; HYBRID-NEXT: sb a0, 8(s0) +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i64 %exp, i64 %new acq_rel acquire + ret { i64, i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_strong_cap(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_cap: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne 
a3, a1, .LBB4_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB4_1 +; PURECAP-ATOMICS-NEXT: .LBB4_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_cap: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 8 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_cap: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_strong_cap_i32(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_cap_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB5_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB5_1 +; PURECAP-ATOMICS-NEXT: .LBB5_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_cap_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 8 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_cap_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call 
__atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} + + +define { i8, i1 } @test_cmpxchg_weak_i8(ptr addrspace(200) %ptr, i8 %exp, i8 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i8: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 24 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 24 +; PURECAP-ATOMICS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.b.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB6_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.b.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB6_1 +; PURECAP-ATOMICS-NEXT: .LBB6_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i8: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sb a1, 7(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 7 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 1 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_1 +; PURECAP-LIBCALLS-NEXT: lb a1, 7(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i8: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sb a1, 11(sp) +; HYBRID-NEXT: addi a1, sp, 11 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_1_c@plt +; HYBRID-NEXT: lbu a1, 11(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i8 %exp, i8 %new acq_rel acquire + ret { i8, i1 } %1 +} + +define { i16, i1 } @test_cmpxchg_weak_i16(ptr addrspace(200) %ptr, i16 %exp, i16 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i16: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 16 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 16 +; PURECAP-ATOMICS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.h.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB7_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.h.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB7_1 +; PURECAP-ATOMICS-NEXT: .LBB7_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i16: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sh a1, 6(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 6 +; 
PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 2 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_2 +; PURECAP-LIBCALLS-NEXT: lh a1, 6(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i16: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sh a1, 10(sp) +; HYBRID-NEXT: addi a1, sp, 10 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_2_c@plt +; HYBRID-NEXT: lh a1, 10(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i16 %exp, i16 %new acq_rel acquire + ret { i16, i1 } %1 +} + +define { i32, i1 } @test_cmpxchg_weak_i32(ptr addrspace(200) %ptr, i32 %exp, i32 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.w.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB8_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB8_1 +; PURECAP-ATOMICS-NEXT: .LBB8_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sw a1, 4(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 4 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 4 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a1, 4(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw a1, 8(sp) +; HYBRID-NEXT: addi a1, sp, 8 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a1, 8(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i32 %exp, i32 %new acq_rel acquire + ret { i32, i1 } %1 +} + +define { i64, i1 } @test_cmpxchg_weak_i64(ptr addrspace(200) %ptr, i64 %exp, i64 %new) nounwind { +; PURECAP-LABEL: test_cmpxchg_weak_i64: +; PURECAP: # %bb.0: +; PURECAP-NEXT: addiy csp, csp, -32 +; PURECAP-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; PURECAP-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; PURECAP-NEXT: mv a6, a5 +; PURECAP-NEXT: mv a7, a4 +; PURECAP-NEXT: ymv ct0, ca1 +; PURECAP-NEXT: ymv cs0, ca0 +; 
PURECAP-NEXT: sw a3, 12(csp) +; PURECAP-NEXT: sw a2, 8(csp) +; PURECAP-NEXT: addiy ca0, csp, 8 +; PURECAP-NEXT: ybndsiw ca1, ca0, 8 +; PURECAP-NEXT: li a4, 4 +; PURECAP-NEXT: li a5, 2 +; PURECAP-NEXT: ymv ca0, ct0 +; PURECAP-NEXT: mv a2, a7 +; PURECAP-NEXT: mv a3, a6 +; PURECAP-NEXT: call __atomic_compare_exchange_8 +; PURECAP-NEXT: lw a1, 12(csp) +; PURECAP-NEXT: lw a2, 8(csp) +; PURECAP-NEXT: sw a1, 4(cs0) +; PURECAP-NEXT: sw a2, 0(cs0) +; PURECAP-NEXT: sb a0, 8(cs0) +; PURECAP-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; PURECAP-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 32 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i64: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; HYBRID-NEXT: mv a6, a5 +; HYBRID-NEXT: mv a7, a4 +; HYBRID-NEXT: ymv ct0, ca1 +; HYBRID-NEXT: mv s0, a0 +; HYBRID-NEXT: sw a3, 4(sp) +; HYBRID-NEXT: sw a2, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a4, 4 +; HYBRID-NEXT: li a5, 2 +; HYBRID-NEXT: ymv ca0, ct0 +; HYBRID-NEXT: mv a2, a7 +; HYBRID-NEXT: mv a3, a6 +; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt +; HYBRID-NEXT: lw a1, 4(sp) +; HYBRID-NEXT: lw a2, 0(sp) +; HYBRID-NEXT: sw a1, 4(s0) +; HYBRID-NEXT: sw a2, 0(s0) +; HYBRID-NEXT: sb a0, 8(s0) +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i64 %exp, i64 %new acq_rel acquire + ret { i64, i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_weak_cap(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_cap: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB10_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB10_1 +; PURECAP-ATOMICS-NEXT: .LBB10_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_cap: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 8 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_cap: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_weak_cap_i32(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_cap_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB11_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB11_1 +; PURECAP-ATOMICS-NEXT: .LBB11_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_cap_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16 +; PURECAP-LIBCALLS-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 8 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_cap_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/dagcombine-ptradd-deleted-regression.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/dagcombine-ptradd-deleted-regression.ll new file mode 100644 index 0000000000000..523e23e5b44d3 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/dagcombine-ptradd-deleted-regression.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/dagcombine-ptradd-deleted-regression.ll +; This would previously crash DAGCombiner::visitPTRADD since the PTRADD +; corresponding to the second GEP would be collapsed to a no-op when +; reassociated and delete the synthesised PTRADD node, not just the ADD, which +; the folding code was not prepared for. 
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s -o - | FileCheck %s --check-prefix=HYBRID +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s --check-prefix=PURECAP + +declare i32 @bar(i32 addrspace(200)*) + +define internal i32 @foo(i32 addrspace(200)* %a, i32 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: foo: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: addiy ca0, ca0, 4 +; HYBRID-NEXT: sy ca0, 0(sp) # 8-byte Folded Spill +; HYBRID-NEXT: .LBB0_1: # %loop +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 8-byte Folded Reload +; HYBRID-NEXT: call bar@plt +; HYBRID-NEXT: j .LBB0_1 +; +; PURECAP-LABEL: foo: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; PURECAP-NEXT: sy cs0, 0(csp) # 8-byte Folded Spill +; PURECAP-NEXT: addiy cs0, ca0, 4 +; PURECAP-NEXT: .LBB0_1: # %loop +; PURECAP-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-NEXT: ymv ca0, cs0 +; PURECAP-NEXT: call bar +; PURECAP-NEXT: j .LBB0_1 +entry: + br label %loop + +loop: + %0 = getelementptr inbounds i32, i32 addrspace(200)* %a, i32 1 + %1 = load i32, i32 addrspace(200)* %b, align 16 + %2 = mul i32 0, %1 + %3 = getelementptr inbounds i32, i32 addrspace(200)* %0, i32 %2 + %4 = call i32 @bar(i32 addrspace(200)* %3) + br label %loop +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/frameindex-arith.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/frameindex-arith.ll new file mode 100644 index 0000000000000..a2941ef0c8305 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/frameindex-arith.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/frameindex-arith.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s + +; Check that we can fold the GEP (PTRADD) into the FrameIndex calculation +; rather than emitting two instructions. + +; Contains an explicit @llvm.cheri.cap.bounds.set so CheriBoundAllocas sees the +; use as safe and doesn't interfere by inserting bounds on the FrameIndex +; before the GEP/PTRADD. 
+define void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: addiy ca0, csp, 7 +; CHECK-NEXT: ybndsrw ca0, ca0, zero +; CHECK-NEXT: call bar +; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + %x = alloca [2 x i8], align 1, addrspace(200) + %x_plus_1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(200)* %x, i32 0, i32 1 + %p = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i32(i8 addrspace(200)* %x_plus_1, i32 0) + call void @bar(i8 addrspace(200)* %p) + ret void +} + +declare void @bar(i8 addrspace(200)*) + +declare i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i32(i8 addrspace(200)*, i32) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/function-alias-size.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/function-alias-size.ll new file mode 100644 index 0000000000000..c375bfbbcc5cb --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/function-alias-size.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/function-alias-size.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - -filetype=obj < %s | llvm-objdump --syms -r - | FileCheck %s --check-prefix=OBJDUMP +; The MIPS backend asserts when emitting a relocation against an unsized but defined +; function-type global, which was happening with destructor aliases: +; The _ZN*D1Ev destructor is emitted as an alias for the defined _ZN*D2Ev destructor, +; and did not have size information, which triggered the assertion after the April 2021 merge.
+; Check that we emit size information for function aliases: + +@a = constant i8 addrspace(200)* bitcast (void () addrspace(200)* @_ZN3fooD1Ev to i8 addrspace(200)*) +@_ZN3fooD1Ev = alias void (), void () addrspace(200)* @_ZN3fooD2Ev +define void @_ZN3fooD2Ev() addrspace(200) nounwind { +; ASM-LABEL: _ZN3fooD2Ev: +; ASM: # %bb.0: +; ASM-NEXT: ret + ret void +} + +@two_ints = private global {i32, i32} {i32 1, i32 2} +@elem0 = alias i32, getelementptr({i32, i32}, {i32, i32}* @two_ints, i32 0, i32 0) +@elem1 = alias i32, getelementptr({i32, i32}, {i32, i32}* @two_ints, i32 0, i32 1) + +; UTC_ARGS: --disable +; ASM: .size _ZN3fooD2Ev, .Lfunc_end0-_ZN3fooD2Ev + +; ASM-LABEL: .Ltwo_ints: +; ASM-NEXT: .{{4byte|word}} 1 +; ASM-NEXT: .{{4byte|word}} 2 +; ASM-NEXT: .size .Ltwo_ints, 8 + +; The function alias symbol should have the same size expression: +; ASM-LABEL: .globl _ZN3fooD1Ev +; ASM-NEXT: .type _ZN3fooD1Ev,@function +; ASM-NEXT: .set _ZN3fooD1Ev, _ZN3fooD2Ev +; ASM-NEXT: .size _ZN3fooD1Ev, .Lfunc_end0-_ZN3fooD2Ev + +; But for the aliases using a GEP, we have to subtract the offset: +; ASM-LABEL: .globl elem0 +; ASM-NEXT: .set elem0, .Ltwo_ints +; ASM-NEXT: .size elem0, 4 +; ASM-LABEL: .globl elem1 +; ASM-NEXT: .set elem1, .Ltwo_ints+4 +; ASM-NEXT: .size elem1, 4 + +; Check that the ELF st_size value was set correctly: +; OBJDUMP-LABEL: SYMBOL TABLE: +; OBJDUMP-NEXT: {{0+}}0 l df *ABS* {{0+}} function-alias-size.ll +; OBJDUMP-DAG: {{0+}}0 g F .text [[SIZE:[0-9a-f]+]] _ZN3fooD2Ev +; OBJDUMP-DAG: {{0+}}0 g O .data.rel.ro {{0+(10|8)}} a +; OBJDUMP-DAG: {{0+}}0 g F .text [[SIZE]] _ZN3fooD1Ev +; elem1 should have a size of 4 and not 8: +; OBJDUMP-DAG: {{0+}}0 g O .{{s?}}data {{0+}}4 elem0 +; OBJDUMP-DAG: {{0+}}4 g O .{{s?}}data {{0+}}4 elem1 diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/global-capinit-hybrid.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/global-capinit-hybrid.ll new file mode 100644 index 0000000000000..55f5a3bef0c6f --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/global-capinit-hybrid.ll @@ -0,0 +1,163 @@ +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/global-capinit-hybrid.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s -o - | \ +; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.word +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s -filetype=obj -o - | llvm-objdump -r -t - | \ +; RUN: FileCheck %s --check-prefix=RELOCS -DINTEGER_RELOC=R_RISCV_32 '-DCAPABILITY_RELOC=R_RISCV_CHERI_CAPABILITY' +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128" + +declare void @extern_fn() +@extern_data = external global i8, align 1 + +; TODO: should the inttoptr ones be tagged -> emit a constructor? 
+ +@global_ptr_const = global i8* inttoptr (i32 1234 to i8*), align 4 +; ASM-LABEL: .globl global_ptr_const +; ASM-NEXT: .p2align 2 +; ASM-NEXT: global_ptr_const: +; ASM-NEXT: [[PTR_DIRECTIVE]] 1234 +; ASM-NEXT: .size global_ptr_const, 4 +@global_cap_inttoptr = global i8 addrspace(200)* inttoptr (i32 1234 to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_cap_inttoptr +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_inttoptr: +; ASM-NEXT: .chericap 1234 +; ASM-NEXT: .size global_cap_inttoptr, 8 +@global_cap_addrspacecast = global i8 addrspace(200)* addrspacecast (i8* inttoptr (i32 1234 to i8*) to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_cap_addrspacecast +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_addrspacecast: +; ASM-NEXT: .chericap 1234 +; ASM-NEXT: .size global_cap_addrspacecast, 8 +@global_cap_nullgep = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i32 1234), align 8 +; ASM-LABEL: .globl global_cap_nullgep +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_nullgep: +; ASM-NEXT: .chericap 1234 +; ASM-NEXT: .size global_cap_nullgep, 8 + +@global_ptr_data = global i8* @extern_data, align 4 +; ASM-LABEL: .globl global_ptr_data +; ASM-NEXT: .p2align 2 +; ASM-NEXT: global_ptr_data: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data +; ASM-NEXT: .size global_ptr_data, 4 +@global_ptr_data_past_end = global i8* getelementptr inbounds (i8, i8* @extern_data, i32 1), align 4 +; ASM-LABEL: .globl global_ptr_data_past_end +; ASM-NEXT: .p2align 2 +; ASM-NEXT: global_ptr_data_past_end: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+1 +; ASM-NEXT: .size global_ptr_data_past_end, 4 +@global_ptr_data_two_past_end = global i8* getelementptr (i8, i8* @extern_data, i32 2), align 4 +; ASM-LABEL: .globl global_ptr_data_two_past_end +; ASM-NEXT: .p2align 2 +; ASM-NEXT: global_ptr_data_two_past_end: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+2 +; ASM-NEXT: .size global_ptr_data_two_past_end, 4 + +@global_cap_data_addrspacecast = global i8 addrspace(200)* addrspacecast (i8* @extern_data to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_cap_data_addrspacecast +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_data_addrspacecast: +; ASM-NEXT: .chericap extern_data +; ASM-NEXT: .size global_cap_data_addrspacecast, 8 +@global_cap_data_addrspacecast_past_end = global i8 addrspace(200)* addrspacecast (i8* getelementptr inbounds (i8, i8* @extern_data, i32 1) to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_cap_data_addrspacecast_past_end +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_data_addrspacecast_past_end: +; ASM-NEXT: .chericap extern_data+1 +; ASM-NEXT: .size global_cap_data_addrspacecast_past_end, 8 +@global_cap_data_addrspacecast_two_past_end = global i8 addrspace(200)* addrspacecast (i8* getelementptr (i8, i8* @extern_data, i32 2) to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_cap_data_addrspacecast_two_past_end +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_data_addrspacecast_two_past_end: +; ASM-NEXT: .chericap extern_data+2 +; ASM-NEXT: .size global_cap_data_addrspacecast_two_past_end, 8 + +@global_cap_data_nullgep = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i32 ptrtoint (i8* @extern_data to i32)), align 8 +; ASM-LABEL: .globl global_cap_data_nullgep +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_data_nullgep: +; ASM-NEXT: .p2align 3 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_cap_data_nullgep, 8 +@global_cap_data_nullgep_past_end = global i8 
addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i32 ptrtoint (i8* getelementptr inbounds (i8, i8* @extern_data, i32 1) to i32)), align 8 +; ASM-LABEL: .globl global_cap_data_nullgep_past_end +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_data_nullgep_past_end: +; ASM-NEXT: .p2align 3 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+1 +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_cap_data_nullgep_past_end, 8 +@global_cap_data_nullgep_two_past_end = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i32 ptrtoint (i8* getelementptr (i8, i8* @extern_data, i32 2) to i32)), align 8 +; ASM-LABEL: .globl global_cap_data_nullgep_two_past_end +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_cap_data_nullgep_two_past_end: +; ASM-NEXT: .p2align 3 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+2 +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_cap_data_nullgep_two_past_end, 8 + +@global_fnptr = global void ()* @extern_fn, align 4 +; ASM-LABEL: .globl global_fnptr +; ASM-NEXT: .p2align 2 +; ASM-NEXT: global_fnptr: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_fn +; ASM-NEXT: .size global_fnptr, 4 +@global_fncap_addrspacecast = global void () addrspace(200)* addrspacecast (void ()* @extern_fn to void () addrspace(200)*), align 8 +; ASM-LABEL: .globl global_fncap_addrspacecast +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_fncap_addrspacecast: +; ASM-NEXT: .chericap extern_fn +; ASM-NEXT: .size global_fncap_addrspacecast, 8 +@global_fncap_intcap_addrspacecast = global i8 addrspace(200)* addrspacecast (i8* bitcast (void ()* @extern_fn to i8*) to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_fncap_intcap_addrspacecast +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_fncap_intcap_addrspacecast: +; ASM-NEXT: .chericap extern_fn +; ASM-NEXT: .size global_fncap_intcap_addrspacecast, 8 +@global_fncap_intcap_nullgep = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i32 ptrtoint (void ()* @extern_fn to i32)), align 8 +; ASM-LABEL: .globl global_fncap_intcap_nullgep +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_fncap_intcap_nullgep: +; ASM-NEXT: .p2align 3 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_fn +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_fncap_intcap_nullgep, 8 +@global_fncap_addrspacecast_plus_two = global i8 addrspace(200)* addrspacecast (i8* getelementptr (i8, i8* bitcast (void ()* @extern_fn to i8*), i32 2) to i8 addrspace(200)*), align 8 +; ASM-LABEL: .globl global_fncap_addrspacecast_plus_two +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_fncap_addrspacecast_plus_two: +; ASM-NEXT: .chericap extern_fn+2 +; ASM-NEXT: .size global_fncap_addrspacecast_plus_two, 8 +@global_fncap_nullgep_plus_two = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i32 ptrtoint (i8* getelementptr (i8, i8* bitcast (void ()* @extern_fn to i8*), i32 2) to i32)), align 8 +; ASM-LABEL: .globl global_fncap_nullgep_plus_two +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_fncap_nullgep_plus_two: +; ASM-NEXT: .p2align 3 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_fn+2 +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_fncap_nullgep_plus_two, 8 + + +; RELOCS-LABEL: RELOCATION RECORDS FOR [.{{s?}}data]: +; RELOCS-NEXT: OFFSET TYPE VALUE +; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data +; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x1 +; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x2 +; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_data +; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_data+0x1 +; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_data+0x2 +; RELOCS-NEXT: 
[[INTEGER_RELOC]] extern_data
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x1
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x2
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_fn
+; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_fn
+; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_fn
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_fn
+; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_fn+0x2
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_fn+0x2
+
+; Don't use .sdata for RISC-V, to allow re-using the same RELOCS lines.
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"SmallDataLimit", i32 0}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/gvn-capability-store-to-load-fwd.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/gvn-capability-store-to-load-fwd.ll
new file mode 100644
index 0000000000000..8bd0b9f5bf29d
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/gvn-capability-store-to-load-fwd.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/gvn-capability-store-to-load-fwd.ll
+; Check that GVN does not attempt to read capability fields that it can't get the bits for
+; This is https://github.com/CTSRD-CHERI/llvm-project/issues/385
+; GVN was previously doing the following invalid transformation (note the shift by 64 of the ptrtoint result)
+; %ai = alloca %suspicious_type, align 16, addrspace(200)
+; %tmp33 = bitcast %2 addrspace(200)* %ai to i8 addrspace(200)* addrspace(200)*
+; %tmp34 = load i8 addrspace(200)*, i8 addrspace(200)* addrspace(200)* %tmp33, align 16
+; %0 = ptrtoint i8 addrspace(200)* %tmp34 to i64 ; INCORRECT transformation (does not transfer all bits)
+; %1 = lshr i64 %0, 64 ; Shift right by 64 to get field #2
+; %2 = trunc i64 %1 to i32 ; truncate to drop the high bits
+; It assumed it could get bits 32-63 by doing a ptrtoint, but on CHERI-MIPS ptrtoint returns bits 65-127
+
+; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -S -aa-pipeline=basic-aa -passes=gvn -o - %s | FileCheck %s
+; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -S -aa-pipeline=basic-aa -passes=gvn -o - %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O0 -o - | FileCheck %s --check-prefix=ASM
+
+; Check in the currently broken baseline to show the diff in the commit that fixes it
+
+target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200"
+
+%struct.addrinfo = type { i32, i32, i32, i32, i32, ptr addrspace(200), ptr addrspace(200), ptr addrspace(200) }
+
+
+define i32 @first_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind {
+; ASM-LABEL: first_i32_store_to_load_fwd:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -48
+; ASM-NEXT: sy ca0, 0(csp)
+; ASM-NEXT: lw a0, 0(csp)
+; ASM-NEXT: addiy csp, csp, 48
+; ASM-NEXT: ret
+; CHECK-LABEL: define i32 @first_i32_store_to_load_fwd
+; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 8, addrspace(200)
+; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 8
+; CHECK-NEXT: [[RESULT:%.*]] = load i32, ptr addrspace(200) [[STACKVAL]], align 4
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
+ %stackval = alloca %struct.addrinfo, align 8, addrspace(200)
+ %field = getelementptr inbounds
%struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 0 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 8 + %result = load i32, ptr addrspace(200) %field, align 4 + ret i32 %result +} + +define i32 @second_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: second_i32_store_to_load_fwd: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -48 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: lw a0, 4(csp) +; ASM-NEXT: addiy csp, csp, 48 +; ASM-NEXT: ret +; CHECK-LABEL: define i32 @second_i32_store_to_load_fwd +; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { +; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 8, addrspace(200) +; CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds [[STRUCT_ADDRINFO]], ptr addrspace(200) [[STACKVAL]], i64 0, i32 1 +; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 8 +; CHECK-NEXT: [[RESULT:%.*]] = load i32, ptr addrspace(200) [[FIELD]], align 4 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %stackval = alloca %struct.addrinfo, align 8, addrspace(200) + %field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 1 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 8 + %result = load i32, ptr addrspace(200) %field, align 4 + ret i32 %result +} + +define i32 @third_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: third_i32_store_to_load_fwd: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -48 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: # implicit-def: $x10 +; ASM-NEXT: addiy csp, csp, 48 +; ASM-NEXT: ret +; CHECK-LABEL: define i32 @third_i32_store_to_load_fwd +; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { +; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 8, addrspace(200) +; CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds [[STRUCT_ADDRINFO]], ptr addrspace(200) [[STACKVAL]], i64 0, i32 2 +; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 8 +; CHECK-NEXT: ret i32 undef +; + %stackval = alloca %struct.addrinfo, align 8, addrspace(200) + %field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 2 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 8 + %result = load i32, ptr addrspace(200) %field, align 4 + ret i32 %result +} + +define i32 @fourth_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: fourth_i32_store_to_load_fwd: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -48 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: # implicit-def: $x10 +; ASM-NEXT: addiy csp, csp, 48 +; ASM-NEXT: ret +; CHECK-LABEL: define i32 @fourth_i32_store_to_load_fwd +; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { +; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 8, addrspace(200) +; CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds [[STRUCT_ADDRINFO]], ptr addrspace(200) [[STACKVAL]], i64 0, i32 3 +; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 8 +; CHECK-NEXT: ret i32 undef +; + %stackval = alloca %struct.addrinfo, align 8, addrspace(200) + %field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 3 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 8 + %result = 
load i32, ptr addrspace(200) %field, align 4
+ ret i32 %result
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/hoist-alloca.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/hoist-alloca.ll
new file mode 100644
index 0000000000000..88272d2d25cb3
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/hoist-alloca.ll
@@ -0,0 +1,188 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/hoist-alloca.ll
+; REQUIRES: asserts
+; Check that we can hoist the csetbounds for a local alloca outside of loops
+; We know that it's always tagged and unsealed so machinelicm should be able
+; to hoist the csetbounds instructions.
+; TODO: for MIPS "simple-register-coalescing" moves the CheriBoundedStackPseudoImm back into the loop.
+; In general this will be faster than loading from the stack, but it's probably worse
+; than using a callee-saved register for loops with many iterations.
+
+; Generated from this code:
+; void call(int *src, int *dst);
+;
+; void hoist_alloca_uncond(int cond) {
+; int buf1[123];
+; int buf2[22];
+; for (int i = 0; i < 100; i++) {
+; call(buf1, buf2);
+; }
+; }
+;
+; void hoist_alloca_cond(int cond) {
+; int buf1[123];
+; int buf2[22];
+; for (int i = 0; i < 100; i++) {
+; if (cond) {
+; call(buf1, buf2);
+; }
+; }
+; }
+
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -o %t.mir -stop-before=early-machinelicm < %s
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -run-pass=early-machinelicm -debug-only=machinelicm %t.mir -o /dev/null 2>%t.dbg
+; RUN: FileCheck --input-file=%t.dbg --check-prefix=MACHINELICM-DBG %s
+; Check that MachineLICM hoists the CheriBoundedStackPseudoImm (MIPS) / IncOffset+SetBoundsImm (RISCV) instructions
+; MACHINELICM-DBG-LABEL: ******** Pre-regalloc Machine LICM: hoist_alloca_uncond
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 512
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.1.buf2, 0
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG-LABEL: ******** Pre-regalloc Machine LICM: hoist_alloca_cond
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 512
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.1.buf2, 0
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+
+; RUN: llc -mtriple=riscv32
--relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O1 -o - < %s | FileCheck %s + +define void @hoist_alloca_uncond(i32 signext %cond) local_unnamed_addr addrspace(200) nounwind { +; CHECK-LABEL: hoist_alloca_uncond: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -704 +; CHECK-NEXT: sy cra, 696(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 688(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs1, 680(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs2, 672(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs3, 664(csp) # 8-byte Folded Spill +; CHECK-NEXT: addiy cs0, csp, 704 +; CHECK-NEXT: andi a0, sp, -64 +; CHECK-NEXT: yaddrw csp, csp, a0 +; CHECK-NEXT: li s3, 100 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: addiy ca1, csp, 128 +; CHECK-NEXT: ybndsrw cs1, ca1, a0 +; CHECK-NEXT: li a0, 88 +; CHECK-NEXT: addiy ca1, csp, 40 +; CHECK-NEXT: ybndsrw cs2, ca1, a0 +; CHECK-NEXT: .LBB0_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: ymv ca1, cs2 +; CHECK-NEXT: call call +; CHECK-NEXT: addi s3, s3, -1 +; CHECK-NEXT: bnez s3, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: addiy csp, cs0, -704 +; CHECK-NEXT: ly cra, 696(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 688(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs1, 680(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs2, 672(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs3, 664(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 704 +; CHECK-NEXT: ret +entry: + %buf1 = alloca [123 x i32], align 4, addrspace(200) + %buf2 = alloca [22 x i32], align 4, addrspace(200) + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arraydecay = getelementptr inbounds [123 x i32], [123 x i32] addrspace(200)* %buf1, i64 0, i64 0 + %arraydecay1 = getelementptr inbounds [22 x i32], [22 x i32] addrspace(200)* %buf2, i64 0, i64 0 + call void @call(i32 addrspace(200)* nonnull %arraydecay, i32 addrspace(200)* nonnull %arraydecay1) + %inc = add nuw nsw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +declare void @call(i32 addrspace(200)*, i32 addrspace(200)*) local_unnamed_addr addrspace(200) nounwind + +define void @hoist_alloca_cond(i32 signext %cond) local_unnamed_addr addrspace(200) nounwind { +; CHECK-LABEL: hoist_alloca_cond: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -704 +; CHECK-NEXT: sy cra, 696(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 688(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs1, 680(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs2, 672(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs3, 664(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs4, 656(csp) # 8-byte Folded Spill +; CHECK-NEXT: addiy cs0, csp, 704 +; CHECK-NEXT: andi a1, sp, -64 +; CHECK-NEXT: yaddrw csp, csp, a1 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: li s4, 100 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: addiy ca1, csp, 128 +; CHECK-NEXT: ybndsrw cs2, ca1, a0 +; CHECK-NEXT: li a0, 88 +; CHECK-NEXT: addiy ca1, csp, 40 +; CHECK-NEXT: ybndsrw cs3, ca1, a0 +; CHECK-NEXT: j .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %for.inc +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: addi s4, s4, -1 +; CHECK-NEXT: beqz s4, .LBB1_4 +; CHECK-NEXT: .LBB1_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: beqz s1, .LBB1_1 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; 
CHECK-NEXT: ymv ca0, cs2 +; CHECK-NEXT: ymv ca1, cs3 +; CHECK-NEXT: call call +; CHECK-NEXT: j .LBB1_1 +; CHECK-NEXT: .LBB1_4: # %for.cond.cleanup +; CHECK-NEXT: addiy csp, cs0, -704 +; CHECK-NEXT: ly cra, 696(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 688(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs1, 680(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs2, 672(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs3, 664(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs4, 656(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 704 +; CHECK-NEXT: ret +entry: + %buf1 = alloca [123 x i32], align 4, addrspace(200) + %buf2 = alloca [22 x i32], align 4, addrspace(200) + %tobool.not = icmp eq i32 %cond, 0 + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + br i1 %tobool.not, label %for.inc, label %if.then + +if.then: + %arraydecay = getelementptr inbounds [123 x i32], [123 x i32] addrspace(200)* %buf1, i64 0, i64 0 + %arraydecay1 = getelementptr inbounds [22 x i32], [22 x i32] addrspace(200)* %buf2, i64 0, i64 0 + call void @call(i32 addrspace(200)* nonnull %arraydecay, i32 addrspace(200)* nonnull %arraydecay1) + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics-purecap-only.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics-purecap-only.ll new file mode 100644 index 0000000000000..252320b371501 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics-purecap-only.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/intrinsics-purecap-only.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s -o - | FileCheck %s --check-prefix=PURECAP +; RUN: not --crash llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f < %s -o - 2>&1 | FileCheck %s --check-prefix HYBRID-ERROR +; This test checks target-independent CHERI intrinsics that are only available for purecap code + +; Currently the only purecap-only intrinsic is llvm.cheri.stack.cap.get() +declare i8 addrspace(200)* @llvm.cheri.stack.cap.get() + +define i8 addrspace(200)* @stack_get() nounwind { +; PURECAP-LABEL: stack_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ymv ca0, csp +; PURECAP-NEXT: ret + %cap = call i8 addrspace(200)* @llvm.cheri.stack.cap.get() + ret i8 addrspace(200)* %cap +} +; HYBRID-ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.cheri.stack.cap.get diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics.ll new file mode 100644 index 0000000000000..4bd2da000bea3 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/intrinsics.ll @@ -0,0 +1,563 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/intrinsics.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - < %s | FileCheck %s --check-prefix=PURECAP +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -o - < %s | FileCheck %s --check-prefix=HYBRID +; 
Check that the target-independent CHERI intrinsics are supported for all architectures
+; The grouping/ordering in this test is based on the RISC-V instruction listing
+; in the CHERI ISA specification (Appendix C.1 in ISAv7).
+
+; Capability-Inspection Instructions
+
+declare i32 @llvm.cheri.cap.perms.get.i32(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.type.get.i32(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.base.get.i32(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.length.get.i32(i8 addrspace(200)*)
+declare i1 @llvm.cheri.cap.tag.get(i8 addrspace(200)*)
+declare i1 @llvm.cheri.cap.sealed.get(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.offset.get.i32(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.flags.get.i32(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.address.get.i32(i8 addrspace(200)*)
+declare i32 @llvm.cheri.cap.high.get.i32(i8 addrspace(200)*)
+
+define i32 @perms_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: perms_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ypermr a0, ca0
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: perms_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ypermr a0, ca0
+; HYBRID-NEXT: ret
+ %perms = call i32 @llvm.cheri.cap.perms.get.i32(i8 addrspace(200)* %cap)
+ ret i32 %perms
+}
+
+define i32 @type_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: type_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ytyper a0, ca0
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: type_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ytyper a0, ca0
+; HYBRID-NEXT: ret
+ %type = call i32 @llvm.cheri.cap.type.get.i32(i8 addrspace(200)* %cap)
+ ret i32 %type
+}
+
+define i32 @base_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: base_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ybaser a0, ca0
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: base_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ybaser a0, ca0
+; HYBRID-NEXT: ret
+ %base = call i32 @llvm.cheri.cap.base.get.i32(i8 addrspace(200)* %cap)
+ ret i32 %base
+}
+
+define i32 @length_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: length_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ylenr a0, ca0
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: length_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ylenr a0, ca0
+; HYBRID-NEXT: ret
+ %length = call i32 @llvm.cheri.cap.length.get.i32(i8 addrspace(200)* %cap)
+ ret i32 %length
+}
+
+define i32 @tag_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: tag_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ytagr a0, ca0
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: tag_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ytagr a0, ca0
+; HYBRID-NEXT: ret
+ %tag = call i1 @llvm.cheri.cap.tag.get(i8 addrspace(200)* %cap)
+ %tag.zext = zext i1 %tag to i32
+ ret i32 %tag.zext
+}
+
+define i32 @sealed_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: sealed_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ytyper a0, ca0
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: sealed_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ytyper a0, ca0
+; HYBRID-NEXT: ret
+ %sealed = call i1 @llvm.cheri.cap.sealed.get(i8 addrspace(200)* %cap)
+ %sealed.zext = zext i1 %sealed to i32
+ ret i32 %sealed.zext
+}
+
+define i32 @offset_get(i8 addrspace(200)* %cap) nounwind {
+; PURECAP-LABEL: offset_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ybaser a1, ca0
+; PURECAP-NEXT: sub a0, a0, a1
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: offset_get:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: ybaser a1, ca0
+; HYBRID-NEXT: sub a0, a0, a1
+; HYBRID-NEXT: ret
+ %offset = call i32 @llvm.cheri.cap.offset.get.i32(i8 addrspace(200)* %cap)
+ ret i32 %offset
+}
+
+define i32
@flags_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: flags_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: li a0, 0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: flags_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ymoder a0, ca0 +; HYBRID-NEXT: ret + %flags = call i32 @llvm.cheri.cap.flags.get.i32(i8 addrspace(200)* %cap) + ret i32 %flags +} + +define i32 @address_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: address_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: mv a0, a0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: address_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: mv a0, a0 +; HYBRID-NEXT: ret + %address = call i32 @llvm.cheri.cap.address.get.i32(i8 addrspace(200)* %cap) + ret i32 %address +} + +define i32 @high_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: high_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: yhir a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: high_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: yhir a0, ca0 +; HYBRID-NEXT: ret + %high = call i32 @llvm.cheri.cap.high.get.i32(i8 addrspace(200)* %cap) + ret i32 %high +} + +; Capability-Modification Instructions + +declare i8 addrspace(200)* @llvm.cheri.cap.seal(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.unseal(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.perms.and.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.flags.set.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.offset.set.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.address.set.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.bounds.set.exact.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.high.set.i32(i8 addrspace(200)*, i32) +declare i8 addrspace(200)* @llvm.cheri.cap.tag.clear(i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.build(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.type.copy(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.conditional.seal(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.seal.entry(i8 addrspace(200)*) + +define i8 addrspace(200)* @seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: seal: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: seal: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %sealed = call i8 addrspace(200)* @llvm.cheri.cap.seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %sealed +} + +define i8 addrspace(200)* @unseal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: unseal: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: unseal: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %unsealed = call i8 addrspace(200)* @llvm.cheri.cap.unseal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %unsealed +} + +define i8 addrspace(200)* @perms_and(i8 addrspace(200)* %cap, i32 %perms) nounwind { +; PURECAP-LABEL: perms_and: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ypermc ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: perms_and: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ypermc ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.perms.and.i32(i8 addrspace(200)* %cap, i32 %perms) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @flags_set(i8 
addrspace(200)* %cap, i32 %flags) nounwind { +; PURECAP-LABEL: flags_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: flags_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ymodew ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.flags.set.i32(i8 addrspace(200)* %cap, i32 %flags) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @offset_set(i8 addrspace(200)* %cap, i32 %offset) nounwind { +; PURECAP-LABEL: offset_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybaser a2, ca0 +; PURECAP-NEXT: yaddrw ca0, ca0, a2 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: offset_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybaser a2, ca0 +; HYBRID-NEXT: yaddrw ca0, ca0, a2 +; HYBRID-NEXT: addy ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.offset.set.i32(i8 addrspace(200)* %cap, i32 %offset) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @address_set(i8 addrspace(200)* %cap, i32 %address) nounwind { +; PURECAP-LABEL: address_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: yaddrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: address_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: yaddrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.address.set.i32(i8 addrspace(200)* %cap, i32 %address) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @bounds_set(i8 addrspace(200)* %cap, i32 %bounds) nounwind { +; PURECAP-LABEL: bounds_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybndsrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: bounds_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybndsrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i32(i8 addrspace(200)* %cap, i32 %bounds) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @bounds_set_exact(i8 addrspace(200)* %cap, i32 %bounds) nounwind { +; PURECAP-LABEL: bounds_set_exact: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybndsw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: bounds_set_exact: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybndsw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.exact.i32(i8 addrspace(200)* %cap, i32 %bounds) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @high_set(i8 addrspace(200)* %cap, i32 %high) nounwind { +; PURECAP-LABEL: high_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: yhiw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: high_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: yhiw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.high.set.i32(i8 addrspace(200)* %cap, i32 %high) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @bounds_set_immediate(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: bounds_set_immediate: +; PURECAP: # %bb.0: +; PURECAP-NEXT: li a1, 42 +; PURECAP-NEXT: ybndsrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: bounds_set_immediate: +; HYBRID: # %bb.0: +; HYBRID-NEXT: li a1, 42 +; HYBRID-NEXT: ybndsrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i32(i8 addrspace(200)* %cap, i32 42) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @tag_clear(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: tag_clear: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: tag_clear: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %untagged = call i8 addrspace(200)* @llvm.cheri.cap.tag.clear(i8 
addrspace(200)* %cap) + ret i8 addrspace(200)* %untagged +} + +define i8 addrspace(200)* @build(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: build: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybld ca0, ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: build: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybld ca0, ca0, ca1 +; HYBRID-NEXT: ret + %built = call i8 addrspace(200)* @llvm.cheri.cap.build(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %built +} + +define i8 addrspace(200)* @type_copy(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: type_copy: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: type_copy: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.type.copy(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @conditional_seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: conditional_seal: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: conditional_seal: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.conditional.seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @seal_entry(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: seal_entry: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ysentry ca0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: seal_entry: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ysentry ca0, ca0 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.seal.entry(i8 addrspace(200)* %cap) + ret i8 addrspace(200)* %newcap +} + +; Pointer-Arithmetic Instructions + +declare i32 @llvm.cheri.cap.to.pointer(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.from.pointer(i8 addrspace(200)*, i32) +declare i32 @llvm.cheri.cap.diff(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.ddc.get() +declare i8 addrspace(200)* @llvm.cheri.pcc.get() + +define i32 @to_pointer(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: to_pointer: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytagr a0, ca1 +; PURECAP-NEXT: neg a0, a0 +; PURECAP-NEXT: and a0, a1, a0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: to_pointer: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, ca1 +; HYBRID-NEXT: neg a0, a0 +; HYBRID-NEXT: and a0, a1, a0 +; HYBRID-NEXT: ret + %ptr = call i32 @llvm.cheri.cap.to.pointer(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i32 %ptr +} + +define i32 @to_pointer_ddc_relative(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: to_pointer_ddc_relative: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytagr a1, ca0 +; PURECAP-NEXT: neg a1, a1 +; PURECAP-NEXT: and a0, a0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: to_pointer_ddc_relative: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, ca0 +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a0, a0, a1 +; HYBRID-NEXT: ret + %ddc = call i8 addrspace(200)* @llvm.cheri.ddc.get() + %ptr = call i32 @llvm.cheri.cap.to.pointer(i8 addrspace(200)* %ddc, i8 addrspace(200)* %cap) + ret i32 %ptr +} + +define i8 addrspace(200)* @from_pointer(i8 addrspace(200)* %cap, i32 %ptr) nounwind { +; PURECAP-LABEL: from_pointer: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bnez a1, .LBB27_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; PURECAP-NEXT: .LBB27_2: +; PURECAP-NEXT: yaddrw ca0, ca0, a1 +; 
PURECAP-NEXT: ret +; +; HYBRID-LABEL: from_pointer: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bnez a1, .LBB27_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; HYBRID-NEXT: .LBB27_2: +; HYBRID-NEXT: yaddrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.from.pointer(i8 addrspace(200)* %cap, i32 %ptr) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @from_ddc(i32 %ptr) nounwind { +; PURECAP-LABEL: from_ddc: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bnez a0, .LBB28_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; PURECAP-NEXT: .LBB28_2: +; PURECAP-NEXT: yaddrw ca0, cnull, a0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: from_ddc: +; HYBRID: # %bb.0: +; HYBRID-NEXT: csrrc ca1, ddc, zero +; HYBRID-NEXT: bnez a0, .LBB28_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; HYBRID-NEXT: .LBB28_2: +; HYBRID-NEXT: yaddrw ca0, ca1, a0 +; HYBRID-NEXT: ret + %ddc = call i8 addrspace(200)* @llvm.cheri.ddc.get() + %cap = call i8 addrspace(200)* @llvm.cheri.cap.from.pointer(i8 addrspace(200)* %ddc, i32 %ptr) + ret i8 addrspace(200)* %cap +} + +define i32 @diff(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: diff: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sub a0, a0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: diff: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: ret + %diff = call i32 @llvm.cheri.cap.diff(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i32 %diff +} + +define i8 addrspace(200)* @ddc_get() nounwind { +; PURECAP-LABEL: ddc_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: ddc_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: csrrc ca0, ddc, zero +; HYBRID-NEXT: ret + %cap = call i8 addrspace(200)* @llvm.cheri.ddc.get() + ret i8 addrspace(200)* %cap +} + +define i8 addrspace(200)* @pcc_get() nounwind { +; PURECAP-LABEL: pcc_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: auipc ca0, 0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: pcc_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: auipcc ca0, 0 +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %cap = call i8 addrspace(200)* @llvm.cheri.pcc.get() + ret i8 addrspace(200)* %cap +} + +; Assertion Instructions + +declare i1 @llvm.cheri.cap.subset.test(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + +define i32 @subset_test(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: subset_test: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ylt a0, ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: subset_test: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ylt a0, ca0, ca1 +; HYBRID-NEXT: ret + %subset = call i1 @llvm.cheri.cap.subset.test(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + %subset.zext = zext i1 %subset to i32 + ret i32 %subset.zext +} + +declare i1 @llvm.cheri.cap.equal.exact(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + +define i32 @equal_exact(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: equal_exact: +; PURECAP: # %bb.0: +; PURECAP-NEXT: syeq a0, ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: equal_exact: +; HYBRID: # %bb.0: +; HYBRID-NEXT: syeq a0, ca0, ca1 +; HYBRID-NEXT: ret + %eqex = call i1 @llvm.cheri.cap.equal.exact(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + %eqex.zext = zext i1 %eqex to i32 + ret i32 %eqex.zext +} diff --git 
a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/landingpad-non-preemptible.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/landingpad-non-preemptible.ll new file mode 100644 index 0000000000000..a9c3ed333f60f --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/landingpad-non-preemptible.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/landingpad-non-preemptible.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f --relocation-model=pic < %s -o - | FileCheck %s +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f --relocation-model=pic < %s -o - -filetype=obj | llvm-readobj --relocs --symbols - | FileCheck %s --check-prefix=RELOCS +; Capabilities for exception landing pads were using preemptible relocations such as +; .chericap foo + .Ltmp - .Lfunc_begin instead of using a local alias. +; https://github.com/CTSRD-CHERI/llvm-project/issues/512 +; This test case was generated from the following C++ code: +; extern long foo(); +; int do_catch() { +; try { +; return foo(); +; } catch(int &i) { +; return 1; +; } catch(...) { +; return 2; +; } +; } + +@_ZTIi = external dso_local addrspace(200) constant ptr addrspace(200) +define dso_local noundef signext i32 @_Z8do_catchv() local_unnamed_addr addrspace(200) #0 personality ptr addrspace(200) @__gxx_personality_v0 { +; CHECK-LABEL: _Z8do_catchv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs1, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: .cfi_offset s1, -24 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: call _Z3foov +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: .LBB0_2: # %return +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs1, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 32 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %lpad +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: call __cxa_begin_catch +; CHECK-NEXT: li s0, 2 +; CHECK-NEXT: bne s1, s0, .LBB0_5 +; CHECK-NEXT: # %bb.4: # %catch1 +; CHECK-NEXT: call __cxa_end_catch +; CHECK-NEXT: li s0, 1 +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_5: # %catch +; CHECK-NEXT: call __cxa_end_catch +; CHECK-NEXT: j .LBB0_2 +entry: + %call = invoke noundef signext i32 @_Z3foov() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { ptr addrspace(200), i32 } + catch ptr addrspace(200) @_ZTIi + catch ptr addrspace(200) null + %1 = extractvalue { ptr addrspace(200), i32 } %0, 0 + %2 = extractvalue { ptr addrspace(200), i32 } %0, 1 + %3 = tail call i32 @llvm.eh.typeid.for(ptr addrspacecast (ptr addrspace(200) @_ZTIi to ptr)) nounwind + %matches = icmp eq i32 %2, %3 + %4 = tail call ptr addrspace(200) @__cxa_begin_catch(ptr addrspace(200) %1) nounwind + br i1 %matches, label %catch1, label %catch + +catch1: ; preds = %lpad + tail call void @__cxa_end_catch() nounwind + br label %return + +catch: ; preds = %lpad + tail call void @__cxa_end_catch() + br label %return + +return: ; preds = %entry, %catch1, %catch + 
%retval.0 = phi i32 [ 1, %catch1 ], [ 2, %catch ], [ %call, %entry ] + ret i32 %retval.0 +} + +declare dso_local i32 @_Z3foov() local_unnamed_addr addrspace(200) + +declare dso_local i32 @__gxx_personality_v0(...) addrspace(200) + +declare i32 @llvm.eh.typeid.for(i8*) addrspace(200) nounwind readnone + +declare dso_local ptr addrspace(200) @__cxa_begin_catch(ptr addrspace(200)) local_unnamed_addr addrspace(200) + +declare dso_local void @__cxa_end_catch() local_unnamed_addr addrspace(200) + +; UTC_ARGS: --disable +; CHECK: .Lfunc_end0: +; CHECK-NEXT: .size _Z8do_catchv, .Lfunc_end0-_Z8do_catchv +; CHECK-NEXT: .size .L_Z8do_catchv$local, .Lfunc_end0-_Z8do_catchv + +; CHECK: GCC_except_table0: +; CHECK-NEXT: .Lexception0: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4 +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref0 +; CHECK-NEXT: .Lttbaseref0: +; RISC-V uses DW_EH_PE_udata4 instead of uleb128 since uleb128 causes issues with linker relaxations. +; CHECK-NEXT: .byte 3 # Call site Encoding = udata4 +; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0 +; CHECK-NEXT: .Lcst_begin0: +; CHECK-NEXT: [[CS_DIRECTIVE:(\.uleb128)|(\.word)]] .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: [[CS_DIRECTIVE]] .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; Note: RISC-V uses DW_EH_PE_udata4, so the 0xc marker uses 4 bytes instead of 1 +; CHECK-NEXT: [[SMALL_CS_DIRECTIVE:(\.byte)|(\.word)]] 12 # (landing pad is a capability) +; Note: the following line should not be using _Z8do_catchv, but a local alias +; CHECK-NEXT: .chericap %code(.L_Z8do_catchv$local+(.Ltmp2-.Lfunc_begin0)) # jumps to .Ltmp2 +; CHECK-NEXT: .byte 3 # On action: 2 +; CHECK-NEXT: [[CS_DIRECTIVE]] .Ltmp1-.Lfunc_begin0 # >> Call Site 2 << +; CHECK-NEXT: [[CS_DIRECTIVE]] .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0 +; CHECK-NEXT: [[SMALL_CS_DIRECTIVE]] 0 # has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup +; CHECK-NEXT: .Lcst_end0: +; CHECK-NEXT: .byte 1 # >> Action Record 1 << +; CHECK-NEXT: # Catch TypeInfo 1 +; CHECK-NEXT: .byte 0 # No further actions +; CHECK-NEXT: .byte 2 # >> Action Record 2 << +; CHECK-NEXT: # Catch TypeInfo 2 +; CHECK-NEXT: .byte 125 # Continue to action 1 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # >> Catch TypeInfos << +; CHECK-NEXT: [[TI_LABEL:\.Ltmp[0-9]+]]: # TypeInfo 2 +; CHECK-NEXT: .{{4byte|word}} .L_ZTIi.DW.stub-[[TI_LABEL]] +; CHECK-NEXT: .{{4byte|word}} 0 # TypeInfo 1 +; CHECK-NEXT: .Lttbase0: +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # -- End function + + + + +; RELOCS-LABEL: Relocations [ +; RELOCS-LABEL: Section ({{.+}}) .rela.gcc_except_table { +; RELOCS-NEXT: R_RISCV_CHERI_CAPABILITY_CODE .L_Z8do_catchv$local 0x34 +; RELOCS-NEXT: R_RISCV_ADD32 0x0 +; RELOCS-NEXT: R_RISCV_SUB32 0x0 +; RELOCS-NEXT: R_RISCV_ADD32 .L_ZTIi.DW.stub 0x0 +; RELOCS-NEXT: R_RISCV_SUB32 0x0 +; RELOCS-NEXT: } + +; The local alias should have the same type and non-zero size as the real function: +; RELOCS: Symbol { +; RELOCS-LABEL: Name: .L_Z8do_catchv$local ( +; RELOCS-NEXT: Value: 0x0 +; RELOCS-NEXT: Size: [[FN_SIZE:[1-9][0-9]*]] +; RELOCS-NEXT: Binding: Local (0x0) +; RELOCS-NEXT: Type: Function (0x2) +; RELOCS-NEXT: Other: 0 +; RELOCS-NEXT: Section: .text (0x2) +; RELOCS-NEXT: } +; RELOCS: Symbol { +; RELOCS-LABEL: Name: _Z8do_catchv ( +; RELOCS-NEXT: Value: 0x0 +; RELOCS-NEXT: Size: [[FN_SIZE]] +; RELOCS-NEXT: Binding: Global (0x1) +; RELOCS-NEXT: Type: Function (0x2) +; RELOCS-NEXT: Other: 0 +; RELOCS-NEXT: Section: .text (0x2) +; 
RELOCS-NEXT: } diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/machinelicm-hoist-csetbounds.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/machinelicm-hoist-csetbounds.ll new file mode 100644 index 0000000000000..f8d7e5e01c440 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/machinelicm-hoist-csetbounds.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/machinelicm-hoist-csetbounds.ll +; Previously LLVM would hoist CSetBounds instructions out of if conditions/loops +; even if the source pointer could be NULL. On MIPS and RISC-V this results in a +; tag violation so we must ensure that the CSetBounds happens after the NULL check. + +; Note: Opt correctly hoists the condition+csetbounds into a preheader, and LLC +; used to unconditionally hoist the csetbounds. +; RUN: opt -data-layout="e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f "-passes=default" -S < %s | FileCheck %s --check-prefix=HOIST-OPT +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O3 < %s | FileCheck %s + +; Generated from the following C code (with subobject bounds): +; struct foo { +; int src; +; int dst; +; }; +; +; void call(int* src, int* dst); +; +; void hoist_csetbounds(int cond, struct foo* f) { +; for (int i = 0; i < 100; i++) { +; if (f) { +; call(&f->src, &f->dst); +; } +; } +; } + +%struct.foo = type { i32, i32 } +declare dso_local void @call(ptr addrspace(200), ptr addrspace(200)) local_unnamed_addr addrspace(200) nounwind +declare ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200), i32) addrspace(200) nounwind readnone willreturn + +define dso_local void @hoist_csetbounds(i32 signext %cond, ptr addrspace(200) %f) local_unnamed_addr addrspace(200) nounwind { +; CHECK-LABEL: hoist_csetbounds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -48 +; CHECK-NEXT: sy cra, 40(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 32(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs1, 24(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs2, 16(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs3, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs4, 0(csp) # 8-byte Folded Spill +; CHECK-NEXT: ymv cs0, ca1 +; CHECK-NEXT: addiy ca0, ca1, 4 +; CHECK-NEXT: li s3, -1 +; CHECK-NEXT: li s4, 99 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: ybndsrw cs1, cs0, a1 +; CHECK-NEXT: ybndsrw cs2, ca0, a1 +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %for.inc +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: addi s3, s3, 1 +; CHECK-NEXT: bgeu s3, s4, .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: beqz s0, .LBB0_1 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: ymv ca1, cs2 +; CHECK-NEXT: call call +; CHECK-NEXT: j .LBB0_1 +; CHECK-NEXT: .LBB0_4: # %for.cond.cleanup +; CHECK-NEXT: ly cra, 40(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 32(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs1, 24(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs2, 16(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs3, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs4, 0(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 48 +; CHECK-NEXT: ret +; HOIST-OPT-LABEL: 
define dso_local void @hoist_csetbounds +; HOIST-OPT-SAME: (i32 signext [[COND:%.*]], ptr addrspace(200) [[F:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0:[0-9]+]] { +; HOIST-OPT-NEXT: entry: +; HOIST-OPT-NEXT: [[TOBOOL:%.*]] = icmp eq ptr addrspace(200) [[F]], null +; HOIST-OPT-NEXT: br i1 [[TOBOOL]], label [[FOR_COND_CLEANUP:%.*]], label [[ENTRY_SPLIT:%.*]] +; HOIST-OPT: entry.split: +; HOIST-OPT-NEXT: [[DST:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr addrspace(200) [[F]], i32 0, i32 1 +; HOIST-OPT-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[F]], i32 4) +; HOIST-OPT-NEXT: [[ADDRESS_WITH_BOUNDS1:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[DST]], i32 4) +; HOIST-OPT-NEXT: br label [[FOR_BODY:%.*]] +; HOIST-OPT: for.cond.cleanup: +; HOIST-OPT-NEXT: ret void +; HOIST-OPT: for.body: +; HOIST-OPT-NEXT: [[I_06:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; HOIST-OPT-NEXT: tail call void @call(ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], ptr addrspace(200) [[ADDRESS_WITH_BOUNDS1]]) +; HOIST-OPT-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 +; HOIST-OPT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 100 +; HOIST-OPT-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; +entry: + %tobool = icmp eq ptr addrspace(200) %f, null + %dst = getelementptr inbounds %struct.foo, ptr addrspace(200) %f, i64 0, i32 1 + br label %for.body + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %for.inc, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %f, i32 4) + %address.with.bounds1 = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %dst, i32 4) + call void @call(ptr addrspace(200) %address.with.bounds, ptr addrspace(200) %address.with.bounds1) + br label %for.inc + +for.inc: ; preds = %if.then, %for.body + %inc = add nuw nsw i32 %i.06, 1 + %cmp = icmp ult i32 %i.06, 99 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-from-constant.ll new file mode 100644 index 0000000000000..1683cd1470155 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-from-constant.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll +;; Copying from a zero constant can be converted to a memset (even with the tag preservation flags) +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s -o - | FileCheck %s + +@a = internal addrspace(200) constant ptr addrspace(200) null +@b = internal addrspace(200) constant ptr addrspace(200) null +@zero_constant = internal addrspace(200) constant [5 x ptr addrspace(200)] zeroinitializer +@constant_ptrs = internal addrspace(200) constant [2 x ptr addrspace(200)] [ptr addrspace(200) @a, ptr addrspace(200) @b] + +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noalias nocapture writeonly, ptr addrspace(200) noalias nocapture readonly, i64, i1 immarg) addrspace(200) #0 + 
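+;; Note (added commentary): a source that is a zero constant can never carry
+;; valid capability tags, so even copies that are required to preserve tags
+;; can safely be lowered to inline stores instead of a memcpy call.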
+define linkonce_odr void @copy_from_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sw zero, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 4, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB3_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB3_1)(ca1) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @constant_ptrs, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB4_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB4_1)(ca1) +; CHECK-NEXT: ly ca1, 8(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) + ret void +} + +;; Run the same tests again this time with must_preserve_tags to check that we don't call memcpy(). 
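+;; The expected codegen for the *_preserve variants is identical to the
+;; functions above: the attribute should make no difference here because the
+;; zero-constant source is known to be tag-free.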
+ +define linkonce_odr void @copy_from_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sw zero, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 4, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB8_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB8_1)(ca1) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @constant_ptrs, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB9_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB9_1)(ca1) +; CHECK-NEXT: ly ca1, 8(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 8 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 %src, i64 8, i1 false) #1 + ret void +} + +;; Finally, check copying from a zero constant with insufficient known alignment. +;; We should be able to emit this inline since a zero constant source never has tags. 
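+;; With only 4-byte alignment a capability-wide store cannot be used, so the
+;; two functions below are expected to lower to a pair of word stores
+;; (sw zero) rather than a single capability store (sy cnull).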
+
+define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) {
+; CHECK-LABEL: copy_from_underaligned_zero_constant:
+; CHECK: # %bb.0: # %do.body
+; CHECK-NEXT: sw zero, 4(ca0)
+; CHECK-NEXT: sw zero, 0(ca0)
+; CHECK-NEXT: ret
+do.body:
+ call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1
+ ret void
+}
+
+define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) {
+; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve:
+; CHECK: # %bb.0: # %do.body
+; CHECK-NEXT: sw zero, 4(ca0)
+; CHECK-NEXT: sw zero, 0(ca0)
+; CHECK-NEXT: ret
+do.body:
+ call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 @zero_constant, i64 8, i1 false) #1
+ ret void
+}
+
+attributes #0 = { argmemonly nocallback nofree nounwind willreturn }
+attributes #1 = { must_preserve_cheri_tags "frontend-memtransfer-type"="'const UChar * __capability' (aka 'const char16_t * __capability')" }
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-no-preserve-tags-attr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-no-preserve-tags-attr.ll
new file mode 100644
index 0000000000000..b0b8eac9ea44d
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-no-preserve-tags-attr.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-no-preserve-tags-attr.ll
+; Check that the no_preserve_tags annotation on memcpy/memmove intrinsics allows
+; us to inline struct copies >= capability size.
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -o - < %s | FileCheck %s
+
+%struct.pair = type { i32, i32 }
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1)
+declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1)
+
+; Without a no_preserve_tags attribute we always call memcpy. In this case we
+; don't know whether the type might actually contain capabilities (e.g. unions).
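+; In summary, the tests below expect: no attribute -> call memcpy/memmove,
+; must_preserve_cheri_tags -> call memcpy/memmove, and
+; no_preserve_cheri_tags -> an inlined word-by-word copy.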
+define void @memcpy_no_attr(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
+; CHECK-LABEL: memcpy_no_attr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiy csp, csp, -16
+; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: call memcpy
+; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; CHECK-NEXT: addiy csp, csp, 16
+; CHECK-NEXT: ret
+entry:
+ %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
+ %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)*
+ call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 4 %a_i8, i8 addrspace(200)* align 4 %b_i8, i64 16, i1 false)
+ ret void
+}
+
+define void @memmove_no_attr(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
+; CHECK-LABEL: memmove_no_attr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiy csp, csp, -16
+; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: call memmove
+; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; CHECK-NEXT: addiy csp, csp, 16
+; CHECK-NEXT: ret
+entry:
+ %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
+ %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)*
+ call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 4 %a_i8, i8 addrspace(200)* align 4 %b_i8, i64 16, i1 false)
+ ret void
+}
+
+; We have to emit a call if the intrinsic has must_preserve_cheri_tags:
+define void @memcpy_must_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
+; CHECK-LABEL: memcpy_must_preserve:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiy csp, csp, -16
+; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: call memcpy
+; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; CHECK-NEXT: addiy csp, csp, 16
+; CHECK-NEXT: ret
+entry:
+ %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
+ %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)*
+ call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 4 %a_i8, i8 addrspace(200)* align 4 %b_i8, i64 16, i1 false) must_preserve_cheri_tags
+ ret void
+}
+
+define void @memmove_must_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
+; CHECK-LABEL: memmove_must_preserve:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiy csp, csp, -16
+; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: call memmove
+; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; CHECK-NEXT: addiy csp, csp, 16
+; CHECK-NEXT: ret
+entry:
+ %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)*
+ %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)*
+ call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 4 %a_i8, i8 addrspace(200)* align 4 %b_i8, i64 16, i1 false) must_preserve_cheri_tags
+ ret void
+}
+
+; We should be able to inline the memcpy/memmove call if the intrinsic has no_preserve_cheri_tags:
+define void @memcpy_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind {
+; CHECK-LABEL: memcpy_no_preserve:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lw a2, 12(ca1)
+; CHECK-NEXT: sw a2, 12(ca0)
+; CHECK-NEXT: lw a2, 8(ca1)
+; CHECK-NEXT: sw a2, 8(ca0)
+;
CHECK-NEXT: lw a2, 4(ca1) +; CHECK-NEXT: sw a2, 4(ca0) +; CHECK-NEXT: lw a1, 0(ca1) +; CHECK-NEXT: sw a1, 0(ca0) +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 4 %a_i8, i8 addrspace(200)* align 4 %b_i8, i64 16, i1 false) no_preserve_cheri_tags + ret void +} + +define void @memmove_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memmove_no_preserve: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lw a2, 12(ca1) +; CHECK-NEXT: lw a3, 8(ca1) +; CHECK-NEXT: lw a4, 4(ca1) +; CHECK-NEXT: lw a1, 0(ca1) +; CHECK-NEXT: sw a2, 12(ca0) +; CHECK-NEXT: sw a3, 8(ca0) +; CHECK-NEXT: sw a4, 4(ca0) +; CHECK-NEXT: sw a1, 0(ca0) +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 4 %a_i8, i8 addrspace(200)* align 4 %b_i8, i64 16, i1 false) no_preserve_cheri_tags + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-assume-aligned.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-assume-aligned.ll new file mode 100644 index 0000000000000..6103a3cd6cac6 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-assume-aligned.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-preserve-tags-assume-aligned.ll +; Check that __builtin_assume_aligned does the right thing and allows us to elide the memcpy +; call even with must_preserve_cheri_tags attribute (run instcombine to propagate assume information) +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -S -passes=instcombine < %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O2 -o - | FileCheck %s +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" + +declare void @llvm.memcpy.p200i8.p200i8.i32(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i32, i1) +declare void @llvm.memmove.p200i8.p200i8.i32(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i32, i1) +declare void @llvm.assume(i1) addrspace(200) + +define void @memcpy_assume(i8 addrspace(200)* addrspace(200)* %local_cap_ptr, i8 addrspace(200)* %align1) addrspace(200) nounwind { +; CHECK-LABEL: memcpy_assume: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 24(ca0) +; CHECK-NEXT: sy ca2, 24(ca1) +; CHECK-NEXT: ly ca2, 16(ca0) +; CHECK-NEXT: sy ca2, 16(ca1) +; CHECK-NEXT: ly ca2, 8(ca0) +; CHECK-NEXT: sy ca2, 8(ca1) +; CHECK-NEXT: ly ca0, 0(ca0) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: ret + %ptrint = ptrtoint i8 addrspace(200)* %align1 to i32 + %maskedptr = and i32 %ptrint, 15 + %maskcond = icmp eq i32 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %1 = bitcast i8 addrspace(200)* addrspace(200)* %local_cap_ptr to i8 addrspace(200)* + call void @llvm.memcpy.p200i8.p200i8.i32(i8 addrspace(200)* align 1 %align1, i8 addrspace(200)* align 16 %1, i32 32, i1 false) must_preserve_cheri_tags + ret void +} + +define void @memmove_assume(i8 
addrspace(200)* addrspace(200)* %local_cap_ptr, i8 addrspace(200)* %align1) addrspace(200) nounwind { +; CHECK-LABEL: memmove_assume: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 0(ca0) +; CHECK-NEXT: ly ca3, 16(ca0) +; CHECK-NEXT: sy ca2, 0(ca1) +; CHECK-NEXT: ly ca2, 8(ca0) +; CHECK-NEXT: ly ca0, 24(ca0) +; CHECK-NEXT: sy ca2, 8(ca1) +; CHECK-NEXT: sy ca3, 16(ca1) +; CHECK-NEXT: sy ca0, 24(ca1) +; CHECK-NEXT: ret + %ptrint = ptrtoint i8 addrspace(200)* %align1 to i32 + %maskedptr = and i32 %ptrint, 15 + %maskcond = icmp eq i32 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %1 = bitcast i8 addrspace(200)* addrspace(200)* %local_cap_ptr to i8 addrspace(200)* + call void @llvm.memmove.p200i8.p200i8.i32(i8 addrspace(200)* align 1 %align1, i8 addrspace(200)* align 16 %1, i32 32, i1 false) must_preserve_cheri_tags + ret void +} + diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-size-not-multiple.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-size-not-multiple.ll new file mode 100644 index 0000000000000..a099b3540d2d5 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-preserve-tags-size-not-multiple.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-preserve-tags-size-not-multiple.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -o - -O0 -verify-machineinstrs %s | FileCheck %s -check-prefixes CHECK +; Check that we can inline memmove/memcpy despite having the must_preserve_cheri_tags property and the size not +; being a multiple of CAP_SIZE. Since the pointers are aligned we can start with capability copies and use +; word/byte copies for the trailing bytes. 
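+; For the 45-byte copies below with 8-byte capabilities this works out to five
+; capability copies followed by one word and one byte copy (45 = 5*8 + 4 + 1),
+; matching the CHECK lines.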
+declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* nocapture, i8 addrspace(200)* nocapture readonly, i64, i1) addrspace(200) +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nocapture, i8 addrspace(200)* nocapture readonly, i64, i1) addrspace(200) + +define void @test_string_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) addrspace(200) nounwind { + ; Note: has must_preserve_cheri_tags, but this memmove can still be inlined since it's aligned +; CHECK-LABEL: test_string_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: ymv ca7, ca1 +; CHECK-NEXT: ymv ca1, ca0 +; CHECK-NEXT: ly ca0, 0(ca7) +; CHECK-NEXT: ly ca2, 8(ca7) +; CHECK-NEXT: ly ca3, 16(ca7) +; CHECK-NEXT: ly ca4, 24(ca7) +; CHECK-NEXT: ly ca5, 32(ca7) +; CHECK-NEXT: lw a6, 40(ca7) +; CHECK-NEXT: lb a7, 44(ca7) +; CHECK-NEXT: sb a7, 44(ca1) +; CHECK-NEXT: sw a6, 40(ca1) +; CHECK-NEXT: sy ca5, 32(ca1) +; CHECK-NEXT: sy ca4, 24(ca1) +; CHECK-NEXT: sy ca3, 16(ca1) +; CHECK-NEXT: sy ca2, 8(ca1) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: ret + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %dst, i8 addrspace(200)* align 16 %src, i64 45, i1 false) must_preserve_cheri_tags + ret void +} + +define void @test_string_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) addrspace(200) nounwind { + ; Note: has must_preserve_cheri_tags, but this memcpy can still be inlined since it's aligned +; CHECK-LABEL: test_string_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy ca1, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: ymv ca1, ca0 +; CHECK-NEXT: ly ca0, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: lb a2, 44(ca0) +; CHECK-NEXT: sb a2, 44(ca1) +; CHECK-NEXT: lw a2, 40(ca0) +; CHECK-NEXT: sw a2, 40(ca1) +; CHECK-NEXT: ly ca2, 32(ca0) +; CHECK-NEXT: sy ca2, 32(ca1) +; CHECK-NEXT: ly ca2, 24(ca0) +; CHECK-NEXT: sy ca2, 24(ca1) +; CHECK-NEXT: ly ca2, 16(ca0) +; CHECK-NEXT: sy ca2, 16(ca1) +; CHECK-NEXT: ly ca2, 8(ca0) +; CHECK-NEXT: sy ca2, 8(ca1) +; CHECK-NEXT: ly ca0, 0(ca0) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %dst, i8 addrspace(200)* align 16 %src, i64 45, i1 false) must_preserve_cheri_tags + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-zeroinit.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-zeroinit.ll new file mode 100644 index 0000000000000..ebf61832d4243 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/memcpy-zeroinit.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-zeroinit.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s -o - | FileCheck %s +; Check that the copy from the zeroinitializer global is turned into a series of zero stores +; or memset() as long as the memcpy is not volatile: + +%struct.umutex = type { i32, i32, [2 x i32], i8 addrspace(200)*, i32, [2 x i32] } + +@_thr_umutex_init.default_mtx = internal addrspace(200) constant %struct.umutex zeroinitializer, align 16 + +define void @_thr_umutex_init(%struct.umutex addrspace(200)* %mtx) local_unnamed_addr addrspace(200) nounwind "frame-pointer"="none" { +; CHECK-LABEL: _thr_umutex_init: +; CHECK: # %bb.0: +; CHECK-NEXT: sy cnull, 40(ca0) +; CHECK-NEXT: sy cnull, 32(ca0) +; CHECK-NEXT: sy cnull, 24(ca0) +; 
CHECK-NEXT: sy cnull, 16(ca0) +; CHECK-NEXT: sy cnull, 8(ca0) +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret + %1 = bitcast %struct.umutex addrspace(200)* %mtx to i8 addrspace(200)* + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %1, i8 addrspace(200)* align 16 bitcast (%struct.umutex addrspace(200)* @_thr_umutex_init.default_mtx to i8 addrspace(200)*), i64 48, i1 false) + ret void +} + +define void @_thr_umutex_init_volatile(%struct.umutex addrspace(200)* %mtx) local_unnamed_addr addrspace(200) nounwind "frame-pointer"="none" { +; CHECK-LABEL: _thr_umutex_init_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: .LBB1_1: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(_thr_umutex_init.default_mtx) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB1_1)(ca1) +; CHECK-NEXT: ly ca2, 40(ca1) +; CHECK-NEXT: sy ca2, 40(ca0) +; CHECK-NEXT: ly ca2, 32(ca1) +; CHECK-NEXT: sy ca2, 32(ca0) +; CHECK-NEXT: ly ca2, 24(ca1) +; CHECK-NEXT: sy ca2, 24(ca0) +; CHECK-NEXT: ly ca2, 16(ca1) +; CHECK-NEXT: sy ca2, 16(ca0) +; CHECK-NEXT: ly ca2, 8(ca1) +; CHECK-NEXT: sy ca2, 8(ca0) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret + %1 = bitcast %struct.umutex addrspace(200)* %mtx to i8 addrspace(200)* + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %1, i8 addrspace(200)* align 16 bitcast (%struct.umutex addrspace(200)* @_thr_umutex_init.default_mtx to i8 addrspace(200)*), i64 48, i1 true) + ret void +} + +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly %0, i8 addrspace(200)* noalias nocapture readonly %1, i64 %2, i1 immarg %3) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/optsize-preserve-tags-memcpy-crash.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/optsize-preserve-tags-memcpy-crash.ll new file mode 100644 index 0000000000000..55f2cf8294a18 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/optsize-preserve-tags-memcpy-crash.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/optsize-preserve-tags-memcpy-crash.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s -o - | FileCheck %s +; The following code copying 31 bytes (with capability alignment) using the +; must_preserve_tags attribute used to trigger a "(Align < CapSize)" assertion +; inside diagnoseInefficientCheriMemOp() when compiling with -Oz. +; This function should not be called since the reason we are falling back to memcpy +; is that the load/store limit is reached (and not the alignment). +; However, the code was checking for limit reached using a simple `(CapSize * Limit) < Size` +; check which fails here since the last 15 bytes need four (8 + 4 + 2 + 1 bytes) copies on +; architectures where LLVM does not emit misaligned loads/stores. 
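+; Worked example (the -Oz limit of four load/store pairs is assumed here for
+; illustration): the naive check computes 4 * CapSize = 32 >= 31 and concludes
+; the limit has not been reached, yet with 8-byte capabilities the expansion
+; below actually needs six pairs (31 = 3*8 + 4 + 2 + 1), so the libcall is
+; chosen because of the operation count, not the alignment.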
+ +define hidden void @optnone_preserve_tags_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optnone noinline nounwind { +; CHECK-LABEL: optnone_preserve_tags_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: lb a2, 30(ca1) +; CHECK-NEXT: sb a2, 30(ca0) +; CHECK-NEXT: lh a2, 28(ca1) +; CHECK-NEXT: sh a2, 28(ca0) +; CHECK-NEXT: lw a2, 24(ca1) +; CHECK-NEXT: sw a2, 24(ca0) +; CHECK-NEXT: ly ca2, 16(ca1) +; CHECK-NEXT: sy ca2, 16(ca0) +; CHECK-NEXT: ly ca2, 8(ca1) +; CHECK-NEXT: sy ca2, 8(ca0) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @optsize_preserve_tags_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optsize nounwind { +; CHECK-LABEL: optsize_preserve_tags_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: li a2, 31 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: call memcpy +; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @default_preserve_tags_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) nounwind { +; CHECK-LABEL: default_preserve_tags_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: lb a2, 30(ca1) +; CHECK-NEXT: sb a2, 30(ca0) +; CHECK-NEXT: lh a2, 28(ca1) +; CHECK-NEXT: sh a2, 28(ca0) +; CHECK-NEXT: lw a2, 24(ca1) +; CHECK-NEXT: sw a2, 24(ca0) +; CHECK-NEXT: ly ca2, 16(ca1) +; CHECK-NEXT: sy ca2, 16(ca0) +; CHECK-NEXT: ly ca2, 8(ca1) +; CHECK-NEXT: sy ca2, 8(ca0) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @optnone_preserve_tags_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optnone noinline nounwind { +; CHECK-LABEL: optnone_preserve_tags_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 0(ca1) +; CHECK-NEXT: ly ca3, 8(ca1) +; CHECK-NEXT: ly ca4, 16(ca1) +; CHECK-NEXT: lw a5, 24(ca1) +; CHECK-NEXT: lh a6, 28(ca1) +; CHECK-NEXT: lb a1, 30(ca1) +; CHECK-NEXT: sb a1, 30(ca0) +; CHECK-NEXT: sh a6, 28(ca0) +; CHECK-NEXT: sw a5, 24(ca0) +; CHECK-NEXT: sy ca4, 16(ca0) +; CHECK-NEXT: sy ca3, 8(ca0) +; CHECK-NEXT: sy ca2, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @optsize_preserve_tags_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optsize nounwind { +; CHECK-LABEL: optsize_preserve_tags_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: li a2, 31 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: call memmove +; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload 
+; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + tail call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @default_preserve_tags_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) nounwind{ +; CHECK-LABEL: default_preserve_tags_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a2, 24(ca1) +; CHECK-NEXT: ly ca3, 0(ca1) +; CHECK-NEXT: lh a4, 28(ca1) +; CHECK-NEXT: lb a5, 30(ca1) +; CHECK-NEXT: ly ca6, 16(ca1) +; CHECK-NEXT: sy ca3, 0(ca0) +; CHECK-NEXT: ly ca1, 8(ca1) +; CHECK-NEXT: sy ca1, 8(ca0) +; CHECK-NEXT: sy ca6, 16(ca0) +; CHECK-NEXT: sb a5, 30(ca0) +; CHECK-NEXT: sh a4, 28(ca0) +; CHECK-NEXT: sw a2, 24(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly, i8 addrspace(200)* noalias nocapture readonly, i64, i1 immarg) addrspace(200) +declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly, i8 addrspace(200)* noalias nocapture readonly, i64, i1 immarg) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptradd-immediate.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptradd-immediate.ll new file mode 100644 index 0000000000000..93668af954a71 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptradd-immediate.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/ptradd-immediate.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s | FileCheck %s --check-prefix=PURECAP +;; Hybrid baseline to compare against +; RUN: sed 's/addrspace(200)//g' %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f | FileCheck %s --check-prefix=HYBRID + +;; If both offsets are known to be non-negative it is safe to commute them and +;; use an immediate load. +define i32 @nneg_nneg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: nneg_nneg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 16 +; PURECAP-NEXT: srli a1, a1, 16 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: nneg_nneg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 16 +; HYBRID-NEXT: srli a1, a1, 16 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, 4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 1, i64 %x.ext + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If both offsets are known to be negative it is safe to commute them and use +;; an immediate load. 
+define i32 @neg_neg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: neg_neg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ori a1, a1, 1 +; PURECAP-NEXT: slli a1, a1, 16 +; PURECAP-NEXT: srli a1, a1, 16 +; PURECAP-NEXT: neg a1, a1 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, -4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: neg_neg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ori a1, a1, 1 +; HYBRID-NEXT: slli a1, a1, 16 +; HYBRID-NEXT: srli a1, a1, 16 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: lw a0, -4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %x.pos = or i64 %x.ext, 1 + %x.neg = sub i64 0, %x.pos + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 -1, i64 %x.neg + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If one offset is known to be non-negative and the other negative it is not in +;; general safe to commute them and use an immediate load. +define i32 @nneg_neg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: nneg_neg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ori a1, a1, 1 +; PURECAP-NEXT: slli a1, a1, 16 +; PURECAP-NEXT: srli a1, a1, 16 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: li a2, 4 +; PURECAP-NEXT: sub a2, a2, a1 +; PURECAP-NEXT: addy ca0, ca0, a2 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: nneg_neg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ori a1, a1, 1 +; HYBRID-NEXT: slli a1, a1, 16 +; HYBRID-NEXT: srli a1, a1, 16 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: lw a0, 4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %x.pos = or i64 %x.ext, 1 + %x.neg = sub i64 0, %x.pos + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 1, i64 %x.neg + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If one offset is known to be non-negative and the other negative it is not in +;; general safe to commute them and use an immediate load. +define i32 @neg_nneg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: neg_nneg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 16 +; PURECAP-NEXT: srli a1, a1, 16 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addi a1, a1, -4 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: neg_nneg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 16 +; HYBRID-NEXT: srli a1, a1, 16 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, -4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 -1, i64 %x.ext + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If we do not know the sign of one offset it is not in general safe to +;; commute them and use an immediate load. +define i32 @nneg_unknown(ptr addrspace(200) %p, i64 %x) { +; PURECAP-LABEL: nneg_unknown: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addi a1, a1, 4 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: nneg_unknown: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, 4(a0) +; HYBRID-NEXT: ret + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 1, i64 %x + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If we do not know the sign of one offset it is not in general safe to +;; commute them and use an immediate load. 
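+;; (Presumably the capability must then be advanced by the exact total offset in
+;; one step: if the constant were split out into the load immediate, the
+;; intermediate capability could stray outside its representable bounds and lose
+;; its tag, so the backend only commutes when both parts have a known, matching
+;; sign.)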
+define i32 @neg_unknown(ptr addrspace(200) %p, i64 %x) { +; PURECAP-LABEL: neg_unknown: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addi a1, a1, -4 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: neg_unknown: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, -4(a0) +; HYBRID-NEXT: ret + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 -1, i64 %x + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptrtoint.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptrtoint.ll new file mode 100644 index 0000000000000..d11f40541bb2d --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/ptrtoint.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/ptrtoint.ll +;; Check that we can correctly generate code for ptrtoint and perform simple folds +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f < %s | FileCheck %s --check-prefix=HYBRID + +define internal i32 @ptrtoint(i8 addrspace(200)* %cap) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a0, a0 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, ca0 +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a0, a0, a1 +; HYBRID-NEXT: ret + %ret = ptrtoint i8 addrspace(200)* %cap to i32 + ret i32 %ret +} + +define internal i32 @ptrtoint_plus_const(i8 addrspace(200)* %cap) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_plus_const: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 2 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_plus_const: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, ca0 +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a0, a0, a1 +; HYBRID-NEXT: addi a0, a0, 2 +; HYBRID-NEXT: ret + %zero = ptrtoint i8 addrspace(200)* %cap to i32 + %ret = add i32 %zero, 2 + ret i32 %ret +} + +define internal i32 @ptrtoint_plus_var(i8 addrspace(200)* %cap, i32 %add) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_plus_var: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_plus_var: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a2, ca0 +; HYBRID-NEXT: neg a2, a2 +; HYBRID-NEXT: and a0, a0, a2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: ret + %zero = ptrtoint i8 addrspace(200)* %cap to i32 + %ret = add i32 %zero, %add + ret i32 %ret +} + +define internal i32 @ptrtoint_null() addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_null: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, cnull +; HYBRID-NEXT: neg a0, a0 +; HYBRID-NEXT: and a0, zero, a0 +; HYBRID-NEXT: ret + %ret = ptrtoint i8 addrspace(200)* null to i32 + ret i32 %ret +} + +define internal i32 @ptrtoint_null_plus_const() addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_null_plus_const: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_null_plus_const: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, cnull +; HYBRID-NEXT: neg a0, a0 +; HYBRID-NEXT: and a0, zero, a0 +; HYBRID-NEXT: addi a0, a0, 2 +; HYBRID-NEXT: ret + %zero 
= ptrtoint i8 addrspace(200)* null to i32 + %ret = add i32 %zero, 2 + ret i32 %ret +} + +define internal i32 @ptrtoint_null_plus_var(i32 %add) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_null_plus_var: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, zero, a0 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_null_plus_var: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, cnull +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a1, zero, a1 +; HYBRID-NEXT: add a0, a1, a0 +; HYBRID-NEXT: ret + %zero = ptrtoint i8 addrspace(200)* null to i32 + %ret = add i32 %zero, %add + ret i32 %ret +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/purecap-jumptable.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/purecap-jumptable.ll new file mode 100644 index 0000000000000..0dbf613a64eba --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/purecap-jumptable.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/purecap-jumptable.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f < %s -o - | FileCheck %s +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -relocation-model=static < %s -o - | FileCheck %s +; Check that we can generate jump tables for switch statements. +; TODO: this is currently not implemented for CHERI-RISC-V + +define void @below_threshold(i32 %in, i32 addrspace(200)* %out) nounwind { +; CHECK-LABEL: below_threshold: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: beq a0, a2, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: bne a0, a2, .LBB0_5 +; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: j .LBB0_4 +; CHECK-NEXT: .LBB0_3: # %bb2 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: sw a0, 0(ca1) +; CHECK-NEXT: .LBB0_5: # %exit +; CHECK-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + ] +bb1: + store i32 4, i32 addrspace(200)* %out + br label %exit +bb2: + store i32 3, i32 addrspace(200)* %out + br label %exit +exit: + ret void +} + +; For RISC-V the jump table threshold is set to 5 cases, but MIPS uses the default +; value of 4 (set in llvm/lib/CodeGen/TargetLoweringBase.cpp). 
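+; With only four cases the switch below therefore stays under the RISC-V
+; threshold and is lowered as a compare-and-branch tree, while the six-case
+; @above_threshold_all further down does use a jump table (.LJTI2_0, see the
+; CHECK lines at the end of this file).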
+define void @above_threshold_mips(i32 %in, i32 addrspace(200)* %out) nounwind { +; CHECK-LABEL: above_threshold_mips: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: blt a2, a0, .LBB1_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: beq a0, a2, .LBB1_7 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: bne a0, a2, .LBB1_10 +; CHECK-NEXT: # %bb.3: # %bb2 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: j .LBB1_9 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: li a2, 3 +; CHECK-NEXT: beq a0, a2, .LBB1_8 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: li a2, 4 +; CHECK-NEXT: bne a0, a2, .LBB1_10 +; CHECK-NEXT: # %bb.6: # %bb4 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: j .LBB1_9 +; CHECK-NEXT: .LBB1_7: # %bb1 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: j .LBB1_9 +; CHECK-NEXT: .LBB1_8: # %bb3 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: .LBB1_9: # %exit +; CHECK-NEXT: sw a0, 0(ca1) +; CHECK-NEXT: .LBB1_10: # %exit +; CHECK-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] +bb1: + store i32 4, i32 addrspace(200)* %out + br label %exit +bb2: + store i32 3, i32 addrspace(200)* %out + br label %exit +bb3: + store i32 2, i32 addrspace(200)* %out + br label %exit +bb4: + store i32 1, i32 addrspace(200)* %out + br label %exit +exit: + ret void +} + +; UTC_ARGS: --disable +; UTC_ARGS: --enable + +define void @above_threshold_all(i32 %in, i32 addrspace(200)* %out) nounwind { +; CHECK-LABEL: above_threshold_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: li a2, 5 +; CHECK-NEXT: bltu a2, a0, .LBB2_9 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: .LBB2_10: # %entry +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca2, %pcrel_hi(.LJTI2_0) +; CHECK-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB2_10) +; CHECK-NEXT: addy ca0, ca2, a0 +; CHECK-NEXT: lw a0, 0(ca0) +; CHECK-NEXT: .LBB2_11: # %entry +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca2, %pcrel_hi(.Labove_threshold_all$jump_table_base) +; CHECK-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB2_11) +; CHECK-NEXT: addy ca0, ca2, a0 +; CHECK-NEXT: jr ca0 +; CHECK-NEXT: .LBB2_2: # %bb1 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_3: # %bb2 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_4: # %bb3 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_5: # %bb4 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_6: # %bb5 +; CHECK-NEXT: li a0, 100 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_7: # %bb6 +; CHECK-NEXT: li a0, 200 +; CHECK-NEXT: .LBB2_8: +; CHECK-NEXT: sw a0, 0(ca1) +; CHECK-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: .LBB2_9: # %exit +; CHECK-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + ] +bb1: + store i32 4, i32 addrspace(200)* %out + br label %exit +bb2: + store i32 3, i32 addrspace(200)* %out + br label %exit +bb3: + store i32 2, i32 addrspace(200)* %out + br label %exit +bb4: + store i32 1, i32 addrspace(200)* %out + br label %exit +bb5: + store i32 100, i32 addrspace(200)* %out + br label %exit +bb6: + store i32 200, i32 addrspace(200)* %out + br label %exit +exit: + ret void +} + +; UTC_ARGS: 
--disable
+; CHECK-LABEL: .LJTI2_0:
+; CHECK-NEXT: .word .LBB2_2-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_3-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_4-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_5-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_6-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_7-.Labove_threshold_all$jump_table_base
+; UTC_ARGS: --enable
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/setoffset-multiple-uses.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/setoffset-multiple-uses.ll
new file mode 100644
index 0000000000000..09e195ec186db
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/setoffset-multiple-uses.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/setoffset-multiple-uses.ll
+; RUN: opt -S -passes=instcombine -o - %s | FileCheck %s
+; RUN: opt -S -passes=instcombine -o - %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O1 - -o - | %cheri_FileCheck %s --check-prefix ASM
+
+target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200"
+; Reduced test case for a crash in the new optimization to fold multiple setoffset calls (originally found when compiling libunwind)
+
+declare i32 @check_fold(i32) addrspace(200)
+declare void @check_fold_i8ptr(ptr addrspace(200)) addrspace(200)
+declare i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200)) addrspace(200)
+declare ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200), i32) addrspace(200)
+
+define void @infer_values_from_null_set_offset() addrspace(200) nounwind {
+; ASM-LABEL: infer_values_from_null_set_offset:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -16
+; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill
+; ASM-NEXT: lui a0, 30
+; ASM-NEXT: addi a0, a0, 576
+; ASM-NEXT: call check_fold
+; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 16
+; ASM-NEXT: ret
+; CHECK-LABEL: define void @infer_values_from_null_set_offset
+; CHECK-SAME: () addrspace(200) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[OFFSET_CHECK:%.*]] = call i32 @check_fold(i32 123456)
+; CHECK-NEXT: ret void
+;
+ %with_offset = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) null, i32 123456)
+ %offset = call i32 @llvm.cheri.cap.offset.get.i32(ptr addrspace(200) nonnull %with_offset)
+ %offset_check = call i32 @check_fold(i32 %offset)
+ ret void
+}
+
+define void @multiple_uses_big_constant() addrspace(200) nounwind {
+; ASM-LABEL: multiple_uses_big_constant:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -16
+; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill
+; ASM-NEXT: sy cs0, 0(csp) # 8-byte Folded Spill
+; ASM-NEXT: lui a0, 30
+; ASM-NEXT: addi a0, a0, 576
+; ASM-NEXT: addy cs0, cnull, a0
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call check_fold_i8ptr
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call check_fold_i8ptr
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call check_fold_i8ptr
+; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload
+; ASM-NEXT: ly cs0, 0(csp) # 8-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 16
+; ASM-NEXT: ret
+; CHECK-LABEL: define void @multiple_uses_big_constant
+; CHECK-SAME: () addrspace(200) #[[ATTR1]] {
+; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i32 123456))
+;
CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i32 123456)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i32 123456)) +; CHECK-NEXT: ret void +; + %with_offset = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) null, i32 123456) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + ret void +} + +; Here we should use an immediate cincoffset: +define void @multiple_uses_small_constant() addrspace(200) nounwind { +; ASM-LABEL: multiple_uses_small_constant: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-NEXT: addiy ca0, cnull, 123 +; ASM-NEXT: call check_fold_i8ptr +; ASM-NEXT: addiy ca0, cnull, 123 +; ASM-NEXT: call check_fold_i8ptr +; ASM-NEXT: addiy ca0, cnull, 123 +; ASM-NEXT: call check_fold_i8ptr +; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define void @multiple_uses_small_constant +; CHECK-SAME: () addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i32 123)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i32 123)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i32 123)) +; CHECK-NEXT: ret void +; + %with_offset = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i32(ptr addrspace(200) null, i32 123) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-dynamic-alloca.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-dynamic-alloca.ll new file mode 100644 index 0000000000000..75a9dab02ce05 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-dynamic-alloca.ll @@ -0,0 +1,311 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-bounds-dynamic-alloca.ll +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -cheri-bound-allocas -o - -S %s | FileCheck %s +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O0 %s -o - | FileCheck %s -check-prefix ASM +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O2 %s -o - | FileCheck %s -check-prefix ASM-OPT + +; reduced C test case: +; __builtin_va_list a; +; char *b; +; void c() { +; while (__builtin_va_arg(a, char)) +; b = __builtin_alloca(sizeof(b)); +; d(b); +; } +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" + +declare i32 @use_alloca(ptr addrspace(200)) local_unnamed_addr addrspace(200) + +define i32 @alloca_in_entry(i1 %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: alloca_in_entry: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -32 +; ASM-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: andi a0, a1, 1 +; ASM-NEXT: beqz a0, 
.LBB0_4 +; ASM-NEXT: j .LBB0_1 +; ASM-NEXT: .LBB0_1: # %do_alloca +; ASM-NEXT: j .LBB0_2 +; ASM-NEXT: .LBB0_2: # %use_alloca_no_bounds +; ASM-NEXT: li a0, 0 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: li a0, 1234 +; ASM-NEXT: sw a0, 8(csp) +; ASM-NEXT: j .LBB0_3 +; ASM-NEXT: .LBB0_3: # %use_alloca_need_bounds +; ASM-NEXT: addiy ca0, csp, 0 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: call use_alloca +; ASM-NEXT: j .LBB0_4 +; ASM-NEXT: .LBB0_4: # %exit +; ASM-NEXT: li a0, 123 +; ASM-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; +; ASM-OPT-LABEL: alloca_in_entry: +; ASM-OPT: # %bb.0: # %entry +; ASM-OPT-NEXT: andi a0, a0, 1 +; ASM-OPT-NEXT: beqz a0, .LBB0_2 +; ASM-OPT-NEXT: # %bb.1: # %do_alloca +; ASM-OPT-NEXT: addiy csp, csp, -32 +; ASM-OPT-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; ASM-OPT-NEXT: sw zero, 12(csp) +; ASM-OPT-NEXT: li a0, 1234 +; ASM-OPT-NEXT: sw a0, 8(csp) +; ASM-OPT-NEXT: addiy ca0, csp, 0 +; ASM-OPT-NEXT: ybndsiw ca0, ca0, 16 +; ASM-OPT-NEXT: call use_alloca +; ASM-OPT-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; ASM-OPT-NEXT: addiy csp, csp, 32 +; ASM-OPT-NEXT: .LBB0_2: # %exit +; ASM-OPT-NEXT: li a0, 123 +; ASM-OPT-NEXT: ret +; CHECK-LABEL: define i32 @alloca_in_entry +; CHECK-SAME: (i1 [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i8], align 16, addrspace(200) +; CHECK-NEXT: br i1 [[ARG]], label [[DO_ALLOCA:%.*]], label [[EXIT:%.*]] +; CHECK: do_alloca: +; CHECK-NEXT: br label [[USE_ALLOCA_NO_BOUNDS:%.*]] +; CHECK: use_alloca_no_bounds: +; CHECK-NEXT: [[PTR_PLUS_ONE:%.*]] = getelementptr i64, ptr addrspace(200) [[ALLOCA]], i64 1 +; CHECK-NEXT: store i64 1234, ptr addrspace(200) [[PTR_PLUS_ONE]], align 8 +; CHECK-NEXT: br label [[USE_ALLOCA_NEED_BOUNDS:%.*]] +; CHECK: use_alloca_need_bounds: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[ALLOCA]], i32 16) +; CHECK-NEXT: [[DOTSUB_LE:%.*]] = getelementptr inbounds [16 x i8], ptr addrspace(200) [[TMP0]], i64 0, i64 0 +; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @use_alloca(ptr addrspace(200) [[DOTSUB_LE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 123 +; +entry: + %alloca = alloca [16 x i8], align 16, addrspace(200) + br i1 %arg, label %do_alloca, label %exit + +do_alloca: ; preds = %entry + br label %use_alloca_no_bounds + +use_alloca_no_bounds: ; preds = %do_alloca + %ptr_plus_one = getelementptr i64, ptr addrspace(200) %alloca, i64 1 + store i64 1234, ptr addrspace(200) %ptr_plus_one, align 8 + br label %use_alloca_need_bounds + +use_alloca_need_bounds: ; preds = %use_alloca_no_bounds + %.sub.le = getelementptr inbounds [16 x i8], ptr addrspace(200) %alloca, i64 0, i64 0 + %call = call signext i32 @use_alloca(ptr addrspace(200) %.sub.le) + br label %exit + +exit: ; preds = %use_alloca_need_bounds, %entry + ret i32 123 +} + +define i32 @alloca_not_in_entry(i1 %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: alloca_not_in_entry: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -32 +; ASM-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; ASM-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; ASM-NEXT: addiy cs0, csp, 32 +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: andi a0, a1, 1 +; ASM-NEXT: beqz a0, .LBB1_4 +; ASM-NEXT: j .LBB1_1 +; ASM-NEXT: .LBB1_1: # %do_alloca +; ASM-NEXT: ymv ca0, csp +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: addi a1, a1, -16 +; ASM-NEXT: yaddrw ca1, ca0, a1 +; 
ASM-NEXT: li a0, 16 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: sy ca0, -32(cs0) # 8-byte Folded Spill +; ASM-NEXT: ymv csp, ca1 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: sy ca0, -24(cs0) # 8-byte Folded Spill +; ASM-NEXT: j .LBB1_2 +; ASM-NEXT: .LBB1_2: # %use_alloca_no_bounds +; ASM-NEXT: ly ca1, -32(cs0) # 8-byte Folded Reload +; ASM-NEXT: li a0, 0 +; ASM-NEXT: sw a0, 12(ca1) +; ASM-NEXT: li a0, 1234 +; ASM-NEXT: sw a0, 8(ca1) +; ASM-NEXT: j .LBB1_3 +; ASM-NEXT: .LBB1_3: # %use_alloca_need_bounds +; ASM-NEXT: ly ca0, -24(cs0) # 8-byte Folded Reload +; ASM-NEXT: call use_alloca +; ASM-NEXT: j .LBB1_4 +; ASM-NEXT: .LBB1_4: # %exit +; ASM-NEXT: li a0, 123 +; ASM-NEXT: addiy csp, cs0, -32 +; ASM-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; ASM-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; +; ASM-OPT-LABEL: alloca_not_in_entry: +; ASM-OPT: # %bb.0: # %entry +; ASM-OPT-NEXT: andi a0, a0, 1 +; ASM-OPT-NEXT: beqz a0, .LBB1_2 +; ASM-OPT-NEXT: # %bb.1: # %do_alloca +; ASM-OPT-NEXT: addiy csp, csp, -16 +; ASM-OPT-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-OPT-NEXT: sy cs0, 0(csp) # 8-byte Folded Spill +; ASM-OPT-NEXT: addiy cs0, csp, 16 +; ASM-OPT-NEXT: addi a0, sp, -16 +; ASM-OPT-NEXT: yaddrw ca0, csp, a0 +; ASM-OPT-NEXT: li a1, 16 +; ASM-OPT-NEXT: ybndsrw ca1, ca0, a1 +; ASM-OPT-NEXT: ymv csp, ca0 +; ASM-OPT-NEXT: ybndsiw ca0, ca1, 16 +; ASM-OPT-NEXT: sw zero, 12(ca1) +; ASM-OPT-NEXT: li a2, 1234 +; ASM-OPT-NEXT: sw a2, 8(ca1) +; ASM-OPT-NEXT: call use_alloca +; ASM-OPT-NEXT: addiy csp, cs0, -16 +; ASM-OPT-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-OPT-NEXT: ly cs0, 0(csp) # 8-byte Folded Reload +; ASM-OPT-NEXT: addiy csp, csp, 16 +; ASM-OPT-NEXT: .LBB1_2: # %exit +; ASM-OPT-NEXT: li a0, 123 +; ASM-OPT-NEXT: ret +; CHECK-LABEL: define i32 @alloca_not_in_entry +; CHECK-SAME: (i1 [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ARG]], label [[DO_ALLOCA:%.*]], label [[EXIT:%.*]] +; CHECK: do_alloca: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i8], align 16, addrspace(200) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.dynamic.i32(ptr addrspace(200) [[ALLOCA]], i32 16) +; CHECK-NEXT: br label [[USE_ALLOCA_NO_BOUNDS:%.*]] +; CHECK: use_alloca_no_bounds: +; CHECK-NEXT: [[PTR_PLUS_ONE:%.*]] = getelementptr i64, ptr addrspace(200) [[ALLOCA]], i64 1 +; CHECK-NEXT: store i64 1234, ptr addrspace(200) [[PTR_PLUS_ONE]], align 8 +; CHECK-NEXT: br label [[USE_ALLOCA_NEED_BOUNDS:%.*]] +; CHECK: use_alloca_need_bounds: +; CHECK-NEXT: [[DOTSUB_LE:%.*]] = getelementptr inbounds [16 x i8], ptr addrspace(200) [[TMP0]], i64 0, i64 0 +; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @use_alloca(ptr addrspace(200) [[DOTSUB_LE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 123 +; +entry: + br i1 %arg, label %do_alloca, label %exit + +do_alloca: ; preds = %entry + %alloca = alloca [16 x i8], align 16, addrspace(200) + br label %use_alloca_no_bounds + +use_alloca_no_bounds: ; preds = %do_alloca + %ptr_plus_one = getelementptr i64, ptr addrspace(200) %alloca, i64 1 + store i64 1234, ptr addrspace(200) %ptr_plus_one, align 8 + br label %use_alloca_need_bounds + +use_alloca_need_bounds: ; preds = %use_alloca_no_bounds + %.sub.le = getelementptr inbounds [16 x i8], ptr addrspace(200) %alloca, i64 0, i64 0 + %call = call signext i32 @use_alloca(ptr addrspace(200) %.sub.le) + br label %exit + +exit: ; preds = %use_alloca_need_bounds, 
%entry + ret i32 123 +} + +; The original reduced test case from libc/gen/exec.c +; We can't use llvm.cheri.bounded.stack.cap.i64 here, since that only works for static allocas: +define i32 @crash_reproducer(i1 %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: crash_reproducer: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -32 +; ASM-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; ASM-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; ASM-NEXT: addiy cs0, csp, 32 +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: andi a0, a1, 1 +; ASM-NEXT: beqz a0, .LBB2_2 +; ASM-NEXT: j .LBB2_1 +; ASM-NEXT: .LBB2_1: # %entry.while.end_crit_edge +; ASM-NEXT: .LBB2_2: # %while.body +; ASM-NEXT: ymv ca0, csp +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: addi a1, a1, -16 +; ASM-NEXT: yaddrw ca1, ca0, a1 +; ASM-NEXT: li a0, 16 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: ymv csp, ca1 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: sy ca0, -24(cs0) # 8-byte Folded Spill +; ASM-NEXT: j .LBB2_3 +; ASM-NEXT: .LBB2_3: # %while.end.loopexit +; ASM-NEXT: ly ca0, -24(cs0) # 8-byte Folded Reload +; ASM-NEXT: sy ca0, -32(cs0) # 8-byte Folded Spill +; ASM-NEXT: j .LBB2_4 +; ASM-NEXT: .LBB2_4: # %while.end +; ASM-NEXT: ly ca0, -32(cs0) # 8-byte Folded Reload +; ASM-NEXT: call use_alloca +; ASM-NEXT: addi a0, a0, 1234 +; ASM-NEXT: addiy csp, cs0, -32 +; ASM-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; ASM-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; +; ASM-OPT-LABEL: crash_reproducer: +; ASM-OPT: # %bb.0: # %entry +; ASM-OPT-NEXT: andi a0, a0, 1 +; ASM-OPT-NEXT: bnez a0, .LBB2_2 +; ASM-OPT-NEXT: # %bb.1: # %while.body +; ASM-OPT-NEXT: addiy csp, csp, -16 +; ASM-OPT-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-OPT-NEXT: sy cs0, 0(csp) # 8-byte Folded Spill +; ASM-OPT-NEXT: addiy cs0, csp, 16 +; ASM-OPT-NEXT: addi a0, sp, -16 +; ASM-OPT-NEXT: yaddrw ca0, csp, a0 +; ASM-OPT-NEXT: li a1, 16 +; ASM-OPT-NEXT: ybndsrw ca1, ca0, a1 +; ASM-OPT-NEXT: ymv csp, ca0 +; ASM-OPT-NEXT: ybndsiw ca0, ca1, 16 +; ASM-OPT-NEXT: call use_alloca +; ASM-OPT-NEXT: addi a0, a0, 1234 +; ASM-OPT-NEXT: addiy csp, cs0, -16 +; ASM-OPT-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-OPT-NEXT: ly cs0, 0(csp) # 8-byte Folded Reload +; ASM-OPT-NEXT: addiy csp, csp, 16 +; ASM-OPT-NEXT: ret +; ASM-OPT-NEXT: .LBB2_2: # %entry.while.end_crit_edge +; CHECK-LABEL: define i32 @crash_reproducer +; CHECK-SAME: (i1 [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ARG]], label [[ENTRY_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY:%.*]] +; CHECK: entry.while.end_crit_edge: +; CHECK-NEXT: unreachable +; CHECK: while.body: +; CHECK-NEXT: [[TMP0:%.*]] = alloca [16 x i8], align 16, addrspace(200) +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.dynamic.i32(ptr addrspace(200) [[TMP0]], i32 16) +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] +; CHECK: while.end.loopexit: +; CHECK-NEXT: [[DOTSUB_LE:%.*]] = getelementptr inbounds [16 x i8], ptr addrspace(200) [[TMP1]], i64 0, i64 0 +; CHECK-NEXT: br label [[WHILE_END:%.*]] +; CHECK: while.end: +; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @use_alloca(ptr addrspace(200) [[DOTSUB_LE]]) +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[CALL]], 1234 +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + br i1 %arg, label %entry.while.end_crit_edge, label %while.body + +entry.while.end_crit_edge: ; preds = %entry + unreachable + +while.body: ; preds = %entry + %0 = alloca [16 x i8], align 16, 
addrspace(200)
+ br label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ %.sub.le = getelementptr inbounds [16 x i8], ptr addrspace(200) %0, i64 0, i64 0
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit
+ %call = call signext i32 @use_alloca(ptr addrspace(200) %.sub.le)
+ %result = add i32 %call, 1234
+ ret i32 %result
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-opaque-spill-too-early.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-opaque-spill-too-early.ll
new file mode 100644
index 0000000000000..884134d110287
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-opaque-spill-too-early.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-bounds-opaque-spill-too-early.ll
+;; After merging to LLVM 15, the switch to opaque pointers caused miscompilations
+;; in the stack bounding pass (the unbounded value was used instead of the bounded
+;; one due to the removal of the bitcast instructions).
+; REQUIRES: asserts
+; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -cheri-bound-allocas -o - -S %s -debug-only=cheri-bound-allocas 2>%t.dbg | FileCheck %s
+; RUN: FileCheck %s -input-file=%t.dbg -check-prefix DBG
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s -check-prefix ASM
+target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200"
+
+; DBG-LABEL: Checking function lazy_bind_args
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %0 = load ptr addrspace(200), ptr addrspace(200) %cap, align 8
+; DBG-NEXT: cheri-bound-allocas: -Load/store size=8, alloca size=8, current GEP offset=0 for ptr addrspace(200)
+; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %0 = load ptr addrspace(200), ptr addrspace(200) %cap, align 8
+; DBG-NEXT: cheri-bound-allocas: -Adding stack bounds since it is passed to call: %call = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull %cap)
+; DBG-NEXT: cheri-bound-allocas: Found alloca use that needs bounds: %call = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull %cap)
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 8
+; DBG-NEXT: cheri-bound-allocas: -Load/store size=8, alloca size=8, current GEP offset=0 for ptr addrspace(200)
+; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 8
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 8
+; DBG-NEXT: cheri-bound-allocas: -Stack slot used as value and not pointer -> must set bounds
+; DBG-NEXT: cheri-bound-allocas: Found alloca use that needs bounds: store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 8
+; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 8, ptr addrspace(200) nonnull %cap)
+; DBG-NEXT: cheri-bound-allocas: lazy_bind_args: 2 of 5 users need bounds for %cap = alloca
ptr addrspace(200), align 8, addrspace(200) +; DBG-NEXT: lazy_bind_args: setting bounds on stack alloca to 8 %cap = alloca ptr addrspace(200), align 8, addrspace(200) + +declare void @llvm.lifetime.start.p200(i64 immarg, ptr addrspace(200) nocapture) addrspace(200) + +declare ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef) addrspace(200) + +declare void @cheribsdtest_check_cap_eq(ptr addrspace(200) noundef, ptr addrspace(200) noundef) addrspace(200) + +define dso_local void @lazy_bind_args() addrspace(200) nounwind { +; ASM-LABEL: lazy_bind_args: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-NEXT: addiy ca0, csp, 0 +; ASM-NEXT: ybndsiw ca0, ca0, 8 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: call cheribsdtest_dynamic_identity_cap +; ASM-NEXT: ly ca1, 0(csp) +; ASM-NEXT: ymv ca2, ca0 +; ASM-NEXT: ymv ca0, ca1 +; ASM-NEXT: ymv ca1, ca2 +; ASM-NEXT: call cheribsdtest_check_cap_eq +; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define dso_local void @lazy_bind_args +; CHECK-SAME: () addrspace(200) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CAP:%.*]] = alloca ptr addrspace(200), align 8, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 8, ptr addrspace(200) nonnull [[CAP]]) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[CAP]], i32 8) +; CHECK-NEXT: store ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[CAP]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[CAP]], i32 8) +; CHECK-NEXT: [[CALL:%.*]] = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[CAP]], align 8 +; CHECK-NEXT: call void @cheribsdtest_check_cap_eq(ptr addrspace(200) noundef [[TMP2]], ptr addrspace(200) noundef [[CALL]]) +; CHECK-NEXT: ret void +; +entry: + %cap = alloca ptr addrspace(200), align 8, addrspace(200) + call void @llvm.lifetime.start.p200(i64 8, ptr addrspace(200) nonnull %cap) + store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 8 + %call = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull %cap) + %0 = load ptr addrspace(200), ptr addrspace(200) %cap, align 8 + call void @cheribsdtest_check_cap_eq(ptr addrspace(200) noundef %0, ptr addrspace(200) noundef %call) + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-pass-phi.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-pass-phi.ll new file mode 100644 index 0000000000000..5caa518699585 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-bounds-pass-phi.ll @@ -0,0 +1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-bounds-pass-phi.ll +; REQUIRES: asserts +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -cheri-bound-allocas %s -o - -S -cheri-stack-bounds=if-needed \ +; RUN: -cheri-stack-bounds-single-intrinsic-threshold=10 -debug-only=cheri-bound-allocas 2>%t.dbg | FileCheck %s +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f 
-cheri-stack-bounds=if-needed -O2 -cheri-stack-bounds-single-intrinsic-threshold=10 < %s | %cheri_FileCheck %s -check-prefix ASM +; RUN: FileCheck %s -check-prefix DBG -input-file=%t.dbg +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" + +declare void @foo(ptr addrspace(200)) addrspace(200) + +; Check that we don't attempt to insert stack bounds intrinsics before the PHI at the start of a basic block: +define void @test_phi(i1 %cond) addrspace(200) nounwind { +; ASM-LABEL: test_phi: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -32 +; ASM-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; ASM-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; ASM-NEXT: andi a0, a0, 1 +; ASM-NEXT: beqz a0, .LBB0_2 +; ASM-NEXT: # %bb.1: # %block1 +; ASM-NEXT: ymv ca0, cnull +; ASM-NEXT: li a1, 1 +; ASM-NEXT: sw a1, 12(csp) +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 8(csp) +; ASM-NEXT: li a1, 3 +; ASM-NEXT: sw a1, 4(csp) +; ASM-NEXT: addiy ca1, csp, 8 +; ASM-NEXT: j .LBB0_3 +; ASM-NEXT: .LBB0_2: # %block2 +; ASM-NEXT: li a0, 4 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: li a0, 5 +; ASM-NEXT: sw a0, 8(csp) +; ASM-NEXT: li a0, 6 +; ASM-NEXT: sw a0, 4(csp) +; ASM-NEXT: addiy ca0, csp, 12 +; ASM-NEXT: ybndsiw ca0, ca0, 4 +; ASM-NEXT: addiy ca1, csp, 4 +; ASM-NEXT: .LBB0_3: # %phi_block +; ASM-NEXT: ybndsiw cs0, ca1, 4 +; ASM-NEXT: call foo +; ASM-NEXT: ymv ca0, cs0 +; ASM-NEXT: call foo +; ASM-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; ASM-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; CHECK-LABEL: define void @test_phi +; CHECK-SAME: (i1 [[COND:%.*]]) addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: [[ALLOCA2:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: [[ALLOCA3:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: br i1 [[COND]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]] +; CHECK: block1: +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[ALLOCA1]], align 4 +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ALLOCA2]], align 4 +; CHECK-NEXT: store i32 3, ptr addrspace(200) [[ALLOCA3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[ALLOCA2]], i32 4) +; CHECK-NEXT: br label [[PHI_BLOCK:%.*]] +; CHECK: block2: +; CHECK-NEXT: store i32 4, ptr addrspace(200) [[ALLOCA1]], align 4 +; CHECK-NEXT: store i32 5, ptr addrspace(200) [[ALLOCA2]], align 4 +; CHECK-NEXT: store i32 6, ptr addrspace(200) [[ALLOCA3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[ALLOCA1]], i32 4) +; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[ALLOCA3]], i32 4) +; CHECK-NEXT: br label [[PHI_BLOCK]] +; CHECK: phi_block: +; CHECK-NEXT: [[VAL1:%.*]] = phi ptr addrspace(200) [ null, [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ] +; CHECK-NEXT: [[VAL2:%.*]] = phi ptr addrspace(200) [ [[TMP0]], [[BLOCK1]] ], [ [[TMP2]], [[BLOCK2]] ] +; CHECK-NEXT: call void @foo(ptr addrspace(200) [[VAL1]]) +; CHECK-NEXT: call void @foo(ptr addrspace(200) [[VAL2]]) +; CHECK-NEXT: ret void +; +entry: + %alloca1 = alloca i32, align 4, addrspace(200) + %alloca2 = alloca i32, align 4, addrspace(200) + %alloca3 = alloca i32, align 4, addrspace(200) + br i1 %cond, label %block1, label %block2 + +block1: + store i32 1, ptr addrspace(200) %alloca1, align 4 + store i32 2, ptr addrspace(200)
%alloca2, align 4 + store i32 3, ptr addrspace(200) %alloca3, align 4 + br label %phi_block + +block2: + store i32 4, ptr addrspace(200) %alloca1, align 4 + store i32 5, ptr addrspace(200) %alloca2, align 4 + store i32 6, ptr addrspace(200) %alloca3, align 4 + br label %phi_block + +phi_block: + %val1 = phi ptr addrspace(200) [ null, %block1 ], [ %alloca1, %block2 ] + %val2 = phi ptr addrspace(200) [ %alloca2, %block1 ], [ %alloca3, %block2 ] + call void @foo(ptr addrspace(200) %val1) + call void @foo(ptr addrspace(200) %val2) + ret void +} + +; Check that we don't place all bounded allocas in the entry block, instead only do it in the predecessor +define void @test_only_created_in_predecessor_block(i1 %cond) addrspace(200) nounwind { +; ASM-LABEL: test_only_created_in_predecessor_block: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-NEXT: andi a0, a0, 1 +; ASM-NEXT: beqz a0, .LBB1_2 +; ASM-NEXT: # %bb.1: # %block1 +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 4(csp) +; ASM-NEXT: addiy ca0, csp, 4 +; ASM-NEXT: j .LBB1_3 +; ASM-NEXT: .LBB1_2: # %block2 +; ASM-NEXT: li a0, 5 +; ASM-NEXT: sw a0, 0(csp) +; ASM-NEXT: addiy ca0, csp, 0 +; ASM-NEXT: .LBB1_3: # %phi_block +; ASM-NEXT: ybndsiw ca0, ca0, 4 +; ASM-NEXT: call foo +; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define void @test_only_created_in_predecessor_block +; CHECK-SAME: (i1 [[COND:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: [[ALLOCA2:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: br i1 [[COND]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]] +; CHECK: block1: +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[ALLOCA1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[ALLOCA1]], i32 4) +; CHECK-NEXT: br label [[PHI_BLOCK:%.*]] +; CHECK: block2: +; CHECK-NEXT: store i32 5, ptr addrspace(200) [[ALLOCA2]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[ALLOCA2]], i32 4) +; CHECK-NEXT: br label [[PHI_BLOCK]] +; CHECK: phi_block: +; CHECK-NEXT: [[VAL1:%.*]] = phi ptr addrspace(200) [ [[TMP0]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ] +; CHECK-NEXT: call void @foo(ptr addrspace(200) [[VAL1]]) +; CHECK-NEXT: ret void +; +entry: + %alloca1 = alloca i32, align 4, addrspace(200) + %alloca2 = alloca i32, align 4, addrspace(200) + br i1 %cond, label %block1, label %block2 + +block1: + store i32 1, ptr addrspace(200) %alloca1, align 4 + br label %phi_block + +block2: + store i32 5, ptr addrspace(200) %alloca2, align 4 + br label %phi_block + +phi_block: + %val1 = phi ptr addrspace(200) [ %alloca1, %block1 ], [ %alloca2, %block2 ] + call void @foo(ptr addrspace(200) %val1) + ret void +} + +; DBG: -Adding stack bounds since phi user needs bounds: call void @foo(ptr addrspace(200) %val1) +; DBG: test_phi: 1 of 3 users need bounds for %alloca1 = alloca i32, align 4, addrspace(200) +; DBG: -Adding stack bounds since phi user needs bounds: call void @foo(ptr addrspace(200) %val2) +; DBG: test_phi: 1 of 3 users need bounds for %alloca2 = alloca i32, align 4, addrspace(200) +; DBG: -Adding stack bounds since phi user needs bounds: call void @foo(ptr addrspace(200) %val2) +; DBG: test_phi: 1 of 3 users need bounds for %alloca3 = alloca i32, align 4, addrspace(200) diff --git 
a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-spill-unnecessary.c.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-spill-unnecessary.c.ll new file mode 100644 index 0000000000000..1d1a113c71fec --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stack-spill-unnecessary.c.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-spill-unnecessary.c.ll +; The new CheriBoundedStackPseudo instruction lets us pretend that the cincoffset+csetbounds +; is a single trivially rematerializable instruction, so that it can be freely moved around to avoid stack spills. +; Previously we were moving the definition of the register that is only used later to the beginning of +; the function and saving+restoring it instead of materializing it just before the use. + +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O2 --cheri-stack-bounds-single-intrinsic-threshold=0 < %s | %cheri_FileCheck %s --check-prefixes=CHECK +; Always use a single intrinsic for the calls (should result in same codegen) +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -O2 --cheri-stack-bounds-single-intrinsic-threshold=0 < %s | %cheri_FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/addrspace(200)/addrspace(0)/g' %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f | FileCheck --check-prefix HYBRID %s + + +declare void @foo() addrspace(200) +declare void @one_arg(i32 addrspace(200)*) addrspace(200) +declare void @multi_arg(i32 addrspace(200)* %start, i32 addrspace(200)* %end, i8 addrspace(200)* %buf) addrspace(200) + +define void @use_after_call() addrspace(200) nounwind { +; CHECK-LABEL: use_after_call: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -32 +; CHECK-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; CHECK-NEXT: addiy ca0, csp, 12 +; CHECK-NEXT: ybndsiw cs0, ca0, 4 +; CHECK-NEXT: li a0, 123 +; CHECK-NEXT: sw a0, 12(csp) +; CHECK-NEXT: call foo +; CHECK-NEXT: ymv ca0, cs0 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 32 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: use_after_call: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: li a0, 123 +; HYBRID-NEXT: sw a0, 8(sp) +; HYBRID-NEXT: call foo@plt +; HYBRID-NEXT: addi a0, sp, 8 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %x = alloca i32, align 4, addrspace(200) + store i32 123, i32 addrspace(200)* %x, align 4 + call void @foo() + call void @one_arg(i32 addrspace(200)* %x) + ret void +} + +define void @use_after_call_no_store() addrspace(200) nounwind { +; CHECK-LABEL: use_after_call_no_store: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -32 +; CHECK-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs1, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: addiy ca0, csp, 4 +; CHECK-NEXT: ybndsiw cs0, ca0, 4 +; CHECK-NEXT: addiy ca0, csp, 0 +; CHECK-NEXT: ybndsiw cs1, ca0, 4 +; CHECK-NEXT: call foo +; CHECK-NEXT: ymv ca0, cs0 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT:
call one_arg +; CHECK-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs1, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 32 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: use_after_call_no_store: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: call foo@plt +; HYBRID-NEXT: addi a0, sp, 8 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: addi a0, sp, 4 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %x = alloca i32, align 4, addrspace(200) + %y = alloca i32, align 4, addrspace(200) + call void @foo() + call void @one_arg(i32 addrspace(200)* %x) + call void @one_arg(i32 addrspace(200)* %y) + ret void +} + +define void @multi_use() addrspace(200) nounwind { +; CHECK-LABEL: multi_use: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -32 +; CHECK-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; CHECK-NEXT: sy cs1, 8(csp) # 8-byte Folded Spill +; CHECK-NEXT: addiy ca0, csp, 4 +; CHECK-NEXT: ybndsiw cs0, ca0, 4 +; CHECK-NEXT: addiy ca0, csp, 0 +; CHECK-NEXT: ybndsiw cs1, ca0, 4 +; CHECK-NEXT: call foo +; CHECK-NEXT: addiy ca1, cs1, 4 +; CHECK-NEXT: addiy ca2, cs1, 1 +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: call multi_arg +; CHECK-NEXT: ymv ca0, cs0 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; CHECK-NEXT: ly cs1, 8(csp) # 8-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 32 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: multi_use: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: call foo@plt +; HYBRID-NEXT: addi a1, sp, 8 +; HYBRID-NEXT: addi a2, sp, 5 +; HYBRID-NEXT: addi a0, sp, 4 +; HYBRID-NEXT: call multi_arg@plt +; HYBRID-NEXT: addi a0, sp, 8 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: addi a0, sp, 4 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %y = alloca i32, align 4, addrspace(200) + %x = alloca i32, align 4, addrspace(200) + call void @foo() + %x_plus0 = getelementptr inbounds i32, i32 addrspace(200)* %x, i32 0 + %x_plus1 = getelementptr i32, i32 addrspace(200)* %x, i32 1 + %x_i8 = bitcast i32 addrspace(200)* %x to i8 addrspace(200)* + %x_i8_plus_1 = getelementptr inbounds i8, i8 addrspace(200)* %x_i8, i32 1 + call void @multi_arg(i32 addrspace(200)* %x_plus0, i32 addrspace(200)* %x_plus1, i8 addrspace(200)* %x_i8_plus_1) + call void @one_arg(i32 addrspace(200)* %y) + call void @one_arg(i32 addrspace(200)* %x) + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stackframe-intrinsics.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stackframe-intrinsics.ll new file mode 100644 index 0000000000000..bb7228c8a871e --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/stackframe-intrinsics.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stackframe-intrinsics.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - < %s | FileCheck %s --check-prefix=PURECAP +; 
RUN: sed 's/addrspace(200)/addrspace(0)/g' %s | llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f | FileCheck %s --check-prefix HYBRID +; Check that we can lower llvm.frameaddress/llvm.returnaddress + +; Capability-Inspection Instructions + +define dso_local ptr addrspace(200) @frameaddr() addrspace(200) nounwind { +; PURECAP-LABEL: frameaddr: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -32 +; PURECAP-NEXT: sy cra, 24(csp) # 8-byte Folded Spill +; PURECAP-NEXT: sy cs0, 16(csp) # 8-byte Folded Spill +; PURECAP-NEXT: addiy cs0, csp, 32 +; PURECAP-NEXT: sy cs0, -32(cs0) +; PURECAP-NEXT: ymv ca0, cs0 +; PURECAP-NEXT: ly cra, 24(csp) # 8-byte Folded Reload +; PURECAP-NEXT: ly cs0, 16(csp) # 8-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 32 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: frameaddr: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; HYBRID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; HYBRID-NEXT: addi s0, sp, 16 +; HYBRID-NEXT: sw s0, -16(s0) +; HYBRID-NEXT: mv a0, s0 +; HYBRID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; HYBRID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + %b = alloca ptr addrspace(200), align 16, addrspace(200) + %0 = call ptr addrspace(200) @llvm.frameaddress.p200(i32 0) + store ptr addrspace(200) %0, ptr addrspace(200) %b, align 16 + %1 = load ptr addrspace(200), ptr addrspace(200) %b, align 16 + ret ptr addrspace(200) %1 +} + +declare ptr addrspace(200) @llvm.frameaddress.p200(i32 immarg) addrspace(200) + +define dso_local ptr addrspace(200) @retaddr() addrspace(200) nounwind { +; PURECAP-LABEL: retaddr: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 0(csp) +; PURECAP-NEXT: ymv ca0, cra +; PURECAP-NEXT: addiy csp, csp, 16 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: retaddr: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sw ra, 0(sp) +; HYBRID-NEXT: mv a0, ra +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + %b = alloca ptr addrspace(200), align 16, addrspace(200) + %0 = call ptr addrspace(200) @llvm.returnaddress.p200(i32 0) + store ptr addrspace(200) %0, ptr addrspace(200) %b, align 16 + %1 = load ptr addrspace(200), ptr addrspace(200) %b, align 16 + ret ptr addrspace(200) %1 +} + +declare ptr addrspace(200) @llvm.returnaddress.p200(i32 immarg) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/strcpy-to-memcpy-no-tags.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/strcpy-to-memcpy-no-tags.ll new file mode 100644 index 0000000000000..dfc4481ba634c --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/strcpy-to-memcpy-no-tags.ll @@ -0,0 +1,166 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/strcpy-to-memcpy-no-tags.ll +; Check that we can inline the loads/stores generated when simplifying +; string libcalls to memcpy() (since it should be marked as non-tag-preserving). +; CHERI-GENERIC-UTC: llc +; Note: unlike other tests we do want to test attributes in this one.
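+; For orientation, a minimal sketch of the rewrite exercised below (the
+; attribute number #N is illustrative, not taken from the generated checks):
+;   %call = call ptr addrspace(200) @strcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str)
+; is folded by instcombine into
+;   call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) %dst, ptr addrspace(200) @str, i32 17, i1 false) #N
+; where #N carries no_preserve_cheri_tags: a constant string can never hold
+; valid capability tags, so the backend is free to expand the copy inline.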
+; CHERI-GENERIC-UTC: opt --function-signature +; RUN: opt < %s -passes=instcombine -S | FileCheck %s --check-prefix=CHECK-IR +; RUN: opt < %s -passes=instcombine -S | llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f - -o - | FileCheck %s --check-prefix=CHECK-ASM +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" + +@str = private unnamed_addr addrspace(200) constant [17 x i8] c"exactly 16 chars\00", align 4 + +declare ptr addrspace(200) @strcpy(ptr addrspace(200), ptr addrspace(200)) addrspace(200) +declare ptr addrspace(200) @stpcpy(ptr addrspace(200), ptr addrspace(200)) addrspace(200) +declare ptr addrspace(200) @strcat(ptr addrspace(200), ptr addrspace(200)) addrspace(200) +declare ptr addrspace(200) @strncpy(ptr addrspace(200), ptr addrspace(200), i64) addrspace(200) +declare ptr addrspace(200) @stpncpy(ptr addrspace(200), ptr addrspace(200), i64) addrspace(200) + +define void @test_strcpy_to_memcpy(ptr addrspace(200) align 4 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_strcpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: sb zero, 16(ca0) +; CHECK-ASM-NEXT: lui a1, 472870 +; CHECK-ASM-NEXT: addi a1, a1, 360 +; CHECK-ASM-NEXT: sw a1, 12(ca0) +; CHECK-ASM-NEXT: lui a1, 406019 +; CHECK-ASM-NEXT: addi a1, a1, 1585 +; CHECK-ASM-NEXT: sw a1, 8(ca0) +; CHECK-ASM-NEXT: lui a1, 133015 +; CHECK-ASM-NEXT: addi a1, a1, -908 +; CHECK-ASM-NEXT: sw a1, 4(ca0) +; CHECK-ASM-NEXT: lui a1, 407064 +; CHECK-ASM-NEXT: addi a1, a1, -1947 +; CHECK-ASM-NEXT: sw a1, 0(ca0) +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_strcpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 4 [[DST:%.*]]) addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) noundef nonnull align 4 dereferenceable(17) [[DST]], ptr addrspace(200) noundef nonnull align 4 dereferenceable(17) @str, i32 17, i1 false) #[[ATTR4:[0-9]+]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @strcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str) + ret void +} + +define void @test_stpcpy_to_memcpy(ptr addrspace(200) align 4 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_stpcpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: sb zero, 16(ca0) +; CHECK-ASM-NEXT: lui a1, 472870 +; CHECK-ASM-NEXT: addi a1, a1, 360 +; CHECK-ASM-NEXT: sw a1, 12(ca0) +; CHECK-ASM-NEXT: lui a1, 406019 +; CHECK-ASM-NEXT: addi a1, a1, 1585 +; CHECK-ASM-NEXT: sw a1, 8(ca0) +; CHECK-ASM-NEXT: lui a1, 133015 +; CHECK-ASM-NEXT: addi a1, a1, -908 +; CHECK-ASM-NEXT: sw a1, 4(ca0) +; CHECK-ASM-NEXT: lui a1, 407064 +; CHECK-ASM-NEXT: addi a1, a1, -1947 +; CHECK-ASM-NEXT: sw a1, 0(ca0) +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_stpcpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 4 [[DST:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) noundef nonnull align 4 dereferenceable(17) [[DST]], ptr addrspace(200) noundef nonnull align 4 dereferenceable(17) @str, i32 17, i1 false) #[[ATTR5:[0-9]+]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @stpcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str) + ret void +} + +define void @test_strcat_to_memcpy(ptr addrspace(200) align 4 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_strcat_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: addiy csp, csp, -16 
+; CHECK-ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-ASM-NEXT: sy cs0, 0(csp) # 8-byte Folded Spill +; CHECK-ASM-NEXT: ymv cs0, ca0 +; CHECK-ASM-NEXT: call strlen +; CHECK-ASM-NEXT: addy ca0, cs0, a0 +; CHECK-ASM-NEXT: .LBB2_1: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca1, %got_pcrel_hi(.Lstr) +; CHECK-ASM-NEXT: ly ca1, %pcrel_lo(.LBB2_1)(ca1) +; CHECK-ASM-NEXT: li a2, 17 +; CHECK-ASM-NEXT: call memcpy +; CHECK-ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; CHECK-ASM-NEXT: ly cs0, 0(csp) # 8-byte Folded Reload +; CHECK-ASM-NEXT: addiy csp, csp, 16 +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_strcat_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 4 [[DST:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[STRLEN:%.*]] = call i32 @strlen(ptr addrspace(200) noundef nonnull dereferenceable(1) [[DST]]) +; CHECK-IR-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(200) [[DST]], i32 [[STRLEN]] +; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i32(ptr addrspace(200) noundef nonnull align 1 dereferenceable(17) [[ENDPTR]], ptr addrspace(200) noundef nonnull align 4 dereferenceable(17) @str, i32 17, i1 false) #[[ATTR5]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @strcat(ptr addrspace(200) %dst, ptr addrspace(200) @str) + ret void +} + + +define void @test_strncpy_to_memcpy(ptr addrspace(200) align 4 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_strncpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: addiy csp, csp, -16 +; CHECK-ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-ASM-NEXT: .LBB3_1: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca1, %got_pcrel_hi(.Lstr) +; CHECK-ASM-NEXT: ly ca1, %pcrel_lo(.LBB3_1)(ca1) +; CHECK-ASM-NEXT: li a2, 17 +; CHECK-ASM-NEXT: li a3, 0 +; CHECK-ASM-NEXT: call strncpy +; CHECK-ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; CHECK-ASM-NEXT: addiy csp, csp, 16 +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_strncpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 4 [[DST:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[CALL:%.*]] = call ptr addrspace(200) @strncpy(ptr addrspace(200) [[DST]], ptr addrspace(200) nonnull @str, i64 17) #[[ATTR1]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @strncpy(ptr addrspace(200) %dst, ptr addrspace(200) @str, i64 17) + ret void +} + +; Note: stpncpy is not handled by SimplifyLibcalls yet, so this should not be changed. 
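+; For reference, the C semantics being modeled here (illustrative, not part
+; of the generated checks):
+;   char *stpncpy(char *dst, const char *src, size_t n);
+;   // copies at most n bytes from src, zero-fills the remainder up to n,
+;   // and returns dst + min(strlen(src), n).
+; Since SimplifyLibcalls has no fold for it yet, the call below is expected
+; to survive unchanged.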
+define void @test_stpncpy_to_memcpy(ptr addrspace(200) align 4 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_stpncpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: addiy csp, csp, -16 +; CHECK-ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; CHECK-ASM-NEXT: .LBB4_1: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca1, %got_pcrel_hi(.Lstr) +; CHECK-ASM-NEXT: ly ca1, %pcrel_lo(.LBB4_1)(ca1) +; CHECK-ASM-NEXT: li a2, 17 +; CHECK-ASM-NEXT: li a3, 0 +; CHECK-ASM-NEXT: call stpncpy +; CHECK-ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; CHECK-ASM-NEXT: addiy csp, csp, 16 +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_stpncpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 4 [[DST:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[CALL:%.*]] = call ptr addrspace(200) @stpncpy(ptr addrspace(200) [[DST]], ptr addrspace(200) nonnull @str, i64 17) #[[ATTR1]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @stpncpy(ptr addrspace(200) %dst, ptr addrspace(200) @str, i64 17) + ret void +} + +; UTC_ARGS: --disable +; CHECK-IR: attributes #[[ATTR1]] = { nounwind } +; The no_preserve_cheri_tags should be attribute 4/5 in all cases +; CHECK-IR: attributes #[[ATTR4]] = { no_preserve_cheri_tags nounwind } +; CHECK-IR: attributes #[[ATTR5]] = { no_preserve_cheri_tags } diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/subobject-bounds-redundant-setbounds.c.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/subobject-bounds-redundant-setbounds.c.ll new file mode 100644 index 0000000000000..05d33ed63b351 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/subobject-bounds-redundant-setbounds.c.ll @@ -0,0 +1,317 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/subobject-bounds-redundant-setbounds.c.ll +; REQUIRES: asserts +; RUN: rm -f %t.dbg-opt %t.dbg-llc +; RUN: opt -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -cheri-bound-allocas -debug-only=cheri-bound-allocas -S -o - %s 2>%t.dbg-opt | FileCheck %s +; RUN: FileCheck %s -input-file=%t.dbg-opt -check-prefix DBG +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f -debug-only=cheri-bound-allocas -o - %s 2>%t.dbg-llc | FileCheck %s -check-prefix ASM +; RUN: FileCheck %s -input-file=%t.dbg-llc -check-prefix DBG +target datalayout = "e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128-A200-P200-G200" + +; created from the following C source code (when compiled with subobject bounds): +; void use(void* arg); +; +;void use_inline(int* arg) { +; *arg = 2; +;} +; +;int stack_array() { +; int array[10]; +; use(array); +; return array[5]; +;} +; +;int stack_int() { +; int value = 1; +; use(&value); +; return value; +;} +; +;int stack_int_inlined() { +; int value = 1; +; use_inline(&value); +; return value; +;} + + +define void @use_inline(ptr addrspace(200) nocapture %arg) local_unnamed_addr addrspace(200) { +; ASM-LABEL: use_inline: +; ASM: # %bb.0: +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: ret +; CHECK-LABEL: define void @use_inline +; CHECK-SAME: (ptr addrspace(200) nocapture [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ARG]], align 4 +; CHECK-NEXT: ret void +; + store i32 2, ptr addrspace(200) %arg, 
align 4 + ret void +} + +define signext i32 @stack_array() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: stack_array: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -64 +; ASM-NEXT: sy cra, 56(csp) # 8-byte Folded Spill +; ASM-NEXT: sy cs0, 48(csp) # 8-byte Folded Spill +; ASM-NEXT: li a0, 40 +; ASM-NEXT: addiy ca1, csp, 8 +; ASM-NEXT: ybndsrw cs0, ca1, a0 +; ASM-NEXT: ymv ca0, cs0 +; ASM-NEXT: call use +; ASM-NEXT: lw a0, 20(cs0) +; ASM-NEXT: ly cra, 56(csp) # 8-byte Folded Reload +; ASM-NEXT: ly cs0, 48(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 64 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @stack_array +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [10 x i32], align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 40, ptr addrspace(200) nonnull [[ARRAY]]) +; CHECK-NEXT: [[BOUNDED:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[ARRAY]], i32 40) +; CHECK-NEXT: call void @use(ptr addrspace(200) [[BOUNDED]]) +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(200) [[BOUNDED]], i64 20 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[ARRAYIDX]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p200(i64 40, ptr addrspace(200) nonnull [[ARRAY]]) +; CHECK-NEXT: ret i32 [[LD]] +; + %array = alloca [10 x i32], align 4, addrspace(200) + call void @llvm.lifetime.start.p200(i64 40, ptr addrspace(200) nonnull %array) + %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %array, i32 40) + call void @use(ptr addrspace(200) %bounded) + %arrayidx = getelementptr inbounds i8, ptr addrspace(200) %bounded, i64 20 + %ld = load i32, ptr addrspace(200) %arrayidx, align 4 + call void @llvm.lifetime.end.p200(i64 40, ptr addrspace(200) nonnull %array) + ret i32 %ld +} + +; DBG-LABEL: Checking function stack_array +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.end.p200(i64 40, ptr addrspace(200) nonnull %array) +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=40, setbounds size=40 current offset=0: %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %array, i32 40) +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 40, ptr addrspace(200) nonnull %array) +; DBG-NEXT: cheri-bound-allocas: stack_array: 0 of 3 users need bounds for %array = alloca [10 x i32], align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %array = alloca [10 x i32], align 4, addrspace(200) +; DBG-EMPTY: + +declare void @llvm.lifetime.start.p200(i64 immarg, ptr addrspace(200) nocapture) addrspace(200) + +declare void @use(ptr addrspace(200)) local_unnamed_addr addrspace(200) + +declare ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200), i32) addrspace(200) + +declare void @llvm.lifetime.end.p200(i64 immarg, ptr addrspace(200) nocapture) addrspace(200) + +define signext i32 @stack_int() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: stack_int: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 4(csp) +; ASM-NEXT: li a0, 4 +; ASM-NEXT: addiy ca1, csp, 4 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: 
call use +; ASM-NEXT: lw a0, 4(csp) +; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @stack_int +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: [[BOUNDED:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[VALUE]], i32 4) +; CHECK-NEXT: call void @use(ptr addrspace(200) [[BOUNDED]]) +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: ret i32 [[LD]] +; + %value = alloca i32, align 4, addrspace(200) + call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) + store i32 1, ptr addrspace(200) %value, align 4 + %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 4) + call void @use(ptr addrspace(200) %bounded) + %ld = load i32, ptr addrspace(200) %value, align 4 + call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) + ret i32 %ld +} + +; DBG-LABEL: Checking function stack_int +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=4, setbounds size=4 current offset=0: %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 4) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: stack_int: 0 of 5 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +define signext i32 @stack_int_inlined() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: stack_int_inlined: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: li a0, 4 +; ASM-NEXT: addiy ca1, csp, 12 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: lw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @stack_int_inlined +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: 
[[VALUE:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[VALUE]], i32 4) +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: ret i32 [[LD]] +; + %value = alloca i32, align 4, addrspace(200) + call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) + store i32 1, ptr addrspace(200) %value, align 4 + %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 4) + store i32 2, ptr addrspace(200) %address.with.bounds, align 4 + %ld = load i32, ptr addrspace(200) %value, align 4 + call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) + ret i32 %ld +} + +; DBG-LABEL: Checking function stack_int_inlined +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=4, setbounds size=4 current offset=0: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 4) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: stack_int_inlined: 0 of 5 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +define signext i32 @out_of_bounds_setbounds() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: out_of_bounds_setbounds: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: addiy ca0, csp, 12 +; ASM-NEXT: ybndsiw ca0, ca0, 4 +; ASM-NEXT: li a1, 5 +; ASM-NEXT: ybndsrw ca0, ca0, a1 +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: lw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @out_of_bounds_setbounds +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i32(ptr addrspace(200) [[VALUE]], i32 4) 
+; CHECK-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[TMP1]], i32 5) +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: ret i32 [[LD]] +; + %value = alloca i32, align 4, addrspace(200) + ; Too big, cannot elide the setbounds: + %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 5) + store i32 2, ptr addrspace(200) %address.with.bounds, align 4 + %ld = load i32, ptr addrspace(200) %value, align 4 + ret i32 %ld +} + +; DBG-NEXT: Checking function out_of_bounds_setbounds +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -out_of_bounds_setbounds: setbounds use offset OUT OF BOUNDS and will trap -> adding csetbounds: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 5) +; DBG-NEXT: cheri-bound-allocas: Found alloca use that needs bounds: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 5) +; DBG-NEXT: cheri-bound-allocas: out_of_bounds_setbounds: 1 of 2 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: out_of_bounds_setbounds: setting bounds on stack alloca to 4 %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +define signext i32 @setbounds_escapes() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: setbounds_escapes: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: sy cra, 8(csp) # 8-byte Folded Spill +; ASM-NEXT: li a0, 4 +; ASM-NEXT: addiy ca1, csp, 4 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: call use +; ASM-NEXT: lw a0, 4(csp) +; ASM-NEXT: ly cra, 8(csp) # 8-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @setbounds_escapes +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull [[VALUE]], i32 4) +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], align 4 +; CHECK-NEXT: call void @use(ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]]) +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: ret i32 [[LD]] +; + %value = alloca i32, align 4, addrspace(200) + ; Same size as the alloca, so no extra stack bounds are needed even though the capability escapes: + %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 4) + store i32 2, ptr addrspace(200) %address.with.bounds, align 4 + call void @use(ptr addrspace(200) %address.with.bounds) + %ld = load i32, ptr addrspace(200) %value, align 4 + ret i32 %ld +} + +; DBG-NEXT: Checking function setbounds_escapes +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store
size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=4, setbounds size=4 current offset=0: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i32(ptr addrspace(200) nonnull %value, i32 4) +; DBG-NEXT: cheri-bound-allocas: setbounds_escapes: 0 of 2 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +; llvm.assume() should not add bounds: +define void @assume_aligned() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: assume_aligned: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define void @assume_aligned +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca [4 x i8], align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(200) [[TMP1]], i64 4) ] +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %1 = alloca [4 x i8], align 4, addrspace(200) + call void @llvm.assume(i1 true) [ "align"([4 x i8] addrspace(200)* %1, i64 4) ] + store i32 1, ptr addrspace(200) %1 + ret void +} + +; DBG-NEXT: Checking function assume_aligned +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store i32 1, ptr addrspace(200) %1, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store i32 1, ptr addrspace(200) %1, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for assume: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(200) %1, i64 4) ] +; DBG-NEXT: cheri-bound-allocas: assume_aligned: 0 of 2 users need bounds for %1 = alloca [4 x i8], align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %1 = alloca [4 x i8], align 4, addrspace(200) +; DBG-EMPTY: + +declare void @llvm.assume(i1) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/trunc-load.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/trunc-load.ll new file mode 100644 index 0000000000000..715b3d32f404e --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/trunc-load.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/trunc-load.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - < %s | FileCheck %s --check-prefix=PURECAP +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f -o - < %s | FileCheck %s --check-prefix=HYBRID + +define zeroext i16 @trunc_load_zext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_zext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lhu a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_zext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lhu a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode 
+; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define signext i16 @trunc_load_sext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_sext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lh a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_sext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lh a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define zeroext i16 @trunc_load_gep_zext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_gep_zext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lhu a0, 4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_gep_zext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 4 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lhu a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = getelementptr i32, i32 addrspace(200)* %p, i32 1 + %2 = load i32, i32 addrspace(200)* %1 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} + +define signext i16 @trunc_load_gep_sext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_gep_sext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lh a0, 4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_gep_sext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 4 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lh a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = getelementptr i32, i32 addrspace(200)* %p, i32 1 + %2 = load i32, i32 addrspace(200)* %1 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} + +define zeroext i16 @trunc_lshr_load_zext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_lshr_load_zext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lhu a0, 2(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_lshr_load_zext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 2 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lhu a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = lshr i32 %1, 16 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} + +define signext i16 @trunc_lshr_load_sext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_lshr_load_sext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lh a0, 2(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_lshr_load_sext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 2 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lh a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = lshr i32 %1, 16 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-hybrid.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-hybrid.ll new file mode 100644 index 0000000000000..c9eaa11b153e6 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-hybrid.ll @@ -0,0 +1,368 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/unaligned-loads-stores-hybrid.ll +; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi ilp32f -mattr=+y,+zyhybrid,+f %s 
-o - | FileCheck %s + +@a1 = global i64 0, align 1 +@a2 = global i64 0, align 2 +@a4 = global i64 0, align 4 +@a8 = global i64 0, align 8 + +define i64 @load_global_i64_align_1(i64 %y) addrspace(200) nounwind { +; CHECK-LABEL: load_global_i64_align_1: +; CHECK: # %bb.0: +; CHECK-NEXT: .Lpcrel_hi0: +; CHECK-NEXT: auipc a0, %got_pcrel_hi(a1) +; CHECK-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi0)(a0) +; CHECK-NEXT: csrrc ca1, ddc, zero +; CHECK-NEXT: bnez a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ymv ca1, cnull +; CHECK-NEXT: j .LBB0_3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: yaddrw ca1, ca1, a0 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a0, 0(ca1) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca2, ca1, 1 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a2, 0(ca2) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca3, ca1, 2 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a3, 0(ca3) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca4, ca1, 3 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a4, 0(ca4) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: slli a2, a2, 8 +; CHECK-NEXT: or a0, a2, a0 +; CHECK-NEXT: slli a3, a3, 16 +; CHECK-NEXT: slli a4, a4, 24 +; CHECK-NEXT: or a3, a4, a3 +; CHECK-NEXT: or a0, a3, a0 +; CHECK-NEXT: addiy ca2, ca1, 4 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a2, 0(ca2) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca3, ca1, 5 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a3, 0(ca3) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca4, ca1, 6 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a4, 0(ca4) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca1, ca1, 7 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a1, 0(ca1) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: slli a3, a3, 8 +; CHECK-NEXT: or a2, a3, a2 +; CHECK-NEXT: slli a4, a4, 16 +; CHECK-NEXT: slli a1, a1, 24 +; CHECK-NEXT: or a1, a1, a4 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: ret + %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a1 to i64 addrspace(200)*), align 1 + ret i64 %ret +} + +define i64 @load_global_i64_align_2(i64 %y) addrspace(200) nounwind { +; CHECK-LABEL: load_global_i64_align_2: +; CHECK: # %bb.0: +; CHECK-NEXT: .Lpcrel_hi1: +; CHECK-NEXT: auipc a0, %got_pcrel_hi(a2) +; CHECK-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi1)(a0) +; CHECK-NEXT: csrrc ca1, ddc, zero +; CHECK-NEXT: bnez a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ymv ca0, cnull +; CHECK-NEXT: j .LBB1_3 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: yaddrw ca0, ca1, a0 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lhu a1, 0(ca0) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca2, ca0, 2 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lhu a2, 0(ca2) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca3, ca0, 4 +; CHECK-NEXT: addiy ca0, ca0, 6 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lhu a4, 0(ca0) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: .option 
capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lhu a3, 0(ca3)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: slli a0, a2, 16
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: slli a1, a4, 16
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a2 to i64 addrspace(200)*), align 2
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi2:
+; CHECK-NEXT: auipc a0, %got_pcrel_hi(a4)
+; CHECK-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi2)(a0)
+; CHECK-NEXT: csrrc ca1, ddc, zero
+; CHECK-NEXT: bnez a0, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca1, cnull
+; CHECK-NEXT: j .LBB2_3
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: yaddrw ca1, ca1, a0
+; CHECK-NEXT: .LBB2_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lw a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca1, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lw a1, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a4 to i64 addrspace(200)*), align 4
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi3:
+; CHECK-NEXT: auipc a0, %got_pcrel_hi(a8)
+; CHECK-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0)
+; CHECK-NEXT: csrrc ca1, ddc, zero
+; CHECK-NEXT: bnez a0, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca1, cnull
+; CHECK-NEXT: j .LBB3_3
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: yaddrw ca1, ca1, a0
+; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lw a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca1, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lw a1, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a8 to i64 addrspace(200)*), align 8
+ ret i64 %ret
+}
+
+define void @store_global_i64_align_1(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi4:
+; CHECK-NEXT: auipc a2, %got_pcrel_hi(a1)
+; CHECK-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi4)(a2)
+; CHECK-NEXT: csrrc ca3, ddc, zero
+; CHECK-NEXT: bnez a2, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca2, cnull
+; CHECK-NEXT: j .LBB4_3
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: yaddrw ca2, ca3, a2
+; CHECK-NEXT: .LBB4_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca2, 0(a0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca2, 7
+; CHECK-NEXT: srli a4, a1, 24
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca3, 0(a4)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca2, 6
+; CHECK-NEXT: srli a4, a1, 16
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca3, 0(a4)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca2, 5
+; CHECK-NEXT: srli a4, a1, 8
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca3, 0(a4)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca2, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca3, 0(a1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca2, 3
+; CHECK-NEXT: srli a3, a0, 24
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca1, 0(a3)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca2, 2
+; CHECK-NEXT: srli a3, a0, 16
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca1, 0(a3)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca2, 1
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb ca1, 0(a0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a1 to i64 addrspace(200)*), align 1
+ ret void
+}
+
+define void @store_global_i64_align_2(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi5:
+; CHECK-NEXT: auipc a2, %got_pcrel_hi(a2)
+; CHECK-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi5)(a2)
+; CHECK-NEXT: csrrc ca3, ddc, zero
+; CHECK-NEXT: bnez a2, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca2, cnull
+; CHECK-NEXT: j .LBB5_3
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: yaddrw ca2, ca3, a2
+; CHECK-NEXT: .LBB5_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh ca2, 0(a0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca2, 6
+; CHECK-NEXT: srli a4, a1, 16
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh ca3, 0(a4)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca2, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh ca3, 0(a1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca2, 2
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh ca1, 0(a0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a2 to i64 addrspace(200)*), align 2
+ ret void
+}
+
+define void @store_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi6:
+; CHECK-NEXT: auipc a2, %got_pcrel_hi(a4)
+; CHECK-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi6)(a2)
+; CHECK-NEXT: csrrc ca3, ddc, zero
+; CHECK-NEXT: bnez a2, .LBB6_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca2, cnull
+; CHECK-NEXT: j .LBB6_3
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: yaddrw ca2, ca3, a2
+; CHECK-NEXT: .LBB6_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sw ca2, 0(a0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca0, ca2, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sw ca0, 0(a1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a4 to i64 addrspace(200)*), align 4
+ ret void
+}
+
+define void @store_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi7:
+; CHECK-NEXT: auipc a2, %got_pcrel_hi(a8)
+; CHECK-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi7)(a2)
+; CHECK-NEXT: csrrc ca3, ddc, zero
+; CHECK-NEXT: bnez a2, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca2, cnull
+; CHECK-NEXT: j .LBB7_3
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: yaddrw ca2, ca3, a2
+; CHECK-NEXT: .LBB7_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sw ca2, 0(a0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca0, ca2, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sw ca0, 0(a1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a8 to i64 addrspace(200)*), align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-purecap.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-purecap.ll
new file mode 100644
index 0000000000000..8e60faa7efa6b
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV32Y/unaligned-loads-stores-purecap.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/unaligned-loads-stores-purecap.ll
+; RUN: llc -mtriple=riscv32 --relocation-model=pic -target-abi il32pc64f -mattr=+y,+cap-mode,+f %s -o - | FileCheck %s
+
+@a1 = addrspace(200) global i64 0, align 1
+@a2 = addrspace(200) global i64 0, align 2
+@a4 = addrspace(200) global i64 0, align 4
+@a8 = addrspace(200) global i64 0, align 8
+
+define i64 @load_global_i64_align_1(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB0_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a1)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB0_1)(ca1)
+; CHECK-NEXT: lbu a0, 1(ca1)
+; CHECK-NEXT: lbu a2, 0(ca1)
+; CHECK-NEXT: lbu a3, 2(ca1)
+; CHECK-NEXT: lbu a4, 3(ca1)
+; CHECK-NEXT: slli a0, a0, 8
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: slli a3, a3, 16
+; CHECK-NEXT: slli a4, a4, 24
+; CHECK-NEXT: or a3, a4, a3
+; CHECK-NEXT: or a0, a3, a0
+; CHECK-NEXT: lbu a2, 5(ca1)
+; CHECK-NEXT: lbu a3, 4(ca1)
+; CHECK-NEXT: lbu a4, 6(ca1)
+; CHECK-NEXT: lbu a1, 7(ca1)
+; CHECK-NEXT: slli a2, a2, 8
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: slli a4, a4, 16
+; CHECK-NEXT: slli a1, a1, 24
+; CHECK-NEXT: or a1, a1, a4
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a1, align 1
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_2(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB1_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca0, %got_pcrel_hi(a2)
+; CHECK-NEXT: ly ca0, %pcrel_lo(.LBB1_1)(ca0)
+; CHECK-NEXT: lhu a1, 2(ca0)
+; CHECK-NEXT: lhu a2, 0(ca0)
+; CHECK-NEXT: lhu a3, 6(ca0)
+; CHECK-NEXT: lhu a4, 4(ca0)
+; CHECK-NEXT: slli a0, a1, 16
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: slli a1, a3, 16
+; CHECK-NEXT: or a1, a1, a4
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a2, align 2
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB2_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a4)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB2_1)(ca1)
+; CHECK-NEXT: lw a0, 0(ca1)
+; CHECK-NEXT: lw a1, 4(ca1)
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a4, align 4
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB3_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a8)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB3_1)(ca1)
+; CHECK-NEXT: lw a0, 0(ca1)
+; CHECK-NEXT: lw a1, 4(ca1)
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a8, align 8
+ ret i64 %ret
+}
+
+define void @store_global_i64_align_1(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB4_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca2, %got_pcrel_hi(a1)
+; CHECK-NEXT: ly ca2, %pcrel_lo(.LBB4_1)(ca2)
+; CHECK-NEXT: sb a1, 4(ca2)
+; CHECK-NEXT: sb a0, 0(ca2)
+; CHECK-NEXT: srli a3, a1, 24
+; CHECK-NEXT: sb a3, 7(ca2)
+; CHECK-NEXT: srli a3, a1, 16
+; CHECK-NEXT: sb a3, 6(ca2)
+; CHECK-NEXT: srli a1, a1, 8
+; CHECK-NEXT: sb a1, 5(ca2)
+; CHECK-NEXT: srli a1, a0, 24
+; CHECK-NEXT: sb a1, 3(ca2)
+; CHECK-NEXT: srli a1, a0, 16
+; CHECK-NEXT: sb a1, 2(ca2)
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: sb a0, 1(ca2)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a1, align 1
+ ret void
+}
+
+define void @store_global_i64_align_2(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB5_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca2, %got_pcrel_hi(a2)
+; CHECK-NEXT: ly ca2, %pcrel_lo(.LBB5_1)(ca2)
+; CHECK-NEXT: sh a1, 4(ca2)
+; CHECK-NEXT: sh a0, 0(ca2)
+; CHECK-NEXT: srli a1, a1, 16
+; CHECK-NEXT: sh a1, 6(ca2)
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: sh a0, 2(ca2)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a2, align 2
+ ret void
+}
+
+define void @store_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB6_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca2, %got_pcrel_hi(a4)
+; CHECK-NEXT: ly ca2, %pcrel_lo(.LBB6_1)(ca2)
+; CHECK-NEXT: sw a1, 4(ca2)
+; CHECK-NEXT: sw a0, 0(ca2)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a4, align 4
+ ret void
+}
+
+define void @store_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB7_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca2, %got_pcrel_hi(a8)
+; CHECK-NEXT: ly ca2, %pcrel_lo(.LBB7_1)(ca2)
+; CHECK-NEXT: sw a1, 4(ca2)
+; CHECK-NEXT: sw a0, 0(ca2)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a8, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr-arg.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr-arg.ll
new file mode 100644
index 0000000000000..665bf53c57cc7
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr-arg.ll
@@ -0,0 +1,1043 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/atomic-rmw-cap-ptr-arg.ll
+; Check that we can generate sensible code for atomic operations using capability pointers on capabilities
+; See https://github.com/CTSRD-CHERI/llvm-project/issues/470
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -mattr=+a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-ATOMICS --allow-unused-prefixes
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -mattr=-a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-LIBCALLS --allow-unused-prefixes
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -mattr=+a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-ATOMICS --allow-unused-prefixes
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -mattr=-a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-LIBCALLS --allow-unused-prefixes
+
+define i32 addrspace(200)* @atomic_cap_ptr_xchg_sc(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_sc:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.y.aqrl ca0, ca1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_sc:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg_sc:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_exchange_cap_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_xchg_relaxed(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_relaxed:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.y ca0, ca1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_relaxed:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 0
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg_relaxed:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 0
+; HYBRID-NEXT: call __atomic_exchange_cap_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val monotonic
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_xchg_acquire(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_acquire:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.y.aq ca0, ca1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_acquire:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 2
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg_acquire:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 2
+; HYBRID-NEXT: call __atomic_exchange_cap_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val acquire
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_xchg_rel(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_rel:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.y.rl ca0, ca1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_rel:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 3
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg_rel:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 3
+; HYBRID-NEXT: call __atomic_exchange_cap_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val release
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_xchg_acq_rel(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_acq_rel:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.y.aqrl ca0, ca1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_acq_rel:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 4
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg_acq_rel:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 4
+; HYBRID-NEXT: call __atomic_exchange_cap_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val acq_rel
+ ret i32 addrspace(200)* %tmp
+}
+
+; Also check non-i8* xchg:
+define i32 addrspace(200)* @atomic_cap_ptr_xchg_i32ptr(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg_i32ptr:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.y.aqrl ca0, ca1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg_i32ptr:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 4
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg_i32ptr:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 4
+; HYBRID-NEXT: call __atomic_exchange_cap_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val acq_rel
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_add(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_add:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: addy ca3, ca2, a1
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB6_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_add:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: .LBB6_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: add a0, a3, s1
+; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB6_1
+; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_add:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .LBB6_1: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: add a0, a3, a0
+; HYBRID-NEXT: yaddrw ca2, ca3, a0
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: beqz a0, .LBB6_1
+; HYBRID-NEXT: # %bb.2: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw add i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_sub(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_sub:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: sub a3, a2, a1
+; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB7_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_sub:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: .LBB7_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: sub a0, a3, s1
+; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB7_1
+; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_sub:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .LBB7_1: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: sub a0, a3, a0
+; HYBRID-NEXT: yaddrw ca2, ca3, a0
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: beqz a0, .LBB7_1
+; HYBRID-NEXT: # %bb.2: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw sub i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_and(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_and:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: and a3, a2, a1
+; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB8_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_and:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: .LBB8_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: and a0, a3, s1
+; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB8_1
+; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_and:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .LBB8_1: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: and a0, a3, a0
+; HYBRID-NEXT: yaddrw ca2, ca3, a0
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: beqz a0, .LBB8_1
+; HYBRID-NEXT: # %bb.2: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw and i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_nand(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_nand:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: and a3, a2, a1
+; PURECAP-ATOMICS-NEXT: not a3, a3
+; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB9_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_nand:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: .LBB9_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: and a0, a3, s1
+; PURECAP-LIBCALLS-NEXT: not a0, a0
+; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB9_1
+; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_nand:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .LBB9_1: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: and a0, a3, a0
+; HYBRID-NEXT: not a0, a0
+; HYBRID-NEXT: yaddrw ca2, ca3, a0
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: beqz a0, .LBB9_1
+; HYBRID-NEXT: # %bb.2: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw nand i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_or(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_or:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: or a3, a2, a1
+; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB10_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_or:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: .LBB10_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: or a0, a3, s1
+; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB10_1
+; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_or:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .LBB10_1: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: or a0, a3, a0
+; HYBRID-NEXT: yaddrw ca2, ca3, a0
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: beqz a0, .LBB10_1
+; HYBRID-NEXT: # %bb.2: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw or i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_xor(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xor:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: xor a3, a2, a1
+; PURECAP-ATOMICS-NEXT: yaddrw ca3, ca2, a3
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB11_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xor:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: .LBB11_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: xor a0, a3, s1
+; PURECAP-LIBCALLS-NEXT: yaddrw ca2, ca3, a0
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB11_1
+; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xor:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .LBB11_1: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: xor a0, a3, a0
+; HYBRID-NEXT: yaddrw ca2, ca3, a0
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: beqz a0, .LBB11_1
+; HYBRID-NEXT: # %bb.2: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xor i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_max(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_max:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: bge a3, a1, .LBB12_3
+; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB12_1
+; PURECAP-ATOMICS-NEXT: # %bb.4:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_max:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: j .LBB12_2
+; PURECAP-LIBCALLS-NEXT: .LBB12_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB12_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB12_4
+; PURECAP-LIBCALLS-NEXT: .LBB12_2: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3
+; PURECAP-LIBCALLS-NEXT: blt s1, a3, .LBB12_1
+; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB12_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1
+; PURECAP-LIBCALLS-NEXT: j .LBB12_1
+; PURECAP-LIBCALLS-NEXT: .LBB12_4: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_max:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: j .LBB12_2
+; HYBRID-NEXT: .LBB12_1: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB12_2 Depth=1
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: bnez a0, .LBB12_4
+; HYBRID-NEXT: .LBB12_2: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ymv ca2, ca3
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: blt a0, a3, .LBB12_1
+; HYBRID-NEXT: # %bb.3: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB12_2 Depth=1
+; HYBRID-NEXT: ly ca2, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: j .LBB12_1
+; HYBRID-NEXT: .LBB12_4: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw max i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_min(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_min:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: bge a1, a3, .LBB13_3
+; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB13_1
+; PURECAP-ATOMICS-NEXT: # %bb.4:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_min:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: j .LBB13_2
+; PURECAP-LIBCALLS-NEXT: .LBB13_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB13_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB13_4
+; PURECAP-LIBCALLS-NEXT: .LBB13_2: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3
+; PURECAP-LIBCALLS-NEXT: bge s1, a3, .LBB13_1
+; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB13_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1
+; PURECAP-LIBCALLS-NEXT: j .LBB13_1
+; PURECAP-LIBCALLS-NEXT: .LBB13_4: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_min:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: j .LBB13_2
+; HYBRID-NEXT: .LBB13_1: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB13_2 Depth=1
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: bnez a0, .LBB13_4
+; HYBRID-NEXT: .LBB13_2: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ymv ca2, ca3
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: bge a0, a3, .LBB13_1
+; HYBRID-NEXT: # %bb.3: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB13_2 Depth=1
+; HYBRID-NEXT: ly ca2, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: j .LBB13_1
+; HYBRID-NEXT: .LBB13_4: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw min i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_umax(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umax:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: bgeu a3, a1, .LBB14_3
+; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: .LBB14_3: # in Loop: Header=BB14_1 Depth=1
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB14_1
+; PURECAP-ATOMICS-NEXT: # %bb.4:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umax:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: j .LBB14_2
+; PURECAP-LIBCALLS-NEXT: .LBB14_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB14_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB14_4
+; PURECAP-LIBCALLS-NEXT: .LBB14_2: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3
+; PURECAP-LIBCALLS-NEXT: bltu s1, a3, .LBB14_1
+; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB14_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1
+; PURECAP-LIBCALLS-NEXT: j .LBB14_1
+; PURECAP-LIBCALLS-NEXT: .LBB14_4: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_umax:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: j .LBB14_2
+; HYBRID-NEXT: .LBB14_1: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB14_2 Depth=1
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: bnez a0, .LBB14_4
+; HYBRID-NEXT: .LBB14_2: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ymv ca2, ca3
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: bltu a0, a3, .LBB14_1
+; HYBRID-NEXT: # %bb.3: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB14_2 Depth=1
+; HYBRID-NEXT: ly ca2, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: j .LBB14_1
+; HYBRID-NEXT: .LBB14_4: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw umax i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
+
+define i32 addrspace(200)* @atomic_cap_ptr_umin(i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umin:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.c.aqrl ca2, (ca0)
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: bgeu a1, a3, .LBB15_3
+; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; PURECAP-ATOMICS-NEXT: ymv ca3, ca2
+; PURECAP-ATOMICS-NEXT: .LBB15_3: # in Loop: Header=BB15_1 Depth=1
+; PURECAP-ATOMICS-NEXT: sc.y.aqrl a3, ca3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB15_1
+; PURECAP-ATOMICS-NEXT: # %bb.4:
+; PURECAP-ATOMICS-NEXT: ymv ca0, ca2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umin:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: ymv cs1, ca1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 0
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 16
+; PURECAP-LIBCALLS-NEXT: j .LBB15_2
+; PURECAP-LIBCALLS-NEXT: .LBB15_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB15_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: sy ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap
+; PURECAP-LIBCALLS-NEXT: ly ca3, 0(csp)
+; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB15_4
+; PURECAP-LIBCALLS-NEXT: .LBB15_2: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, ca3
+; PURECAP-LIBCALLS-NEXT: bgeu s1, a3, .LBB15_1
+; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB15_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: ymv ca2, cs1
+; PURECAP-LIBCALLS-NEXT: j .LBB15_1
+; PURECAP-LIBCALLS-NEXT: .LBB15_4: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: ymv ca0, ca3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_umin:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -64
+; HYBRID-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ly ca3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: sy ca1, 16(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: j .LBB15_2
+; HYBRID-NEXT: .LBB15_1: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB15_2 Depth=1
+; HYBRID-NEXT: sy ca3, 32(sp)
+; HYBRID-NEXT: addi a1, sp, 32
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt
+; HYBRID-NEXT: ly ca3, 32(sp)
+; HYBRID-NEXT: bnez a0, .LBB15_4
+; HYBRID-NEXT: .LBB15_2: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: ymv ca2, ca3
+; HYBRID-NEXT: ly ca0, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: bgeu a0, a3, .LBB15_1
+; HYBRID-NEXT: # %bb.3: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB15_2 Depth=1
+; HYBRID-NEXT: ly ca2, 16(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: j .LBB15_1
+; HYBRID-NEXT: .LBB15_4: # %atomicrmw.end
+; HYBRID-NEXT: ymv ca0, ca3
+; HYBRID-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 64
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw umin i32 addrspace(200)* addrspace(200)* %ptr, i32 addrspace(200)* %val seq_cst
+ ret i32 addrspace(200)* %tmp
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr.ll
new file mode 100644
index 0000000000000..dec1da0d61cfb
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/atomic-rmw-cap-ptr.ll
@@ -0,0 +1,755 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/atomic-rmw-cap-ptr.ll
+; Check that we can generate sensible code for atomic operations using capability pointers
+; https://github.com/CTSRD-CHERI/llvm-project/issues/470
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -mattr=+a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-ATOMICS --allow-unused-prefixes
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -mattr=-a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-LIBCALLS --allow-unused-prefixes
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -mattr=+a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-ATOMICS --allow-unused-prefixes
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -mattr=-a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-LIBCALLS --allow-unused-prefixes
+
+define i64 @atomic_cap_ptr_xchg(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xchg:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoswap.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xchg:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_exchange_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xchg:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_exchange_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xchg i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_add(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_add:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoadd.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_add:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_add_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_add:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_fetch_add_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw add i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_sub(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_sub:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: neg a1, a1
+; PURECAP-ATOMICS-NEXT: amoadd.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_sub:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_sub_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_sub:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_fetch_sub_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw sub i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_and(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_and:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoand.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_and:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_and_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_and:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_fetch_and_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw and i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_nand(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_nand:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; PURECAP-ATOMICS-NEXT: lr.d.aqrl a2, (ca0)
+; PURECAP-ATOMICS-NEXT: and a3, a2, a1
+; PURECAP-ATOMICS-NEXT: not a3, a3
+; PURECAP-ATOMICS-NEXT: sc.d.rl a3, a3, (ca0)
+; PURECAP-ATOMICS-NEXT: bnez a3, .LBB4_1
+; PURECAP-ATOMICS-NEXT: # %bb.2:
+; PURECAP-ATOMICS-NEXT: mv a0, a2
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_nand:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_nand_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_nand:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_fetch_nand_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw nand i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_or(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_or:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoor.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_or:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_or_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_or:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_fetch_or_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw or i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_xor(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_xor:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amoxor.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_xor:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -16
+; PURECAP-LIBCALLS-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: li a2, 5
+; PURECAP-LIBCALLS-NEXT: call __atomic_fetch_xor_8
+; PURECAP-LIBCALLS-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 16
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_xor:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a2, 5
+; HYBRID-NEXT: call __atomic_fetch_xor_8_c@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw xor i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_max(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_max:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amomax.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_max:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ld a3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: mv s1, a1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8
+; PURECAP-LIBCALLS-NEXT: j .LBB7_2
+; PURECAP-LIBCALLS-NEXT: .LBB7_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB7_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: sd a3, 8(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_8
+; PURECAP-LIBCALLS-NEXT: ld a3, 8(csp)
+; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB7_4
+; PURECAP-LIBCALLS-NEXT: .LBB7_2: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: mv a2, a3
+; PURECAP-LIBCALLS-NEXT: blt s1, a3, .LBB7_1
+; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB7_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: mv a2, s1
+; PURECAP-LIBCALLS-NEXT: j .LBB7_1
+; PURECAP-LIBCALLS-NEXT: .LBB7_4: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: mv a0, a3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_max:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -48
+; HYBRID-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ld a3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: mv s0, a1
+; HYBRID-NEXT: j .LBB7_2
+; HYBRID-NEXT: .LBB7_1: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB7_2 Depth=1
+; HYBRID-NEXT: sd a3, 24(sp)
+; HYBRID-NEXT: addi a1, sp, 24
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt
+; HYBRID-NEXT: ld a3, 24(sp)
+; HYBRID-NEXT: bnez a0, .LBB7_4
+; HYBRID-NEXT: .LBB7_2: # %atomicrmw.start
+; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1
+; HYBRID-NEXT: mv a2, a3
+; HYBRID-NEXT: blt s0, a3, .LBB7_1
+; HYBRID-NEXT: # %bb.3: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB7_2 Depth=1
+; HYBRID-NEXT: mv a2, s0
+; HYBRID-NEXT: j .LBB7_1
+; HYBRID-NEXT: .LBB7_4: # %atomicrmw.end
+; HYBRID-NEXT: mv a0, a3
+; HYBRID-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 48
+; HYBRID-NEXT: ret
+ %tmp = atomicrmw max i64 addrspace(200)* %ptr, i64 %val seq_cst
+ ret i64 %tmp
+}
+
+define i64 @atomic_cap_ptr_min(i64 addrspace(200)* %ptr, i64 %val) nounwind {
+; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_min:
+; PURECAP-ATOMICS: # %bb.0:
+; PURECAP-ATOMICS-NEXT: amomin.d.aqrl a0, a1, (ca0)
+; PURECAP-ATOMICS-NEXT: ret
+;
+; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_min:
+; PURECAP-LIBCALLS: # %bb.0:
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80
+; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill
+; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0
+; PURECAP-LIBCALLS-NEXT: ld a3, 0(ca0)
+; PURECAP-LIBCALLS-NEXT: mv s1, a1
+; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8
+; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8
+; PURECAP-LIBCALLS-NEXT: j .LBB8_2
+; PURECAP-LIBCALLS-NEXT: .LBB8_1: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: sd a3, 8(csp)
+; PURECAP-LIBCALLS-NEXT: li a3, 5
+; PURECAP-LIBCALLS-NEXT: li a4, 5
+; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0
+; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2
+; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_8
+; PURECAP-LIBCALLS-NEXT: ld a3, 8(csp)
+; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB8_4
+; PURECAP-LIBCALLS-NEXT: .LBB8_2: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1
+; PURECAP-LIBCALLS-NEXT: mv a2, a3
+; PURECAP-LIBCALLS-NEXT: bge s1, a3, .LBB8_1
+; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start
+; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PURECAP-LIBCALLS-NEXT: mv a2, s1
+; PURECAP-LIBCALLS-NEXT: j .LBB8_1
+; PURECAP-LIBCALLS-NEXT: .LBB8_4: # %atomicrmw.end
+; PURECAP-LIBCALLS-NEXT: mv a0, a3
+; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload
+; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80
+; PURECAP-LIBCALLS-NEXT: ret
+;
+; HYBRID-LABEL: atomic_cap_ptr_min:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -48
+; HYBRID-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill
+; HYBRID-NEXT: .option capmode
+; HYBRID-NEXT: modesw.cap
+; HYBRID-NEXT: ld a3, 0(ca0)
+; HYBRID-NEXT: .option nocapmode
+; HYBRID-NEXT: modesw.int
+; HYBRID-NEXT: mv s0, a1
+; HYBRID-NEXT: j .LBB8_2
+; HYBRID-NEXT: .LBB8_1: # %atomicrmw.start
+; HYBRID-NEXT: # in Loop: Header=BB8_2 Depth=1
+; HYBRID-NEXT: sd a3, 24(sp)
+; HYBRID-NEXT: addi a1, sp, 24
+; HYBRID-NEXT: li a3, 5
+; HYBRID-NEXT: li a4, 5
+; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload
+; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt
+; HYBRID-NEXT: ld a3, 24(sp) +; HYBRID-NEXT: bnez a0, .LBB8_4 +; HYBRID-NEXT: .LBB8_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: bge s0, a3, .LBB8_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB8_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB8_1 +; HYBRID-NEXT: .LBB8_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; HYBRID-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 48 +; HYBRID-NEXT: ret + %tmp = atomicrmw min i64 addrspace(200)* %ptr, i64 %val seq_cst + ret i64 %tmp +} + +define i64 @atomic_cap_ptr_umax(i64 addrspace(200)* %ptr, i64 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umax: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amomaxu.d.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umax: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80 +; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ld a3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: mv s1, a1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: j .LBB9_2 +; PURECAP-LIBCALLS-NEXT: .LBB9_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB9_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sd a3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_8 +; PURECAP-LIBCALLS-NEXT: ld a3, 8(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB9_4 +; PURECAP-LIBCALLS-NEXT: .LBB9_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, a3 +; PURECAP-LIBCALLS-NEXT: bltu s1, a3, .LBB9_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB9_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, s1 +; PURECAP-LIBCALLS-NEXT: j .LBB9_1 +; PURECAP-LIBCALLS-NEXT: .LBB9_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: mv a0, a3 +; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_umax: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -48 +; HYBRID-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ld a3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: mv s0, a1 +; HYBRID-NEXT: j .LBB9_2 +; HYBRID-NEXT: .LBB9_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB9_2 Depth=1 +; HYBRID-NEXT: sd a3, 24(sp) +; HYBRID-NEXT: addi a1, sp, 24 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload +; HYBRID-NEXT: call 
__atomic_compare_exchange_8_c@plt +; HYBRID-NEXT: ld a3, 24(sp) +; HYBRID-NEXT: bnez a0, .LBB9_4 +; HYBRID-NEXT: .LBB9_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: bltu s0, a3, .LBB9_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB9_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB9_1 +; HYBRID-NEXT: .LBB9_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; HYBRID-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 48 +; HYBRID-NEXT: ret + %tmp = atomicrmw umax i64 addrspace(200)* %ptr, i64 %val seq_cst + ret i64 %tmp +} + +define i64 @atomic_cap_ptr_umin(i64 addrspace(200)* %ptr, i64 %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_umin: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: amominu.d.aqrl a0, a1, (ca0) +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_umin: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -80 +; PURECAP-LIBCALLS-NEXT: sy cra, 64(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 32(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs2, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: ld a3, 0(ca0) +; PURECAP-LIBCALLS-NEXT: mv s1, a1 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs2, ca0, 8 +; PURECAP-LIBCALLS-NEXT: j .LBB10_2 +; PURECAP-LIBCALLS-NEXT: .LBB10_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB10_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: sd a3, 8(csp) +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_8 +; PURECAP-LIBCALLS-NEXT: ld a3, 8(csp) +; PURECAP-LIBCALLS-NEXT: bnez a0, .LBB10_4 +; PURECAP-LIBCALLS-NEXT: .LBB10_2: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, a3 +; PURECAP-LIBCALLS-NEXT: bgeu s1, a3, .LBB10_1 +; PURECAP-LIBCALLS-NEXT: # %bb.3: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # in Loop: Header=BB10_2 Depth=1 +; PURECAP-LIBCALLS-NEXT: mv a2, s1 +; PURECAP-LIBCALLS-NEXT: j .LBB10_1 +; PURECAP-LIBCALLS-NEXT: .LBB10_4: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: mv a0, a3 +; PURECAP-LIBCALLS-NEXT: ly cra, 64(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 32(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs2, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 80 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_umin: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -48 +; HYBRID-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: ld a3, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: mv s0, a1 +; HYBRID-NEXT: j .LBB10_2 +; HYBRID-NEXT: .LBB10_1: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB10_2 Depth=1 +; HYBRID-NEXT: sd a3, 24(sp) +; HYBRID-NEXT: addi a1, sp, 24 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload 
+; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt +; HYBRID-NEXT: ld a3, 24(sp) +; HYBRID-NEXT: bnez a0, .LBB10_4 +; HYBRID-NEXT: .LBB10_2: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: mv a2, a3 +; HYBRID-NEXT: bgeu s0, a3, .LBB10_1 +; HYBRID-NEXT: # %bb.3: # %atomicrmw.start +; HYBRID-NEXT: # in Loop: Header=BB10_2 Depth=1 +; HYBRID-NEXT: mv a2, s0 +; HYBRID-NEXT: j .LBB10_1 +; HYBRID-NEXT: .LBB10_4: # %atomicrmw.end +; HYBRID-NEXT: mv a0, a3 +; HYBRID-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; HYBRID-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 48 +; HYBRID-NEXT: ret + %tmp = atomicrmw umin i64 addrspace(200)* %ptr, i64 %val seq_cst + ret i64 %tmp +} + +define float @atomic_cap_ptr_fadd(float addrspace(200)* %ptr, float %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_fadd: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: flw fa5, 0(ca0) +; PURECAP-ATOMICS-NEXT: .LBB11_1: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # =>This Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: # Child Loop BB11_3 Depth 2 +; PURECAP-ATOMICS-NEXT: fadd.s fa4, fa5, fa0 +; PURECAP-ATOMICS-NEXT: fmv.x.w a1, fa4 +; PURECAP-ATOMICS-NEXT: fmv.x.w a2, fa5 +; PURECAP-ATOMICS-NEXT: .LBB11_3: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # Parent Loop BB11_1 Depth=1 +; PURECAP-ATOMICS-NEXT: # => This Inner Loop Header: Depth=2 +; PURECAP-ATOMICS-NEXT: lr.w.aqrl a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB11_5 +; PURECAP-ATOMICS-NEXT: # %bb.4: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB11_3 Depth=2 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a1, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB11_3 +; PURECAP-ATOMICS-NEXT: .LBB11_5: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB11_1 Depth=1 +; PURECAP-ATOMICS-NEXT: fmv.w.x fa5, a3 +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB11_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-ATOMICS-NEXT: fmv.s fa0, fa5 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_fadd: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -64 +; PURECAP-LIBCALLS-NEXT: sy cra, 48(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: fsd fs0, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: flw fa5, 0(ca0) +; PURECAP-LIBCALLS-NEXT: fmv.s fs0, fa0 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 4 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs1, ca0, 4 +; PURECAP-LIBCALLS-NEXT: .LBB11_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: fadd.s fa4, fa5, fs0 +; PURECAP-LIBCALLS-NEXT: fsw fa5, 4(csp) +; PURECAP-LIBCALLS-NEXT: fmv.x.w a2, fa4 +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs1 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: flw fa5, 4(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB11_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: fmv.s fa0, fa5 +; PURECAP-LIBCALLS-NEXT: ly cra, 48(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: fld fs0, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 64 
+; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_fadd: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -48 +; HYBRID-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; HYBRID-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: fmv.s fs0, fa0 +; HYBRID-NEXT: fmv.w.x fa0, a0 +; HYBRID-NEXT: .LBB11_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: fadd.s fa5, fa0, fs0 +; HYBRID-NEXT: fsw fa0, 28(sp) +; HYBRID-NEXT: fmv.x.w a2, fa5 +; HYBRID-NEXT: addi a1, sp, 28 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: flw fa0, 28(sp) +; HYBRID-NEXT: beqz a0, .LBB11_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; HYBRID-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 48 +; HYBRID-NEXT: ret + %tmp = atomicrmw fadd float addrspace(200)* %ptr, float %val seq_cst + ret float %tmp +} + +define float @atomic_cap_ptr_fsub(float addrspace(200)* %ptr, float %val) nounwind { +; PURECAP-ATOMICS-LABEL: atomic_cap_ptr_fsub: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: flw fa5, 0(ca0) +; PURECAP-ATOMICS-NEXT: .LBB12_1: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # =>This Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: # Child Loop BB12_3 Depth 2 +; PURECAP-ATOMICS-NEXT: fsub.s fa4, fa5, fa0 +; PURECAP-ATOMICS-NEXT: fmv.x.w a1, fa4 +; PURECAP-ATOMICS-NEXT: fmv.x.w a2, fa5 +; PURECAP-ATOMICS-NEXT: .LBB12_3: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # Parent Loop BB12_1 Depth=1 +; PURECAP-ATOMICS-NEXT: # => This Inner Loop Header: Depth=2 +; PURECAP-ATOMICS-NEXT: lr.w.aqrl a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB12_5 +; PURECAP-ATOMICS-NEXT: # %bb.4: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB12_3 Depth=2 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a1, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB12_3 +; PURECAP-ATOMICS-NEXT: .LBB12_5: # %atomicrmw.start +; PURECAP-ATOMICS-NEXT: # in Loop: Header=BB12_1 Depth=1 +; PURECAP-ATOMICS-NEXT: fmv.w.x fa5, a3 +; PURECAP-ATOMICS-NEXT: bne a3, a2, .LBB12_1 +; PURECAP-ATOMICS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-ATOMICS-NEXT: fmv.s fa0, fa5 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: atomic_cap_ptr_fsub: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -64 +; PURECAP-LIBCALLS-NEXT: sy cra, 48(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs0, 32(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy cs1, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: fsd fs0, 8(csp) # 8-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: ymv cs0, ca0 +; PURECAP-LIBCALLS-NEXT: flw fa5, 0(ca0) +; PURECAP-LIBCALLS-NEXT: fmv.s fs0, fa0 +; PURECAP-LIBCALLS-NEXT: addiy ca0, csp, 4 +; PURECAP-LIBCALLS-NEXT: ybndsiw cs1, ca0, 4 +; PURECAP-LIBCALLS-NEXT: .LBB12_1: # %atomicrmw.start +; PURECAP-LIBCALLS-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-LIBCALLS-NEXT: fsub.s fa4, fa5, fs0 +; PURECAP-LIBCALLS-NEXT: fsw fa5, 4(csp) +; PURECAP-LIBCALLS-NEXT: fmv.x.w a2, fa4 +; PURECAP-LIBCALLS-NEXT: li a3, 5 +; PURECAP-LIBCALLS-NEXT: li a4, 5 +; PURECAP-LIBCALLS-NEXT: ymv ca0, cs0 +; PURECAP-LIBCALLS-NEXT: ymv ca1, cs1 +; PURECAP-LIBCALLS-NEXT: call 
__atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: flw fa5, 4(csp) +; PURECAP-LIBCALLS-NEXT: beqz a0, .LBB12_1 +; PURECAP-LIBCALLS-NEXT: # %bb.2: # %atomicrmw.end +; PURECAP-LIBCALLS-NEXT: fmv.s fa0, fa5 +; PURECAP-LIBCALLS-NEXT: ly cra, 48(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs0, 32(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: ly cs1, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: fld fs0, 8(csp) # 8-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 64 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: atomic_cap_ptr_fsub: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -48 +; HYBRID-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; HYBRID-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lw a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: fmv.s fs0, fa0 +; HYBRID-NEXT: fmv.w.x fa0, a0 +; HYBRID-NEXT: .LBB12_1: # %atomicrmw.start +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: fsub.s fa5, fa0, fs0 +; HYBRID-NEXT: fsw fa0, 28(sp) +; HYBRID-NEXT: fmv.x.w a2, fa5 +; HYBRID-NEXT: addi a1, sp, 28 +; HYBRID-NEXT: li a3, 5 +; HYBRID-NEXT: li a4, 5 +; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: flw fa0, 28(sp) +; HYBRID-NEXT: beqz a0, .LBB12_1 +; HYBRID-NEXT: # %bb.2: # %atomicrmw.end +; HYBRID-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; HYBRID-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 48 +; HYBRID-NEXT: ret + %tmp = atomicrmw fsub float addrspace(200)* %ptr, float %val seq_cst + ret float %tmp +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/bounded-allocas-lifetimes.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/bounded-allocas-lifetimes.ll new file mode 100644 index 0000000000000..8524c828b1164 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/bounded-allocas-lifetimes.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/bounded-allocas-lifetimes.ll +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; CHERI-GENERIC-UTC: mir +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - --stop-after=finalize-isel | FileCheck %s + +; Check that lifetime markers don't get lost due to CheriBoundAllocas, as we'd +; risk StackSlotColoring reusing the slot. 
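+;
+; Hand-written illustration (not part of the generated checks; the MIR below
+; is simplified pseudo-MIR, not verbatim llc output): after CheriBoundAllocas,
+; the real uses of the slot go through a derived, bounded capability, so the
+; markers must still bracket those uses:
+;   LIFETIME_START %stack.0
+;   %cap = YBNDSIW (ADDIY %stack.0, 0), 4   ; bounded view of %stack.0
+;   ... uses of %cap ...
+;   LIFETIME_END %stack.0
+; If the markers were dropped or no longer enclosed the uses of %cap,
+; StackSlotColoring could assign %stack.0 to another value while %cap still
+; points at it.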
+ +declare void @use(i8 addrspace(200)*) + +define void @static_alloca() { + ; CHECK-LABEL: name: static_alloca + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: [[ADDIY:%[0-9]+]]:gpcr = ADDIY %stack.0, 0 + ; CHECK-NEXT: [[YBNDSIW:%[0-9]+]]:gpcr = YBNDSIW killed [[ADDIY]], 4 + ; CHECK-NEXT: LIFETIME_START %stack.0 + ; CHECK-NEXT: ADJCALLSTACKDOWNCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: $c10 = COPY [[YBNDSIW]] + ; CHECK-NEXT: PseudoCCALL target-flags(riscv-call) @use, csr_il32pc64d_l64pc128d, implicit-def dead $c1, implicit $c10, implicit-def $c2 + ; CHECK-NEXT: ADJCALLSTACKUPCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: LIFETIME_END %stack.0 + ; CHECK-NEXT: PseudoCRET + %1 = alloca i32, align 4, addrspace(200) + %2 = bitcast i32 addrspace(200)* %1 to i8 addrspace(200)* + call void @llvm.lifetime.start.p200i8(i64 4, i8 addrspace(200)* %2) + call void @use(i8 addrspace(200)* %2) + call void @llvm.lifetime.end.p200i8(i64 4, i8 addrspace(200)* %2) + ret void +} + +; LIFETIME_START/LIFETIME_END only apply to static allocas, so we can't verify +; that the analysis works correctly, but the IR is here for completeness. +define void @dynamic_alloca(i64 zeroext %n) { + ; CHECK-LABEL: name: dynamic_alloca + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 2 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = nuw ADDI [[SLLI]], 15 + ; CHECK-NEXT: [[ANDI:%[0-9]+]]:gpr = ANDI killed [[ADDI]], -16 + ; CHECK-NEXT: [[YAMASK:%[0-9]+]]:gpr = YAMASK [[ANDI]] + ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[YAMASK]], -1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[ANDI]], killed [[XORI]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:gpr = AND killed [[ADD]], [[YAMASK]] + ; CHECK-NEXT: ADJCALLSTACKDOWNCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpcr = COPY $c2 + ; CHECK-NEXT: [[PseudoCGetAddr:%[0-9]+]]:gpr = PseudoCGetAddr [[COPY1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB killed [[PseudoCGetAddr]], [[AND]] + ; CHECK-NEXT: [[YAMASK1:%[0-9]+]]:gpr = YAMASK [[ANDI]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:gpr = AND killed [[SUB]], killed [[YAMASK1]] + ; CHECK-NEXT: [[YADDRW:%[0-9]+]]:gpcr = YADDRW [[COPY1]], killed [[AND1]] + ; CHECK-NEXT: [[YBNDSRW:%[0-9]+]]:gpcr = YBNDSRW [[YADDRW]], [[AND]] + ; CHECK-NEXT: $c2 = COPY [[YADDRW]] + ; CHECK-NEXT: ADJCALLSTACKUPCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: [[YBNDSRW1:%[0-9]+]]:gpcr = YBNDSRW killed [[YBNDSRW]], [[SLLI]] + ; CHECK-NEXT: ADJCALLSTACKDOWNCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: $c10 = COPY [[YBNDSRW1]] + ; CHECK-NEXT: PseudoCCALL target-flags(riscv-call) @use, csr_il32pc64d_l64pc128d, implicit-def dead $c1, implicit $c10, implicit-def $c2 + ; CHECK-NEXT: ADJCALLSTACKUPCAP 0, 0, implicit-def dead $c2, implicit $c2 + ; CHECK-NEXT: PseudoCRET + %1 = alloca i32, i64 %n, align 4, addrspace(200) + %2 = bitcast i32 addrspace(200)* %1 to i8 addrspace(200)* + call void @llvm.lifetime.start.p200i8(i64 -1, i8 addrspace(200)* %2) + call void @use(i8 addrspace(200)* %2) + call void @llvm.lifetime.end.p200i8(i64 -1, i8 addrspace(200)* %2) + ret void +} + +declare void @llvm.lifetime.start.p200i8(i64, i8 addrspace(200)*) +declare void @llvm.lifetime.end.p200i8(i64, i8 addrspace(200)*) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cap-from-ptr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cap-from-ptr.ll new file mode 100644 index 
0000000000000..5b9143d1f67f9 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cap-from-ptr.ll @@ -0,0 +1,204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cap-from-ptr.ll +;; Check that we can correctly generate code for llvm.cheri.cap.from.pointer() +;; This previously asserted on RISC-V due to a broken ISel pattern. +;; We pipe this input through instcombine first to ensure SelectionDAG sees canonical IR. +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -passes=instcombine -S < %s | FileCheck %s --check-prefix=CHECK-IR +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -passes=instcombine -S < %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d | FileCheck %s --check-prefix=PURECAP +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -passes=instcombine -S < %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d | FileCheck %s --check-prefix=HYBRID + +define internal ptr addrspace(200) @test(ptr addrspace(200) %ptr, ptr addrspace(200) %cap, i64 %offset) nounwind { +; PURECAP-LABEL: test: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bnez a2, .LBB0_2 +; PURECAP-NEXT: # %bb.1: # %entry +; PURECAP-NEXT: ymv ca1, cnull +; PURECAP-NEXT: j .LBB0_3 +; PURECAP-NEXT: .LBB0_2: +; PURECAP-NEXT: yaddrw ca1, ca1, a2 +; PURECAP-NEXT: .LBB0_3: # %entry +; PURECAP-NEXT: sy ca1, 0(ca0) +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: test: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bnez a2, .LBB0_2 +; HYBRID-NEXT: # %bb.1: # %entry +; HYBRID-NEXT: ymv ca1, cnull +; HYBRID-NEXT: j .LBB0_3 +; HYBRID-NEXT: .LBB0_2: +; HYBRID-NEXT: yaddrw ca1, ca1, a2 +; HYBRID-NEXT: .LBB0_3: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(ca1) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @test +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], ptr addrspace(200) [[CAP:%.*]], i64 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[NEW:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) [[CAP]], i64 [[OFFSET]]) +; CHECK-IR-NEXT: store ptr addrspace(200) [[NEW]], ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) [[NEW]] +; +entry: + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) %cap, i64 %offset) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; (int_cheri_cap_from_ptr x, 0) -> null +define internal ptr addrspace(200) @cap_from_ptr_zero(ptr addrspace(200) %ptr, ptr addrspace(200) %cap) nounwind { +; PURECAP-LABEL: cap_from_ptr_zero: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: sy cnull, 0(ca0) +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_zero: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(cnull) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) 
@cap_from_ptr_zero +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], ptr addrspace(200) [[CAP:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: store ptr addrspace(200) null, ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) null +; +entry: + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) %cap, i64 0) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; Check that (int_cheri_cap_from_ptr ddc, x) can use the DDC register directly +define internal ptr addrspace(200) @cap_from_ptr_ddc(ptr addrspace(200) %ptr, i64 %offset) nounwind { +; PURECAP-LABEL: cap_from_ptr_ddc: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bnez a1, .LBB2_2 +; PURECAP-NEXT: # %bb.1: # %entry +; PURECAP-NEXT: ymv ca1, cnull +; PURECAP-NEXT: j .LBB2_3 +; PURECAP-NEXT: .LBB2_2: +; PURECAP-NEXT: yaddrw ca1, cnull, a1 +; PURECAP-NEXT: .LBB2_3: # %entry +; PURECAP-NEXT: sy ca1, 0(ca0) +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_ddc: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: csrrc ca2, ddc, zero +; HYBRID-NEXT: bnez a1, .LBB2_2 +; HYBRID-NEXT: # %bb.1: # %entry +; HYBRID-NEXT: ymv ca1, cnull +; HYBRID-NEXT: j .LBB2_3 +; HYBRID-NEXT: .LBB2_2: +; HYBRID-NEXT: yaddrw ca1, ca2, a1 +; HYBRID-NEXT: .LBB2_3: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(ca1) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_ddc +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], i64 [[OFFSET:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[DDC:%.*]] = call ptr addrspace(200) @llvm.cheri.ddc.get() +; CHECK-IR-NEXT: [[NEW:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) [[DDC]], i64 [[OFFSET]]) +; CHECK-IR-NEXT: store ptr addrspace(200) [[NEW]], ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) [[NEW]] +; +entry: + %ddc = call ptr addrspace(200) @llvm.cheri.ddc.get() + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) %ddc, i64 %offset) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; Check that (int_cheri_cap_from_ptr x, 0) -> null has priority over direct DDC usage +define internal ptr addrspace(200) @cap_from_ptr_ddc_zero(ptr addrspace(200) %ptr) nounwind { +; PURECAP-LABEL: cap_from_ptr_ddc_zero: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: sy cnull, 0(ca0) +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_ddc_zero: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(cnull) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_ddc_zero +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: store ptr addrspace(200) null, ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) null +; +entry: + %ddc = call ptr addrspace(200) @llvm.cheri.ddc.get() + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) %ddc, i64 0) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +;; 
Check that (int_cheri_cap_from_ptr null, x) does not use register zero (since that is DDC) +define internal ptr addrspace(200) @cap_from_ptr_null(ptr addrspace(200) %ptr, i64 %offset) nounwind { +; PURECAP-LABEL: cap_from_ptr_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bnez a1, .LBB4_2 +; PURECAP-NEXT: # %bb.1: # %entry +; PURECAP-NEXT: ymv ca1, cnull +; PURECAP-NEXT: j .LBB4_3 +; PURECAP-NEXT: .LBB4_2: +; PURECAP-NEXT: yaddrw ca1, cnull, a1 +; PURECAP-NEXT: .LBB4_3: # %entry +; PURECAP-NEXT: sy ca1, 0(ca0) +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: cap_from_ptr_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bnez a1, .LBB4_2 +; HYBRID-NEXT: # %bb.1: # %entry +; HYBRID-NEXT: ymv ca1, cnull +; HYBRID-NEXT: j .LBB4_3 +; HYBRID-NEXT: .LBB4_2: +; HYBRID-NEXT: yaddrw ca1, cnull, a1 +; HYBRID-NEXT: .LBB4_3: # %entry +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: sy ca0, 0(ca1) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: ret +; CHECK-IR-LABEL: define internal ptr addrspace(200) @cap_from_ptr_null +; CHECK-IR-SAME: (ptr addrspace(200) [[PTR:%.*]], i64 [[OFFSET:%.*]]) #[[ATTR0]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: [[NEW:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) null, i64 [[OFFSET]]) +; CHECK-IR-NEXT: store ptr addrspace(200) [[NEW]], ptr addrspace(200) [[PTR]], align 16 +; CHECK-IR-NEXT: ret ptr addrspace(200) [[NEW]] +; +entry: + %new = call ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200) null, i64 %offset) + store ptr addrspace(200) %new, ptr addrspace(200) %ptr, align 16 + ret ptr addrspace(200) %new +} + +declare ptr addrspace(200) @llvm.cheri.cap.from.pointer.i64(ptr addrspace(200), i64) +declare ptr addrspace(200) @llvm.cheri.ddc.get() diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-csub.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-csub.ll new file mode 100644 index 0000000000000..9540dc4cd4742 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-csub.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-csub.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -o - | FileCheck %s --check-prefix=HYBRID +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s --check-prefix=PURECAP + +define i64 @subp(i8 addrspace(200)* readnone %a, i8 addrspace(200)* readnone %b) nounwind { +; HYBRID-LABEL: subp: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: subp: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sub a0, a0, a1 +; PURECAP-NEXT: ret + %1 = tail call i64 @llvm.cheri.cap.diff.i64(i8 addrspace(200)* %a, i8 addrspace(200)* %b) + ret i64 %1 +} + +declare i64 @llvm.cheri.cap.diff.i64(i8 addrspace(200)*, i8 addrspace(200)*) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-intrinsics-folding-broken-module-regression.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-intrinsics-folding-broken-module-regression.ll new file mode 100644 index 0000000000000..990bcc267cbe4 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-intrinsics-folding-broken-module-regression.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-intrinsics-folding-broken-module-regression.ll +; This used to create a broken function. +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -S -passes=instcombine %s -o - | FileCheck %s +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -S '-passes=default' %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O3 -o - | FileCheck %s --check-prefix ASM +target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200" + +@d = common addrspace(200) global i64 0, align 4 +@e = common addrspace(200) global ptr addrspace(200) null, align 32 + +; C Source code: +;int d; +;void* e; +;void g(int x, int y) { +; e = (__uintcap_t)&d + x + y; +;} + +define void @g(i64 %x, i64 %y) addrspace(200) nounwind { +; ASM-LABEL: g: +; ASM: # %bb.0: +; ASM-NEXT: .LBB0_1: # Label of block must be emitted +; ASM-NEXT: auipcc ca2, %got_pcrel_hi(d) +; ASM-NEXT: ly ca2, %pcrel_lo(.LBB0_1)(ca2) +; ASM-NEXT: .LBB0_2: # Label of block must be emitted +; ASM-NEXT: auipcc ca3, %got_pcrel_hi(e) +; ASM-NEXT: ly ca3, %pcrel_lo(.LBB0_2)(ca3) +; ASM-NEXT: addy ca0, ca2, a0 +; ASM-NEXT: addy ca0, ca0, a1 +; ASM-NEXT: sy ca0, 0(ca3) +; ASM-NEXT: ret +; CHECK-LABEL: define void @g +; CHECK-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) addrspace(200) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(200) @d, i64 [[X]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(200) [[TMP5]], i64 [[Y]] +; CHECK-NEXT: store ptr addrspace(200) [[TMP11]], ptr addrspace(200) @e, align 32 +; CHECK-NEXT: ret void +; + %x.addr = alloca i64, align 4, addrspace(200) + %y.addr = alloca i64, align 4, addrspace(200) + store i64 %x, ptr addrspace(200) %x.addr, align 4 + store i64 %y, ptr addrspace(200) %y.addr, align 4 + %tmp1 = load i64, ptr addrspace(200) %x.addr, align 4 + %tmp2 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) null, i64 %tmp1) + %tmp3 = call i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200) @d) + %tmp4 = call i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200) %tmp2) + %add = add i64 %tmp3, %tmp4 + %tmp5 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) @d, i64 %add) + %tmp7 = load i64, ptr addrspace(200) %y.addr, align 4 + %tmp8 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) null, i64 %tmp7) + %tmp9 = call i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200) %tmp5) + %tmp10 = call i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200) %tmp8) + %add1 = add i64 %tmp9, %tmp10 + %tmp11 = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) %tmp5, i64 %add1) + store ptr addrspace(200) %tmp11, ptr addrspace(200) @e, align 32 + ret void +} + +; define void @g(i64 %x, i64 %y) nounwind { +; %tmp1 = tail call i8 addrspace(200)* @llvm.cheri.cap.offset.increment.i64(i8 addrspace(200)* bitcast (i64 addrspace(200)* @d to i8 addrspace(200)*), i64 %x) +; %tmp3 = tail call i8 addrspace(200)* @llvm.cheri.cap.offset.increment.i64(i8 addrspace(200)* %tmp1, i64 %y) +; store i8 addrspace(200)* %tmp3, i8 addrspace(200)* addrspace(200)* @e, align 32 +; ret void +; } +; +declare ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200), i64) addrspace(200) +declare i64 
@llvm.cheri.cap.offset.get.i64(ptr addrspace(200)) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-memfn-call.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-memfn-call.ll new file mode 100644 index 0000000000000..e9a997a212687 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-memfn-call.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-memfn-call.ll +; Check that we call memset_c/memmove_c/memcpy_c in hybrid mode. +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s --check-prefix=PURECAP +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -o - | FileCheck %s --check-prefix=HYBRID +%struct.x = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +declare void @llvm.memmove.p200.p200.i64(ptr addrspace(200) nocapture, ptr addrspace(200) nocapture readonly, i64, i1) +declare void @llvm.memset.p200.i64(ptr addrspace(200) nocapture, i8, i64, i1) +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) nocapture, ptr addrspace(200) nocapture readonly, i64, i1) + +define void @call_memset(ptr addrspace(200) align 4 %dst) nounwind { +; PURECAP-LABEL: call_memset: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; PURECAP-NEXT: li a2, 40 +; PURECAP-NEXT: li a1, 0 +; PURECAP-NEXT: call memset +; PURECAP-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 16 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: call_memset: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: li a2, 40 +; HYBRID-NEXT: li a1, 0 +; HYBRID-NEXT: call memset_c@plt +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + call void @llvm.memset.p200.i64(ptr addrspace(200) align 4 %dst, i8 0, i64 40, i1 false) + ret void +} + +define void @call_memcpy(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src) nounwind { +; PURECAP-LABEL: call_memcpy: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; PURECAP-NEXT: li a2, 40 +; PURECAP-NEXT: call memcpy +; PURECAP-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 16 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: call_memcpy: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: li a2, 40 +; HYBRID-NEXT: call memcpy_c@plt +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src, i64 40, i1 false) + ret void +} + +define void @call_memmove(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src) nounwind { +; PURECAP-LABEL: call_memmove: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -16 +; PURECAP-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; PURECAP-NEXT: li a2, 40 +; PURECAP-NEXT: call memmove +; PURECAP-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; PURECAP-NEXT: addiy csp, csp, 16 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: call_memmove: +; HYBRID: # %bb.0: # 
%entry +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: li a2, 40 +; HYBRID-NEXT: call memmove_c@plt +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret +entry: + call void @llvm.memmove.p200.p200.i64(ptr addrspace(200) align 4 %dst, ptr addrspace(200) align 4 %src, i64 40, i1 false) + ret void +} + diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-pointer-comparison.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-pointer-comparison.ll new file mode 100644 index 0000000000000..dc737760863df --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cheri-pointer-comparison.ll @@ -0,0 +1,1324 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cheri-pointer-comparison.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -o - | FileCheck %s --check-prefix=HYBRID +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s --check-prefix=PURECAP +; This series of tests serves two purposes. +; The first purpose is to check that we generate efficient code for all +; capability comparisons, conditional branches and conditional selects. +; The second purpose is to check that we generate code that honours the +; signedness of the IR (which is always unsigned when emitting IR for C +; language-level pointers, whereas __intcap uses the signedness of the type). +; NGINX has a loop with (void*)-1 as a sentinel value which was never entered +; due to this bug. +; Original issue: https://github.com/CTSRD-CHERI/llvm/issues/199 +; Fixed upstream in https://reviews.llvm.org/D70917 +; (be15dfa88fb1ed94d12f374797f98ede6808f809) +; +; Original source code showing this surprising behaviour (for CHERI-MIPS): +; int +; main(void) +; { +; void *a, *b; +; +; a = (void *)0x12033091e; +; b = (void *)0xffffffffffffffff; +; +; if (a < b) { +; printf("ok\n"); +; return (0); +; } +; +; printf("surprising result\n"); +; return (1); +; } +; +; Morello had a similar code generation issue for selects, where a less-than +; comparison generated a csel instruction using a signed predicate instead of +; the unsigned one: +; void *select_lt(void *p1, void *p2) { +; return p1 < p2 ? 
p1 : p2; +; } +; See https://git.morello-project.org/morello/llvm-project/-/issues/22 + +define i32 @eq(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: eq: +; HYBRID: # %bb.0: +; HYBRID-NEXT: xor a0, a0, a1 +; HYBRID-NEXT: seqz a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: eq: +; PURECAP: # %bb.0: +; PURECAP-NEXT: xor a0, a0, a1 +; PURECAP-NEXT: seqz a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ne(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ne: +; HYBRID: # %bb.0: +; HYBRID-NEXT: xor a0, a0, a1 +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ne: +; PURECAP: # %bb.0: +; PURECAP-NEXT: xor a0, a0, a1 +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ugt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ugt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a1, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ugt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a1, a0 +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @uge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: uge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a0, a1 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: uge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a0, a1 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ult(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ult: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a0, a1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ult: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a0, a1 +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ule(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: ule: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a1, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ule: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a1, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sgt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: sgt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a1, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sgt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a1, a0 +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: sge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a0, a1 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a0, a1 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @slt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: slt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a0, a1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: slt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a0, a1 +; PURECAP-NEXT: ret 
+ %cmp = icmp slt i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sle(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: sle: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slt a0, a1, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sle: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slt a0, a1, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @eq_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: eq_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: seqz a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: eq_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: seqz a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ne_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ne_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ne_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ugt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ugt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ugt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @uge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: uge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltu a0, a0, zero +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: uge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltu a0, a0, zero +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ult_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ult_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltiu a0, a0, 0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ult_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltiu a0, a0, 0 +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @ule_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: ule_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: snez a0, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: ule_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: snez a0, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sgt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: sgt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sgtz a0, a0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sgt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sgtz a0, a0 +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: sge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sltz a0, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sltz a0, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @slt_null(i8 
addrspace(200)* %a) nounwind { +; HYBRID-LABEL: slt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slti a0, a0, 0 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: slt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slti a0, a0, 0 +; PURECAP-NEXT: ret + %cmp = icmp slt i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @sle_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: sle_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sgtz a0, a0 +; HYBRID-NEXT: xori a0, a0, 1 +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: sle_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sgtz a0, a0 +; PURECAP-NEXT: xori a0, a0, 1 +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, null + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i8 addrspace(200)* @select_eq(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_eq: +; HYBRID: # %bb.0: +; HYBRID-NEXT: beq a0, a1, .LBB20_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB20_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_eq: +; PURECAP: # %bb.0: +; PURECAP-NEXT: beq a0, a1, .LBB20_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB20_2: +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ne(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ne: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bne a0, a1, .LBB21_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB21_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ne: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bne a0, a1, .LBB21_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB21_2: +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ugt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ugt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu a1, a0, .LBB22_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB22_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ugt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu a1, a0, .LBB22_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB22_2: +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_uge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_uge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu a0, a1, .LBB23_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB23_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_uge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu a0, a1, .LBB23_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB23_2: +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ult(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ult: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu a0, a1, .LBB24_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB24_2: +; HYBRID-NEXT: ret +; +; 
PURECAP-LABEL: select_ult: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu a0, a1, .LBB24_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB24_2: +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ule(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ule: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu a1, a0, .LBB25_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB25_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ule: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu a1, a0, .LBB25_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB25_2: +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sgt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sgt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: blt a1, a0, .LBB26_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB26_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sgt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: blt a1, a0, .LBB26_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB26_2: +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sge: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bge a0, a1, .LBB27_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB27_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sge: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bge a0, a1, .LBB27_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB27_2: +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_slt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_slt: +; HYBRID: # %bb.0: +; HYBRID-NEXT: blt a0, a1, .LBB28_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB28_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_slt: +; PURECAP: # %bb.0: +; PURECAP-NEXT: blt a0, a1, .LBB28_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB28_2: +; PURECAP-NEXT: ret + %cmp = icmp slt i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sle(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sle: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bge a1, a0, .LBB29_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB29_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sle: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bge a1, a0, .LBB29_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB29_2: +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, %b + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_eq_null(i8 addrspace(200)* %a, 
i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_eq_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: beqz a0, .LBB30_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB30_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_eq_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: beqz a0, .LBB30_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB30_2: +; PURECAP-NEXT: ret + %cmp = icmp eq i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ne_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ne_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bnez a0, .LBB31_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB31_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ne_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bnez a0, .LBB31_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB31_2: +; PURECAP-NEXT: ret + %cmp = icmp ne i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ugt_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ugt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu zero, a0, .LBB32_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB32_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ugt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu zero, a0, .LBB32_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB32_2: +; PURECAP-NEXT: ret + %cmp = icmp ugt i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_uge_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_uge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu a0, zero, .LBB33_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB33_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_uge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu a0, zero, .LBB33_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB33_2: +; PURECAP-NEXT: ret + %cmp = icmp uge i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ult_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ult_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltu a0, zero, .LBB34_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB34_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ult_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltu a0, zero, .LBB34_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB34_2: +; PURECAP-NEXT: ret + %cmp = icmp ult i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_ule_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_ule_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgeu zero, a0, .LBB35_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB35_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_ule_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgeu zero, a0, 
.LBB35_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB35_2: +; PURECAP-NEXT: ret + %cmp = icmp ule i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sgt_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sgt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgtz a0, .LBB36_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB36_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sgt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgtz a0, .LBB36_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB36_2: +; PURECAP-NEXT: ret + %cmp = icmp sgt i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sge_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sge_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bgez a0, .LBB37_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB37_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sge_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bgez a0, .LBB37_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB37_2: +; PURECAP-NEXT: ret + %cmp = icmp sge i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_slt_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_slt_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bltz a0, .LBB38_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB38_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_slt_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bltz a0, .LBB38_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB38_2: +; PURECAP-NEXT: ret + %cmp = icmp slt i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +define i8 addrspace(200)* @select_sle_null(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: select_sle_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: blez a0, .LBB39_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: .LBB39_2: +; HYBRID-NEXT: ret +; +; PURECAP-LABEL: select_sle_null: +; PURECAP: # %bb.0: +; PURECAP-NEXT: blez a0, .LBB39_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, ca1 +; PURECAP-NEXT: .LBB39_2: +; PURECAP-NEXT: ret + %cmp = icmp sle i8 addrspace(200)* %a, null + %cond = select i1 %cmp, i8 addrspace(200)* %a, i8 addrspace(200)* %b + ret i8 addrspace(200)* %cond +} + +declare i32 @func1() nounwind +declare i32 @func2() nounwind + +define i32 @branch_eq(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_eq: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beq a0, a1, .LBB40_2 +; HYBRID-NEXT: # %bb.1: # %if.end +; HYBRID-NEXT: tail func2@plt +; HYBRID-NEXT: .LBB40_2: # %if.then +; HYBRID-NEXT: tail func1@plt +; +; PURECAP-LABEL: branch_eq: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beq a0, a1, .LBB40_2 +; PURECAP-NEXT: # %bb.1: # %if.end +; PURECAP-NEXT: tail func2 +; PURECAP-NEXT: .LBB40_2: # %if.then +; PURECAP-NEXT: tail func1 +entry: + %cmp = icmp eq i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail 
call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ne(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ne: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beq a0, a1, .LBB41_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB41_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ne: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beq a0, a1, .LBB41_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB41_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ne i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ugt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ugt: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu a1, a0, .LBB42_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB42_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ugt: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu a1, a0, .LBB42_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB42_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ugt i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_uge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_uge: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu a0, a1, .LBB43_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB43_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_uge: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltu a0, a1, .LBB43_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB43_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp uge i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ult(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ult: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu a0, a1, .LBB44_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB44_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ult: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu a0, a1, .LBB44_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB44_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ult i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ule(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_ule: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu a1, a0, .LBB45_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB45_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ule: +; PURECAP: # 
%bb.0: # %entry +; PURECAP-NEXT: bltu a1, a0, .LBB45_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB45_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ule i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sgt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_sgt: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bge a1, a0, .LBB46_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB46_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sgt: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bge a1, a0, .LBB46_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB46_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sgt i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sge(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_sge: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: blt a0, a1, .LBB47_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB47_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sge: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: blt a0, a1, .LBB47_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB47_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sge i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_slt(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_slt: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bge a0, a1, .LBB48_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB48_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_slt: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bge a0, a1, .LBB48_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB48_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp slt i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sle(i8 addrspace(200)* %a, i8 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: branch_sle: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: blt a1, a0, .LBB49_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB49_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sle: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: blt a1, a0, .LBB49_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB49_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sle i8 addrspace(200)* %a, %b + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_eq_null(i8 addrspace(200)* %a) 
nounwind { +; HYBRID-LABEL: branch_eq_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beqz a0, .LBB50_2 +; HYBRID-NEXT: # %bb.1: # %if.end +; HYBRID-NEXT: tail func2@plt +; HYBRID-NEXT: .LBB50_2: # %if.then +; HYBRID-NEXT: tail func1@plt +; +; PURECAP-LABEL: branch_eq_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beqz a0, .LBB50_2 +; PURECAP-NEXT: # %bb.1: # %if.end +; PURECAP-NEXT: tail func2 +; PURECAP-NEXT: .LBB50_2: # %if.then +; PURECAP-NEXT: tail func1 +entry: + %cmp = icmp eq i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ne_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ne_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: beqz a0, .LBB51_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB51_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ne_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: beqz a0, .LBB51_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB51_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ne i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ugt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ugt_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu zero, a0, .LBB52_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB52_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ugt_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu zero, a0, .LBB52_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB52_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ugt i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_uge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_uge_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu a0, zero, .LBB53_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB53_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_uge_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltu a0, zero, .LBB53_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB53_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp uge i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ult_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ult_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgeu a0, zero, .LBB54_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB54_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ult_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgeu a0, zero, .LBB54_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB54_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp 
= icmp ult i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_ule_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_ule_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltu zero, a0, .LBB55_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB55_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_ule_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltu zero, a0, .LBB55_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB55_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp ule i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sgt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_sgt_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: blez a0, .LBB56_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB56_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sgt_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: blez a0, .LBB56_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB56_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sgt i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sge_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_sge_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bltz a0, .LBB57_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB57_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_sge_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bltz a0, .LBB57_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB57_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sge i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_slt_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_slt_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgez a0, .LBB58_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB58_2: # %if.end +; HYBRID-NEXT: tail func2@plt +; +; PURECAP-LABEL: branch_slt_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgez a0, .LBB58_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB58_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp slt i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} + +define i32 @branch_sle_null(i8 addrspace(200)* %a) nounwind { +; HYBRID-LABEL: branch_sle_null: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: bgtz a0, .LBB59_2 +; HYBRID-NEXT: # %bb.1: # %if.then +; HYBRID-NEXT: tail func1@plt +; HYBRID-NEXT: .LBB59_2: # %if.end +; HYBRID-NEXT: tail 
func2@plt +; +; PURECAP-LABEL: branch_sle_null: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: bgtz a0, .LBB59_2 +; PURECAP-NEXT: # %bb.1: # %if.then +; PURECAP-NEXT: tail func1 +; PURECAP-NEXT: .LBB59_2: # %if.end +; PURECAP-NEXT: tail func2 +entry: + %cmp = icmp sle i8 addrspace(200)* %a, null + br i1 %cmp, label %if.then, label %if.end +if.then: + %retval1 = tail call i32 @func1() + ret i32 %retval1 +if.end: + %retval2 = tail call i32 @func2() + ret i32 %retval2 +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cmpxchg-cap-ptr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cmpxchg-cap-ptr.ll new file mode 100644 index 0000000000000..974a3d84a998d --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/cmpxchg-cap-ptr.ll @@ -0,0 +1,656 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/cmpxchg-cap-ptr.ll +; Check that we can generate sensible code for atomic operations on capabilities +; through capability pointers, in both hybrid and purecap mode. +; See https://github.com/CTSRD-CHERI/llvm-project/issues/470 +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -mattr=+a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -mattr=-a < %s | FileCheck %s --check-prefixes=PURECAP,PURECAP-LIBCALLS --allow-unused-prefixes +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -mattr=+a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-ATOMICS --allow-unused-prefixes +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -mattr=-a < %s | FileCheck %s --check-prefixes=HYBRID,HYBRID-LIBCALLS --allow-unused-prefixes + +define { i8, i1 } @test_cmpxchg_strong_i8(ptr addrspace(200) %ptr, i8 %exp, i8 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i8: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 56 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 56 +; PURECAP-ATOMICS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.b.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB0_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.b.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB0_1 +; PURECAP-ATOMICS-NEXT: .LBB0_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i8: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sb a1, 15(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 15 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 1 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_1 +; PURECAP-LIBCALLS-NEXT: lb a1, 15(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i8: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; 
HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sb a1, 7(sp) +; HYBRID-NEXT: addi a1, sp, 7 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_1_c@plt +; HYBRID-NEXT: lbu a1, 7(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i8 %exp, i8 %new acq_rel acquire + ret { i8, i1 } %1 +} + +define { i16, i1 } @test_cmpxchg_strong_i16(ptr addrspace(200) %ptr, i16 %exp, i16 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i16: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 48 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 48 +; PURECAP-ATOMICS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.h.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB1_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.h.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB1_1 +; PURECAP-ATOMICS-NEXT: .LBB1_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i16: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sh a1, 14(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 14 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 2 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_2 +; PURECAP-LIBCALLS-NEXT: lh a1, 14(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i16: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sh a1, 6(sp) +; HYBRID-NEXT: addi a1, sp, 6 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_2_c@plt +; HYBRID-NEXT: lh a1, 6(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i16 %exp, i16 %new acq_rel acquire + ret { i16, i1 } %1 +} + +define { i32, i1 } @test_cmpxchg_strong_i32(ptr addrspace(200) %ptr, i32 %exp, i32 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: sext.w a1, a1 +; PURECAP-ATOMICS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.w.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB2_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB2_1 +; PURECAP-ATOMICS-NEXT: .LBB2_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; 
PURECAP-LIBCALLS-NEXT: sw a1, 12(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 12 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 4 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a1, 12(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sw a1, 4(sp) +; HYBRID-NEXT: addi a1, sp, 4 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a1, 4(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i32 %exp, i32 %new acq_rel acquire + ret { i32, i1 } %1 +} + +define { i64, i1 } @test_cmpxchg_strong_i64(ptr addrspace(200) %ptr, i64 %exp, i64 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_i64: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.d.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB3_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.d.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB3_1 +; PURECAP-ATOMICS-NEXT: .LBB3_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_i64: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sd a1, 8(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 8 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_8 +; PURECAP-LIBCALLS-NEXT: ld a1, 8(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_i64: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sd a1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt +; HYBRID-NEXT: ld a1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, i64 %exp, i64 %new acq_rel acquire + ret { i64, i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_strong_cap(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_cap: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; 
PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB4_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB4_1 +; PURECAP-ATOMICS-NEXT: .LBB4_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_cap: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 16 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_cap: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_strong_cap_i32(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_strong_cap_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB5_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB5_1 +; PURECAP-ATOMICS-NEXT: .LBB5_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_strong_cap_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 16 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_strong_cap_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; 
HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %1 = cmpxchg ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} + + +define { i8, i1 } @test_cmpxchg_weak_i8(ptr addrspace(200) %ptr, i8 %exp, i8 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i8: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 56 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 56 +; PURECAP-ATOMICS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.b.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB6_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.b.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB6_1 +; PURECAP-ATOMICS-NEXT: .LBB6_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i8: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sb a1, 15(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 15 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 1 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_1 +; PURECAP-LIBCALLS-NEXT: lb a1, 15(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i8: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sb a1, 7(sp) +; HYBRID-NEXT: addi a1, sp, 7 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_1_c@plt +; HYBRID-NEXT: lbu a1, 7(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i8 %exp, i8 %new acq_rel acquire + ret { i8, i1 } %1 +} + +define { i16, i1 } @test_cmpxchg_weak_i16(ptr addrspace(200) %ptr, i16 %exp, i16 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i16: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: slli a1, a1, 48 +; PURECAP-ATOMICS-NEXT: srai a1, a1, 48 +; PURECAP-ATOMICS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.h.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB7_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.h.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB7_1 +; PURECAP-ATOMICS-NEXT: .LBB7_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i16: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sh a1, 14(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, 
csp, 14 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 2 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_2 +; PURECAP-LIBCALLS-NEXT: lh a1, 14(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i16: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sh a1, 6(sp) +; HYBRID-NEXT: addi a1, sp, 6 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_2_c@plt +; HYBRID-NEXT: lh a1, 6(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i16 %exp, i16 %new acq_rel acquire + ret { i16, i1 } %1 +} + +define { i32, i1 } @test_cmpxchg_weak_i32(ptr addrspace(200) %ptr, i32 %exp, i32 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: sext.w a1, a1 +; PURECAP-ATOMICS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.w.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB8_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.w.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB8_1 +; PURECAP-ATOMICS-NEXT: .LBB8_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sw a1, 12(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 12 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 4 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_4 +; PURECAP-LIBCALLS-NEXT: lw a1, 12(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sw a1, 4(sp) +; HYBRID-NEXT: addi a1, sp, 4 +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_4_c@plt +; HYBRID-NEXT: lw a1, 4(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i32 %exp, i32 %new acq_rel acquire + ret { i32, i1 } %1 +} + +define { i64, i1 } @test_cmpxchg_weak_i64(ptr addrspace(200) %ptr, i64 %exp, i64 %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_i64: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.d.aq a3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB9_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in 
Loop: Header=BB9_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.d.rl a4, a2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB9_1 +; PURECAP-ATOMICS-NEXT: .LBB9_3: +; PURECAP-ATOMICS-NEXT: xor a1, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a1 +; PURECAP-ATOMICS-NEXT: mv a0, a3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_i64: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sd a1, 8(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 8 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 8 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_8 +; PURECAP-LIBCALLS-NEXT: ld a1, 8(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: mv a0, a1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_i64: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sd a1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_8_c@plt +; HYBRID-NEXT: ld a1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: mv a0, a1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, i64 %exp, i64 %new acq_rel acquire + ret { i64, i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_weak_cap(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_cap: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB10_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB10_1 +; PURECAP-ATOMICS-NEXT: .LBB10_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_cap: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 16 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_cap: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; 
HYBRID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} + +define { ptr addrspace(200), i1 } @test_cmpxchg_weak_cap_i32(ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new) nounwind { +; PURECAP-ATOMICS-LABEL: test_cmpxchg_weak_cap_i32: +; PURECAP-ATOMICS: # %bb.0: +; PURECAP-ATOMICS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; PURECAP-ATOMICS-NEXT: lr.c.aq ca3, (ca0) +; PURECAP-ATOMICS-NEXT: bne a3, a1, .LBB11_3 +; PURECAP-ATOMICS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; PURECAP-ATOMICS-NEXT: sc.y.aq a4, ca2, (ca0) +; PURECAP-ATOMICS-NEXT: bnez a4, .LBB11_1 +; PURECAP-ATOMICS-NEXT: .LBB11_3: +; PURECAP-ATOMICS-NEXT: xor a0, a3, a1 +; PURECAP-ATOMICS-NEXT: seqz a1, a0 +; PURECAP-ATOMICS-NEXT: ymv ca0, ca3 +; PURECAP-ATOMICS-NEXT: ret +; +; PURECAP-LIBCALLS-LABEL: test_cmpxchg_weak_cap_i32: +; PURECAP-LIBCALLS: # %bb.0: +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, -32 +; PURECAP-LIBCALLS-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-LIBCALLS-NEXT: sy ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: addiy ca1, csp, 0 +; PURECAP-LIBCALLS-NEXT: ybndsiw ca1, ca1, 16 +; PURECAP-LIBCALLS-NEXT: li a3, 4 +; PURECAP-LIBCALLS-NEXT: li a4, 2 +; PURECAP-LIBCALLS-NEXT: call __atomic_compare_exchange_cap +; PURECAP-LIBCALLS-NEXT: ly ca1, 0(csp) +; PURECAP-LIBCALLS-NEXT: mv a2, a0 +; PURECAP-LIBCALLS-NEXT: ymv ca0, ca1 +; PURECAP-LIBCALLS-NEXT: mv a1, a2 +; PURECAP-LIBCALLS-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; PURECAP-LIBCALLS-NEXT: addiy csp, csp, 32 +; PURECAP-LIBCALLS-NEXT: ret +; +; HYBRID-LABEL: test_cmpxchg_weak_cap_i32: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; HYBRID-NEXT: sy ca1, 0(sp) +; HYBRID-NEXT: mv a1, sp +; HYBRID-NEXT: li a3, 4 +; HYBRID-NEXT: li a4, 2 +; HYBRID-NEXT: call __atomic_compare_exchange_cap_c@plt +; HYBRID-NEXT: ly ca1, 0(sp) +; HYBRID-NEXT: mv a2, a0 +; HYBRID-NEXT: ymv ca0, ca1 +; HYBRID-NEXT: mv a1, a2 +; HYBRID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 32 +; HYBRID-NEXT: ret + %1 = cmpxchg weak ptr addrspace(200) %ptr, ptr addrspace(200) %exp, ptr addrspace(200) %new acq_rel acquire + ret { ptr addrspace(200), i1 } %1 +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/dagcombine-ptradd-deleted-regression.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/dagcombine-ptradd-deleted-regression.ll new file mode 100644 index 0000000000000..39acd9f7f35fa --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/dagcombine-ptradd-deleted-regression.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/dagcombine-ptradd-deleted-regression.ll +; This would previously crash DAGCombiner::visitPTRADD since the PTRADD +; corresponding to the second GEP would be collapsed to a no-op when +; reassociated and delete the synthesised PTRADD node, not just the ADD, which +; the folding code was not prepared for. 
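+; (In the IR below, the second GEP's index is %2 = mul i64 0, %1, so the +; reassociated PTRADD folds to a no-op; that fold is what used to delete the +; synthesised node out from under the combiner.) 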
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -o - | FileCheck %s --check-prefix=HYBRID +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s --check-prefix=PURECAP + +declare i32 @bar(i32 addrspace(200)*) + +define internal i32 @foo(i32 addrspace(200)* %a, i64 addrspace(200)* %b) nounwind { +; HYBRID-LABEL: foo: +; HYBRID: # %bb.0: # %entry +; HYBRID-NEXT: addi sp, sp, -32 +; HYBRID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; HYBRID-NEXT: addiy ca0, ca0, 4 +; HYBRID-NEXT: sy ca0, 0(sp) # 16-byte Folded Spill +; HYBRID-NEXT: .LBB0_1: # %loop +; HYBRID-NEXT: # =>This Inner Loop Header: Depth=1 +; HYBRID-NEXT: ly ca0, 0(sp) # 16-byte Folded Reload +; HYBRID-NEXT: call bar@plt +; HYBRID-NEXT: j .LBB0_1 +; +; PURECAP-LABEL: foo: +; PURECAP: # %bb.0: # %entry +; PURECAP-NEXT: addiy csp, csp, -32 +; PURECAP-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; PURECAP-NEXT: sy cs0, 0(csp) # 16-byte Folded Spill +; PURECAP-NEXT: addiy cs0, ca0, 4 +; PURECAP-NEXT: .LBB0_1: # %loop +; PURECAP-NEXT: # =>This Inner Loop Header: Depth=1 +; PURECAP-NEXT: ymv ca0, cs0 +; PURECAP-NEXT: call bar +; PURECAP-NEXT: j .LBB0_1 +entry: + br label %loop + +loop: + %0 = getelementptr inbounds i32, i32 addrspace(200)* %a, i64 1 + %1 = load i64, i64 addrspace(200)* %b, align 16 + %2 = mul i64 0, %1 + %3 = getelementptr inbounds i32, i32 addrspace(200)* %0, i64 %2 + %4 = call i32 @bar(i32 addrspace(200)* %3) + br label %loop +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/frameindex-arith.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/frameindex-arith.ll new file mode 100644 index 0000000000000..2c757d6bdc45d --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/frameindex-arith.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/frameindex-arith.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s + +; Check that we can fold the GEP (PTRADD) into the FrameIndex calculation +; rather than emitting two instructions. + +; Contains an explicit @llvm.cheri.cap.bounds.set so CheriBoundAllocas sees the +; use as safe and doesn't interfere by inserting bounds on the FrameIndex +; before the GEP/PTRADD. 
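+; The single addiy ca0, csp, 15 in the CHECK lines below is the folded form: +; the +1 from the GEP is applied together with the frame-index offset in one +; instruction rather than through a separate pointer add. 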
+define void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -32 +; CHECK-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; CHECK-NEXT: addiy ca0, csp, 15 +; CHECK-NEXT: ybndsrw ca0, ca0, zero +; CHECK-NEXT: call bar +; CHECK-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 32 +; CHECK-NEXT: ret + %x = alloca [2 x i8], align 1, addrspace(200) + %x_plus_1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(200)* %x, i64 0, i64 1 + %p = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i64(i8 addrspace(200)* %x_plus_1, i64 0) + call void @bar(i8 addrspace(200)* %p) + ret void +} + +declare void @bar(i8 addrspace(200)*) + +declare i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i64(i8 addrspace(200)*, i64) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/function-alias-size.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/function-alias-size.ll new file mode 100644 index 0000000000000..b16442826172d --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/function-alias-size.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/function-alias-size.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - -filetype=obj < %s | llvm-objdump --syms -r - | FileCheck %s --check-prefix=OBJDUMP +; The MIPS backend asserts emitting a relocation against an unsized but defined +; function-type global, which was happening with destructor aliases: +; The _ZN*D1Ev destructor is emitted as an alias for the defined _ZN*D2Ev destructor, +; and did not have size information, which triggered the assertion after the April 2021 merge. 
+; Check that we emit size information for function aliases: + +@a = constant i8 addrspace(200)* bitcast (void () addrspace(200)* @_ZN3fooD1Ev to i8 addrspace(200)*) +@_ZN3fooD1Ev = alias void (), void () addrspace(200)* @_ZN3fooD2Ev +define void @_ZN3fooD2Ev() addrspace(200) nounwind { +; ASM-LABEL: _ZN3fooD2Ev: +; ASM: # %bb.0: +; ASM-NEXT: ret + ret void +} + +@two_ints = private global {i32, i32} {i32 1, i32 2} +@elem0 = alias i32, getelementptr({i32, i32}, {i32, i32}* @two_ints, i32 0, i32 0) +@elem1 = alias i32, getelementptr({i32, i32}, {i32, i32}* @two_ints, i32 0, i32 1) + +; UTC_ARGS: --disable +; ASM: .size _ZN3fooD2Ev, .Lfunc_end0-_ZN3fooD2Ev + +; ASM-LABEL: .Ltwo_ints: +; ASM-NEXT: .{{4byte|word}} 1 +; ASM-NEXT: .{{4byte|word}} 2 +; ASM-NEXT: .size .Ltwo_ints, 8 + +; The function alias symbol should have the same size expression: +; ASM-LABEL: .globl _ZN3fooD1Ev +; ASM-NEXT: .type _ZN3fooD1Ev,@function +; ASM-NEXT: .set _ZN3fooD1Ev, _ZN3fooD2Ev +; ASM-NEXT: .size _ZN3fooD1Ev, .Lfunc_end0-_ZN3fooD2Ev + +; But for the aliases using a GEP, we have to subtract the offset: +; ASM-LABEL: .globl elem0 +; ASM-NEXT: .set elem0, .Ltwo_ints +; ASM-NEXT: .size elem0, 4 +; ASM-LABEL: .globl elem1 +; ASM-NEXT: .set elem1, .Ltwo_ints+4 +; ASM-NEXT: .size elem1, 4 + +; Check that the ELF st_size value was set correctly: +; OBJDUMP-LABEL: SYMBOL TABLE: +; OBJDUMP-NEXT: {{0+}}0 l df *ABS* {{0+}} function-alias-size.ll +; OBJDUMP-DAG: {{0+}}0 g F .text [[SIZE:[0-9a-f]+]] _ZN3fooD2Ev +; OBJDUMP-DAG: {{0+}}0 g O .data.rel.ro {{0+(10|8)}} a +; OBJDUMP-DAG: {{0+}}0 g F .text [[SIZE]] _ZN3fooD1Ev +; elem1 should have a size of 4 and not 8: +; OBJDUMP-DAG: {{0+}}0 g O .{{s?}}data {{0+}}4 elem0 +; OBJDUMP-DAG: {{0+}}4 g O .{{s?}}data {{0+}}4 elem1 diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/global-capinit-hybrid.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/global-capinit-hybrid.ll new file mode 100644 index 0000000000000..2ebe5e6da64ca --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/global-capinit-hybrid.ll @@ -0,0 +1,163 @@ +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/global-capinit-hybrid.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -o - | \ +; RUN: FileCheck %s --check-prefix=ASM -DPTR_DIRECTIVE=.quad +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -filetype=obj -o - | llvm-objdump -r -t - | \ +; RUN: FileCheck %s --check-prefix=RELOCS -DINTEGER_RELOC=R_RISCV_64 '-DCAPABILITY_RELOC=R_RISCV_CHERI_CAPABILITY' +target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128" + +declare void @extern_fn() +@extern_data = external global i8, align 1 + +; TODO: should the inttoptr ones be tagged -> emit a constructor? 
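+
+; For illustration only, a hypothetical sketch (names invented) of what
+; "emit a constructor" could mean for the inttoptr case: derive the value at
+; startup from DDC via the intrinsics tested in intrinsics.ll, e.g.
+;   %ddc = call i8 addrspace(200)* @llvm.cheri.ddc.get()
+;   %cap = call i8 addrspace(200)* @llvm.cheri.cap.address.set.i64(i8 addrspace(200)* %ddc, i64 1234)
+;   store i8 addrspace(200)* %cap, i8 addrspace(200)** @global_cap_inttoptr
+; in a function registered through @llvm.global_ctors; as plain static data
+; the emitted value remains untagged.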
+ +@global_ptr_const = global i8* inttoptr (i64 1234 to i8*), align 8 +; ASM-LABEL: .globl global_ptr_const +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_ptr_const: +; ASM-NEXT: [[PTR_DIRECTIVE]] 1234 +; ASM-NEXT: .size global_ptr_const, 8 +@global_cap_inttoptr = global i8 addrspace(200)* inttoptr (i64 1234 to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_cap_inttoptr +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_inttoptr: +; ASM-NEXT: .chericap 1234 +; ASM-NEXT: .size global_cap_inttoptr, 16 +@global_cap_addrspacecast = global i8 addrspace(200)* addrspacecast (i8* inttoptr (i64 1234 to i8*) to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_cap_addrspacecast +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_addrspacecast: +; ASM-NEXT: .chericap 1234 +; ASM-NEXT: .size global_cap_addrspacecast, 16 +@global_cap_nullgep = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i64 1234), align 16 +; ASM-LABEL: .globl global_cap_nullgep +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_nullgep: +; ASM-NEXT: .chericap 1234 +; ASM-NEXT: .size global_cap_nullgep, 16 + +@global_ptr_data = global i8* @extern_data, align 8 +; ASM-LABEL: .globl global_ptr_data +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_ptr_data: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data +; ASM-NEXT: .size global_ptr_data, 8 +@global_ptr_data_past_end = global i8* getelementptr inbounds (i8, i8* @extern_data, i64 1), align 8 +; ASM-LABEL: .globl global_ptr_data_past_end +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_ptr_data_past_end: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+1 +; ASM-NEXT: .size global_ptr_data_past_end, 8 +@global_ptr_data_two_past_end = global i8* getelementptr (i8, i8* @extern_data, i64 2), align 8 +; ASM-LABEL: .globl global_ptr_data_two_past_end +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_ptr_data_two_past_end: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+2 +; ASM-NEXT: .size global_ptr_data_two_past_end, 8 + +@global_cap_data_addrspacecast = global i8 addrspace(200)* addrspacecast (i8* @extern_data to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_cap_data_addrspacecast +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_data_addrspacecast: +; ASM-NEXT: .chericap extern_data +; ASM-NEXT: .size global_cap_data_addrspacecast, 16 +@global_cap_data_addrspacecast_past_end = global i8 addrspace(200)* addrspacecast (i8* getelementptr inbounds (i8, i8* @extern_data, i64 1) to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_cap_data_addrspacecast_past_end +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_data_addrspacecast_past_end: +; ASM-NEXT: .chericap extern_data+1 +; ASM-NEXT: .size global_cap_data_addrspacecast_past_end, 16 +@global_cap_data_addrspacecast_two_past_end = global i8 addrspace(200)* addrspacecast (i8* getelementptr (i8, i8* @extern_data, i64 2) to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_cap_data_addrspacecast_two_past_end +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_data_addrspacecast_two_past_end: +; ASM-NEXT: .chericap extern_data+2 +; ASM-NEXT: .size global_cap_data_addrspacecast_two_past_end, 16 + +@global_cap_data_nullgep = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i64 ptrtoint (i8* @extern_data to i64)), align 16 +; ASM-LABEL: .globl global_cap_data_nullgep +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_data_nullgep: +; ASM-NEXT: .p2align 4 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_cap_data_nullgep, 16 +@global_cap_data_nullgep_past_end = 
global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i64 ptrtoint (i8* getelementptr inbounds (i8, i8* @extern_data, i64 1) to i64)), align 16 +; ASM-LABEL: .globl global_cap_data_nullgep_past_end +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_data_nullgep_past_end: +; ASM-NEXT: .p2align 4 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+1 +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_cap_data_nullgep_past_end, 16 +@global_cap_data_nullgep_two_past_end = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i64 ptrtoint (i8* getelementptr (i8, i8* @extern_data, i64 2) to i64)), align 16 +; ASM-LABEL: .globl global_cap_data_nullgep_two_past_end +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_cap_data_nullgep_two_past_end: +; ASM-NEXT: .p2align 4 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_data+2 +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_cap_data_nullgep_two_past_end, 16 + +@global_fnptr = global void ()* @extern_fn, align 8 +; ASM-LABEL: .globl global_fnptr +; ASM-NEXT: .p2align 3 +; ASM-NEXT: global_fnptr: +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_fn +; ASM-NEXT: .size global_fnptr, 8 +@global_fncap_addrspacecast = global void () addrspace(200)* addrspacecast (void ()* @extern_fn to void () addrspace(200)*), align 16 +; ASM-LABEL: .globl global_fncap_addrspacecast +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_fncap_addrspacecast: +; ASM-NEXT: .chericap extern_fn +; ASM-NEXT: .size global_fncap_addrspacecast, 16 +@global_fncap_intcap_addrspacecast = global i8 addrspace(200)* addrspacecast (i8* bitcast (void ()* @extern_fn to i8*) to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_fncap_intcap_addrspacecast +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_fncap_intcap_addrspacecast: +; ASM-NEXT: .chericap extern_fn +; ASM-NEXT: .size global_fncap_intcap_addrspacecast, 16 +@global_fncap_intcap_nullgep = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i64 ptrtoint (void ()* @extern_fn to i64)), align 16 +; ASM-LABEL: .globl global_fncap_intcap_nullgep +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_fncap_intcap_nullgep: +; ASM-NEXT: .p2align 4 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_fn +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_fncap_intcap_nullgep, 16 +@global_fncap_addrspacecast_plus_two = global i8 addrspace(200)* addrspacecast (i8* getelementptr (i8, i8* bitcast (void ()* @extern_fn to i8*), i64 2) to i8 addrspace(200)*), align 16 +; ASM-LABEL: .globl global_fncap_addrspacecast_plus_two +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_fncap_addrspacecast_plus_two: +; ASM-NEXT: .chericap extern_fn+2 +; ASM-NEXT: .size global_fncap_addrspacecast_plus_two, 16 +@global_fncap_nullgep_plus_two = global i8 addrspace(200)* getelementptr (i8, i8 addrspace(200)* null, i64 ptrtoint (i8* getelementptr (i8, i8* bitcast (void ()* @extern_fn to i8*), i64 2) to i64)), align 16 +; ASM-LABEL: .globl global_fncap_nullgep_plus_two +; ASM-NEXT: .p2align 4 +; ASM-NEXT: global_fncap_nullgep_plus_two: +; ASM-NEXT: .p2align 4 +; ASM-NEXT: [[PTR_DIRECTIVE]] extern_fn+2 +; ASM-NEXT: [[PTR_DIRECTIVE]] 0 +; ASM-NEXT: .size global_fncap_nullgep_plus_two, 16 + + +; RELOCS-LABEL: RELOCATION RECORDS FOR [.{{s?}}data]: +; RELOCS-NEXT: OFFSET TYPE VALUE +; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data +; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x1 +; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x2 +; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_data +; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_data+0x1 +; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_data+0x2 +; 
RELOCS-NEXT: [[INTEGER_RELOC]] extern_data
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x1
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_data+0x2
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_fn
+; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_fn
+; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_fn
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_fn
+; RELOCS-NEXT: [[CAPABILITY_RELOC]] extern_fn+0x2
+; RELOCS-NEXT: [[INTEGER_RELOC]] extern_fn+0x2
+
+; Don't use .sdata for RISC-V, to allow re-using the same RELOCS lines.
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"SmallDataLimit", i32 0}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/gvn-capability-store-to-load-fwd.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/gvn-capability-store-to-load-fwd.ll
new file mode 100644
index 0000000000000..87ed1a2b72940
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/gvn-capability-store-to-load-fwd.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/gvn-capability-store-to-load-fwd.ll
+; Check that GVN does not attempt to read capability fields that it can't get the bits for
+; This is https://github.com/CTSRD-CHERI/llvm-project/issues/385
+; GVN was previously doing the following invalid transformation (Note the shift by 64 of the ptrtoint result)
+; %ai = alloca %suspicious_type, align 16, addrspace(200)
+; %tmp33 = bitcast %2 addrspace(200)* %ai to i8 addrspace(200)* addrspace(200)*
+; %tmp34 = load i8 addrspace(200)*, i8 addrspace(200)* addrspace(200)* %tmp33, align 16
+; %0 = ptrtoint i8 addrspace(200)* %tmp34 to i64 ; INCORRECT transformation (does not transfer all bits)
+; %1 = lshr i64 %0, 64 ; Shift right by 64 to get field #2
+; %2 = trunc i64 %1 to i32 ; truncate to drop the high bits
+; It assumed it could get bits 32-63 by doing a ptrtoint, but on CHERI-MIPS ptrtoint returns bits 65-127
+
+; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -S -aa-pipeline=basic-aa -passes=gvn -o - %s | FileCheck %s
+; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -S -aa-pipeline=basic-aa -passes=gvn -o - %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O0 -o - | FileCheck %s --check-prefix=ASM
+
+; The baseline (currently broken) is checked in to show the diff in the commit that fixes it
+
+target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200"
+
+%struct.addrinfo = type { i32, i32, i32, i32, i32, ptr addrspace(200), ptr addrspace(200), ptr addrspace(200) }
+
+
+define i32 @first_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind {
+; ASM-LABEL: first_i32_store_to_load_fwd:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -80
+; ASM-NEXT: sy ca0, 0(csp)
+; ASM-NEXT: lw a0, 0(csp)
+; ASM-NEXT: addiy csp, csp, 80
+; ASM-NEXT: ret
+; CHECK-LABEL: define i32 @first_i32_store_to_load_fwd
+; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 16, addrspace(200)
+; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 16
+; CHECK-NEXT: [[RESULT:%.*]] = load i32, ptr addrspace(200) [[STACKVAL]], align 4
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
+ %stackval = alloca %struct.addrinfo, align 16, addrspace(200)
+ 
%field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 0 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 16 + %result = load i32, ptr addrspace(200) %field, align 4 + ret i32 %result +} + +define i32 @second_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: second_i32_store_to_load_fwd: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -80 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: lw a0, 4(csp) +; ASM-NEXT: addiy csp, csp, 80 +; ASM-NEXT: ret +; CHECK-LABEL: define i32 @second_i32_store_to_load_fwd +; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { +; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 16, addrspace(200) +; CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds [[STRUCT_ADDRINFO]], ptr addrspace(200) [[STACKVAL]], i64 0, i32 1 +; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 16 +; CHECK-NEXT: [[RESULT:%.*]] = load i32, ptr addrspace(200) [[FIELD]], align 4 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %stackval = alloca %struct.addrinfo, align 16, addrspace(200) + %field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 1 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 16 + %result = load i32, ptr addrspace(200) %field, align 4 + ret i32 %result +} + +define i32 @third_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: third_i32_store_to_load_fwd: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -80 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: lw a0, 8(csp) +; ASM-NEXT: addiy csp, csp, 80 +; ASM-NEXT: ret +; CHECK-LABEL: define i32 @third_i32_store_to_load_fwd +; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { +; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 16, addrspace(200) +; CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds [[STRUCT_ADDRINFO]], ptr addrspace(200) [[STACKVAL]], i64 0, i32 2 +; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 16 +; CHECK-NEXT: [[RESULT:%.*]] = load i32, ptr addrspace(200) [[FIELD]], align 4 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %stackval = alloca %struct.addrinfo, align 16, addrspace(200) + %field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 2 + store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 16 + %result = load i32, ptr addrspace(200) %field, align 4 + ret i32 %result +} + +define i32 @fourth_i32_store_to_load_fwd(ptr addrspace(200) %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: fourth_i32_store_to_load_fwd: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -80 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: lw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 80 +; ASM-NEXT: ret +; CHECK-LABEL: define i32 @fourth_i32_store_to_load_fwd +; CHECK-SAME: (ptr addrspace(200) [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR0]] { +; CHECK-NEXT: [[STACKVAL:%.*]] = alloca [[STRUCT_ADDRINFO:%.*]], align 16, addrspace(200) +; CHECK-NEXT: [[FIELD:%.*]] = getelementptr inbounds [[STRUCT_ADDRINFO]], ptr addrspace(200) [[STACKVAL]], i64 0, i32 3 +; CHECK-NEXT: store ptr addrspace(200) [[ARG]], ptr addrspace(200) [[STACKVAL]], align 16 +; CHECK-NEXT: [[RESULT:%.*]] = load i32, ptr addrspace(200) [[FIELD]], align 4 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %stackval = alloca %struct.addrinfo, 
align 16, addrspace(200)
+ %field = getelementptr inbounds %struct.addrinfo, ptr addrspace(200) %stackval, i64 0, i32 3
+ store ptr addrspace(200) %arg, ptr addrspace(200) %stackval, align 16
+ %result = load i32, ptr addrspace(200) %field, align 4
+ ret i32 %result
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/hoist-alloca.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/hoist-alloca.ll
new file mode 100644
index 0000000000000..706cef0b80d43
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/hoist-alloca.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/hoist-alloca.ll
+; REQUIRES: asserts
+; Check that we can hoist the csetbounds for a local alloca outside of loops
+; We know that it's always tagged and unsealed so machinelicm should be able
+; to hoist the csetbounds instructions.
+; TODO: for MIPS "simple-register-coalescing" moves the CheriBoundedStackPseudoImm back into the loop.
+; In general this will be faster than loading from the stack, but it's probably worse
+; than using a callee-saved register for loops with many iterations.
+
+; Generated from this code:
+; void call(int *src, int *dst);
+;
+; void hoist_alloca_uncond(int cond) {
+; int buf1[123];
+; int buf2[22];
+; for (int i = 0; i < 100; i++) {
+; call(buf1, buf2);
+; }
+; }
+;
+; void hoist_alloca_cond(int cond) {
+; int buf1[123];
+; int buf2[22];
+; for (int i = 0; i < 100; i++) {
+; if (cond) {
+; call(buf1, buf2);
+; }
+; }
+; }
+
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -o %t.mir -stop-before=early-machinelicm < %s
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -run-pass=early-machinelicm -debug-only=machinelicm %t.mir -o /dev/null 2>%t.dbg
+; RUN: FileCheck --input-file=%t.dbg --check-prefix=MACHINELICM-DBG %s
+; Check that MachineLICM hoists the CheriBoundedStackPseudoImm (MIPS) / IncOffset+SetBoundsImm (RISCV) instructions
+; MACHINELICM-DBG-LABEL: ******** Pre-regalloc Machine LICM: hoist_alloca_uncond
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 492
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.1.buf2, 0
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.2 to %bb.0
+; MACHINELICM-DBG-LABEL: ******** Pre-regalloc Machine LICM: hoist_alloca_cond
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 492
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.0.buf1, 0
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[IMM:%[0-9]+]]:gpr = ADDI $x0, 88
+; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0
+; MACHINELICM-DBG: Hoisting [[INC:%[0-9]+]]:gpcr = ADDIY %stack.1.buf2, 0
+; 
MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0 +; MACHINELICM-DBG: Hoisting [[BOUNDS:%[0-9]+]]:gpcr = YBNDSRW [[INC]]:gpcr, [[IMM]]:gpr +; MACHINELICM-DBG-NEXT: from %bb.3 to %bb.0 + +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O1 -o - < %s | FileCheck %s + +define void @hoist_alloca_uncond(i32 signext %cond) local_unnamed_addr addrspace(200) nounwind { +; CHECK-LABEL: hoist_alloca_uncond: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -656 +; CHECK-NEXT: sy cra, 640(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs0, 624(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs1, 608(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs2, 592(csp) # 16-byte Folded Spill +; CHECK-NEXT: li s2, 100 +; CHECK-NEXT: li a0, 492 +; CHECK-NEXT: addiy ca1, csp, 100 +; CHECK-NEXT: ybndsrw cs0, ca1, a0 +; CHECK-NEXT: li a0, 88 +; CHECK-NEXT: addiy ca1, csp, 12 +; CHECK-NEXT: ybndsrw cs1, ca1, a0 +; CHECK-NEXT: .LBB0_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ymv ca0, cs0 +; CHECK-NEXT: ymv ca1, cs1 +; CHECK-NEXT: call call +; CHECK-NEXT: addiw s2, s2, -1 +; CHECK-NEXT: bnez s2, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ly cra, 640(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs0, 624(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs1, 608(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs2, 592(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 656 +; CHECK-NEXT: ret +entry: + %buf1 = alloca [123 x i32], align 4, addrspace(200) + %buf2 = alloca [22 x i32], align 4, addrspace(200) + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arraydecay = getelementptr inbounds [123 x i32], [123 x i32] addrspace(200)* %buf1, i64 0, i64 0 + %arraydecay1 = getelementptr inbounds [22 x i32], [22 x i32] addrspace(200)* %buf2, i64 0, i64 0 + call void @call(i32 addrspace(200)* nonnull %arraydecay, i32 addrspace(200)* nonnull %arraydecay1) + %inc = add nuw nsw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +declare void @call(i32 addrspace(200)*, i32 addrspace(200)*) local_unnamed_addr addrspace(200) nounwind + +define void @hoist_alloca_cond(i32 signext %cond) local_unnamed_addr addrspace(200) nounwind { +; CHECK-LABEL: hoist_alloca_cond: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -672 +; CHECK-NEXT: sy cra, 656(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs0, 640(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs1, 624(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs2, 608(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs3, 592(csp) # 16-byte Folded Spill +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: li s3, 100 +; CHECK-NEXT: li a0, 492 +; CHECK-NEXT: addiy ca1, csp, 100 +; CHECK-NEXT: ybndsrw cs1, ca1, a0 +; CHECK-NEXT: li a0, 88 +; CHECK-NEXT: addiy ca1, csp, 12 +; CHECK-NEXT: ybndsrw cs2, ca1, a0 +; CHECK-NEXT: j .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %for.inc +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: addiw s3, s3, -1 +; CHECK-NEXT: beqz s3, .LBB1_4 +; CHECK-NEXT: .LBB1_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: beqz s0, .LBB1_1 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: ymv ca1, cs2 +; CHECK-NEXT: call call +; CHECK-NEXT: j .LBB1_1 +; CHECK-NEXT: .LBB1_4: # %for.cond.cleanup +; CHECK-NEXT: ly cra, 656(csp) # 
16-byte Folded Reload
+; CHECK-NEXT: ly cs0, 640(csp) # 16-byte Folded Reload
+; CHECK-NEXT: ly cs1, 624(csp) # 16-byte Folded Reload
+; CHECK-NEXT: ly cs2, 608(csp) # 16-byte Folded Reload
+; CHECK-NEXT: ly cs3, 592(csp) # 16-byte Folded Reload
+; CHECK-NEXT: addiy csp, csp, 672
+; CHECK-NEXT: ret
+entry:
+ %buf1 = alloca [123 x i32], align 4, addrspace(200)
+ %buf2 = alloca [22 x i32], align 4, addrspace(200)
+ %tobool.not = icmp eq i32 %cond, 0
+ br label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ br i1 %tobool.not, label %for.inc, label %if.then
+
+if.then:
+ %arraydecay = getelementptr inbounds [123 x i32], [123 x i32] addrspace(200)* %buf1, i64 0, i64 0
+ %arraydecay1 = getelementptr inbounds [22 x i32], [22 x i32] addrspace(200)* %buf2, i64 0, i64 0
+ call void @call(i32 addrspace(200)* nonnull %arraydecay, i32 addrspace(200)* nonnull %arraydecay1)
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond.not = icmp eq i32 %inc, 100
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics-purecap-only.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics-purecap-only.ll
new file mode 100644
index 0000000000000..1908423bb2692
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics-purecap-only.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/intrinsics-purecap-only.ll
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s -o - | FileCheck %s --check-prefix=PURECAP
+; RUN: not --crash llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d < %s -o - 2>&1 | FileCheck %s --check-prefix HYBRID-ERROR
+; This test checks target-independent CHERI intrinsics that are only available for purecap code
+
+; Currently the only purecap-only intrinsic is llvm.cheri.stack.cap.get()
+declare i8 addrspace(200)* @llvm.cheri.stack.cap.get()
+
+define i8 addrspace(200)* @stack_get() nounwind {
+; PURECAP-LABEL: stack_get:
+; PURECAP: # %bb.0:
+; PURECAP-NEXT: ymv ca0, csp
+; PURECAP-NEXT: ret
+ %cap = call i8 addrspace(200)* @llvm.cheri.stack.cap.get()
+ ret i8 addrspace(200)* %cap
+}
+; HYBRID-ERROR: LLVM ERROR: Cannot select: intrinsic %llvm.cheri.stack.cap.get
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics.ll
new file mode 100644
index 0000000000000..36f97c08c38d8
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/intrinsics.ll
@@ -0,0 +1,563 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/intrinsics.ll
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - < %s | FileCheck %s --check-prefix=PURECAP
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -o - < %s | FileCheck %s --check-prefix=HYBRID
+; Check that the target-independent CHERI intrinsics are supported for all architectures
+; The grouping/ordering in this test is based on the RISC-V instruction listing
+; in the CHERI ISA specification (Appendix C.1 in ISAv7).
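+
+; Illustrative usage sketch (not one of the generated checks): the inspection
+; and modification intrinsics compose, e.g. bounding a capability to 16 bytes
+; and then reading back the resulting length:
+;   %bounded = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i64(i8 addrspace(200)* %cap, i64 16)
+;   %len = call i64 @llvm.cheri.cap.length.get.i64(i8 addrspace(200)* %bounded)
+; Based on the checks below this should lower to ybndsrw (length in a GPR)
+; followed by ylenr.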
+ +; Capability-Inspection Instructions + +declare i64 @llvm.cheri.cap.perms.get.i64(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.type.get.i64(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.base.get.i64(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.length.get.i64(i8 addrspace(200)*) +declare i1 @llvm.cheri.cap.tag.get(i8 addrspace(200)*) +declare i1 @llvm.cheri.cap.sealed.get(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.offset.get.i64(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.flags.get.i64(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.address.get.i64(i8 addrspace(200)*) +declare i64 @llvm.cheri.cap.high.get.i64(i8 addrspace(200)*) + +define i64 @perms_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: perms_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ypermr a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: perms_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ypermr a0, ca0 +; HYBRID-NEXT: ret + %perms = call i64 @llvm.cheri.cap.perms.get.i64(i8 addrspace(200)* %cap) + ret i64 %perms +} + +define i64 @type_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: type_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytyper a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: type_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytyper a0, ca0 +; HYBRID-NEXT: ret + %type = call i64 @llvm.cheri.cap.type.get.i64(i8 addrspace(200)* %cap) + ret i64 %type +} + +define i64 @base_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: base_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybaser a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: base_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybaser a0, ca0 +; HYBRID-NEXT: ret + %base = call i64 @llvm.cheri.cap.base.get.i64(i8 addrspace(200)* %cap) + ret i64 %base +} + +define i64 @length_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: length_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ylenr a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: length_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ylenr a0, ca0 +; HYBRID-NEXT: ret + %length = call i64 @llvm.cheri.cap.length.get.i64(i8 addrspace(200)* %cap) + ret i64 %length +} + +define i64 @tag_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: tag_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytagr a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: tag_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, ca0 +; HYBRID-NEXT: ret + %tag = call i1 @llvm.cheri.cap.tag.get(i8 addrspace(200)* %cap) + %tag.zext = zext i1 %tag to i64 + ret i64 %tag.zext +} + +define i64 @sealed_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: sealed_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytyper a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: sealed_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytyper a0, ca0 +; HYBRID-NEXT: ret + %sealed = call i1 @llvm.cheri.cap.sealed.get(i8 addrspace(200)* %cap) + %sealed.zext = zext i1 %sealed to i64 + ret i64 %sealed.zext +} + +define i64 @offset_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: offset_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybaser a1, ca0 +; PURECAP-NEXT: sub a0, a0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: offset_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybaser a1, ca0 +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: ret + %offset = call i64 @llvm.cheri.cap.offset.get.i64(i8 addrspace(200)* %cap) + ret i64 %offset +} + +define i64 @flags_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: flags_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: li a0, 0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: flags_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ymoder a0, 
ca0 +; HYBRID-NEXT: ret + %flags = call i64 @llvm.cheri.cap.flags.get.i64(i8 addrspace(200)* %cap) + ret i64 %flags +} + +define i64 @address_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: address_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: mv a0, a0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: address_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: mv a0, a0 +; HYBRID-NEXT: ret + %address = call i64 @llvm.cheri.cap.address.get.i64(i8 addrspace(200)* %cap) + ret i64 %address +} + +define i64 @high_get(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: high_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: yhir a0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: high_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: yhir a0, ca0 +; HYBRID-NEXT: ret + %high = call i64 @llvm.cheri.cap.high.get.i64(i8 addrspace(200)* %cap) + ret i64 %high +} + +; Capability-Modification Instructions + +declare i8 addrspace(200)* @llvm.cheri.cap.seal(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.unseal(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.perms.and.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.flags.set.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.offset.set.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.address.set.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.bounds.set.exact.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.high.set.i64(i8 addrspace(200)*, i64) +declare i8 addrspace(200)* @llvm.cheri.cap.tag.clear(i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.build(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.type.copy(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.conditional.seal(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.seal.entry(i8 addrspace(200)*) + +define i8 addrspace(200)* @seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: seal: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: seal: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %sealed = call i8 addrspace(200)* @llvm.cheri.cap.seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %sealed +} + +define i8 addrspace(200)* @unseal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: unseal: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: unseal: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %unsealed = call i8 addrspace(200)* @llvm.cheri.cap.unseal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %unsealed +} + +define i8 addrspace(200)* @perms_and(i8 addrspace(200)* %cap, i64 %perms) nounwind { +; PURECAP-LABEL: perms_and: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ypermc ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: perms_and: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ypermc ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.perms.and.i64(i8 addrspace(200)* %cap, i64 %perms) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @flags_set(i8 addrspace(200)* %cap, i64 %flags) nounwind { +; PURECAP-LABEL: flags_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: flags_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ymodew ca0, ca0, a1 +; HYBRID-NEXT: ret + 
%newcap = call i8 addrspace(200)* @llvm.cheri.cap.flags.set.i64(i8 addrspace(200)* %cap, i64 %flags) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @offset_set(i8 addrspace(200)* %cap, i64 %offset) nounwind { +; PURECAP-LABEL: offset_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybaser a2, ca0 +; PURECAP-NEXT: yaddrw ca0, ca0, a2 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: offset_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybaser a2, ca0 +; HYBRID-NEXT: yaddrw ca0, ca0, a2 +; HYBRID-NEXT: addy ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.offset.set.i64(i8 addrspace(200)* %cap, i64 %offset) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @address_set(i8 addrspace(200)* %cap, i64 %address) nounwind { +; PURECAP-LABEL: address_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: yaddrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: address_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: yaddrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.address.set.i64(i8 addrspace(200)* %cap, i64 %address) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @bounds_set(i8 addrspace(200)* %cap, i64 %bounds) nounwind { +; PURECAP-LABEL: bounds_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybndsrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: bounds_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybndsrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i64(i8 addrspace(200)* %cap, i64 %bounds) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @bounds_set_exact(i8 addrspace(200)* %cap, i64 %bounds) nounwind { +; PURECAP-LABEL: bounds_set_exact: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybndsw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: bounds_set_exact: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybndsw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.exact.i64(i8 addrspace(200)* %cap, i64 %bounds) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @high_set(i8 addrspace(200)* %cap, i64 %high) nounwind { +; PURECAP-LABEL: high_set: +; PURECAP: # %bb.0: +; PURECAP-NEXT: yhiw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: high_set: +; HYBRID: # %bb.0: +; HYBRID-NEXT: yhiw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.high.set.i64(i8 addrspace(200)* %cap, i64 %high) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @bounds_set_immediate(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: bounds_set_immediate: +; PURECAP: # %bb.0: +; PURECAP-NEXT: li a1, 42 +; PURECAP-NEXT: ybndsrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: bounds_set_immediate: +; HYBRID: # %bb.0: +; HYBRID-NEXT: li a1, 42 +; HYBRID-NEXT: ybndsrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.bounds.set.i64(i8 addrspace(200)* %cap, i64 42) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @tag_clear(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: tag_clear: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: tag_clear: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %untagged = call i8 addrspace(200)* @llvm.cheri.cap.tag.clear(i8 addrspace(200)* %cap) + ret i8 addrspace(200)* %untagged +} + +define i8 addrspace(200)* @build(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: build: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ybld 
ca0, ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: build: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ybld ca0, ca0, ca1 +; HYBRID-NEXT: ret + %built = call i8 addrspace(200)* @llvm.cheri.cap.build(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %built +} + +define i8 addrspace(200)* @type_copy(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: type_copy: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: type_copy: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.type.copy(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @conditional_seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: conditional_seal: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: conditional_seal: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.conditional.seal(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @seal_entry(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: seal_entry: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ysentry ca0, ca0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: seal_entry: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ysentry ca0, ca0 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.seal.entry(i8 addrspace(200)* %cap) + ret i8 addrspace(200)* %newcap +} + +; Pointer-Arithmetic Instructions + +declare i64 @llvm.cheri.cap.to.pointer(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.cap.from.pointer(i8 addrspace(200)*, i64) +declare i64 @llvm.cheri.cap.diff(i8 addrspace(200)*, i8 addrspace(200)*) +declare i8 addrspace(200)* @llvm.cheri.ddc.get() +declare i8 addrspace(200)* @llvm.cheri.pcc.get() + +define i64 @to_pointer(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: to_pointer: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytagr a0, ca1 +; PURECAP-NEXT: neg a0, a0 +; PURECAP-NEXT: and a0, a1, a0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: to_pointer: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, ca1 +; HYBRID-NEXT: neg a0, a0 +; HYBRID-NEXT: and a0, a1, a0 +; HYBRID-NEXT: ret + %ptr = call i64 @llvm.cheri.cap.to.pointer(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i64 %ptr +} + +define i64 @to_pointer_ddc_relative(i8 addrspace(200)* %cap) nounwind { +; PURECAP-LABEL: to_pointer_ddc_relative: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ytagr a1, ca0 +; PURECAP-NEXT: neg a1, a1 +; PURECAP-NEXT: and a0, a0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: to_pointer_ddc_relative: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, ca0 +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a0, a0, a1 +; HYBRID-NEXT: ret + %ddc = call i8 addrspace(200)* @llvm.cheri.ddc.get() + %ptr = call i64 @llvm.cheri.cap.to.pointer(i8 addrspace(200)* %ddc, i8 addrspace(200)* %cap) + ret i64 %ptr +} + +define i8 addrspace(200)* @from_pointer(i8 addrspace(200)* %cap, i64 %ptr) nounwind { +; PURECAP-LABEL: from_pointer: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bnez a1, .LBB27_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; PURECAP-NEXT: .LBB27_2: +; PURECAP-NEXT: yaddrw ca0, ca0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: from_pointer: +; HYBRID: # %bb.0: +; HYBRID-NEXT: bnez a1, .LBB27_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; HYBRID-NEXT: .LBB27_2: +; HYBRID-NEXT: 
yaddrw ca0, ca0, a1 +; HYBRID-NEXT: ret + %newcap = call i8 addrspace(200)* @llvm.cheri.cap.from.pointer(i8 addrspace(200)* %cap, i64 %ptr) + ret i8 addrspace(200)* %newcap +} + +define i8 addrspace(200)* @from_ddc(i64 %ptr) nounwind { +; PURECAP-LABEL: from_ddc: +; PURECAP: # %bb.0: +; PURECAP-NEXT: bnez a0, .LBB28_2 +; PURECAP-NEXT: # %bb.1: +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; PURECAP-NEXT: .LBB28_2: +; PURECAP-NEXT: yaddrw ca0, cnull, a0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: from_ddc: +; HYBRID: # %bb.0: +; HYBRID-NEXT: csrrc ca1, ddc, zero +; HYBRID-NEXT: bnez a0, .LBB28_2 +; HYBRID-NEXT: # %bb.1: +; HYBRID-NEXT: ymv ca0, cnull +; HYBRID-NEXT: ret +; HYBRID-NEXT: .LBB28_2: +; HYBRID-NEXT: yaddrw ca0, ca1, a0 +; HYBRID-NEXT: ret + %ddc = call i8 addrspace(200)* @llvm.cheri.ddc.get() + %cap = call i8 addrspace(200)* @llvm.cheri.cap.from.pointer(i8 addrspace(200)* %ddc, i64 %ptr) + ret i8 addrspace(200)* %cap +} + +define i64 @diff(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: diff: +; PURECAP: # %bb.0: +; PURECAP-NEXT: sub a0, a0, a1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: diff: +; HYBRID: # %bb.0: +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: ret + %diff = call i64 @llvm.cheri.cap.diff(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + ret i64 %diff +} + +define i8 addrspace(200)* @ddc_get() nounwind { +; PURECAP-LABEL: ddc_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ymv ca0, cnull +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: ddc_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: csrrc ca0, ddc, zero +; HYBRID-NEXT: ret + %cap = call i8 addrspace(200)* @llvm.cheri.ddc.get() + ret i8 addrspace(200)* %cap +} + +define i8 addrspace(200)* @pcc_get() nounwind { +; PURECAP-LABEL: pcc_get: +; PURECAP: # %bb.0: +; PURECAP-NEXT: auipc ca0, 0 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: pcc_get: +; HYBRID: # %bb.0: +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: auipcc ca0, 0 +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %cap = call i8 addrspace(200)* @llvm.cheri.pcc.get() + ret i8 addrspace(200)* %cap +} + +; Assertion Instructions + +declare i1 @llvm.cheri.cap.subset.test(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + +define i64 @subset_test(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: subset_test: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ylt a0, ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: subset_test: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ylt a0, ca0, ca1 +; HYBRID-NEXT: ret + %subset = call i1 @llvm.cheri.cap.subset.test(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + %subset.zext = zext i1 %subset to i64 + ret i64 %subset.zext +} + +declare i1 @llvm.cheri.cap.equal.exact(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + +define i64 @equal_exact(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) nounwind { +; PURECAP-LABEL: equal_exact: +; PURECAP: # %bb.0: +; PURECAP-NEXT: syeq a0, ca0, ca1 +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: equal_exact: +; HYBRID: # %bb.0: +; HYBRID-NEXT: syeq a0, ca0, ca1 +; HYBRID-NEXT: ret + %eqex = call i1 @llvm.cheri.cap.equal.exact(i8 addrspace(200)* %cap1, i8 addrspace(200)* %cap2) + %eqex.zext = zext i1 %eqex to i64 + ret i64 %eqex.zext +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/landingpad-non-preemptible.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/landingpad-non-preemptible.ll new file mode 100644 index 0000000000000..c42906b1f902b --- /dev/null +++ 
b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/landingpad-non-preemptible.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/landingpad-non-preemptible.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d --relocation-model=pic < %s -o - | FileCheck %s +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d --relocation-model=pic < %s -o - -filetype=obj | llvm-readobj --relocs --symbols - | FileCheck %s --check-prefix=RELOCS +; Capabilities for exception landing pads were using preemptible relocations such as +; .chericap foo + .Ltmp - .Lfunc_begin instead of using a local alias. +; https://github.com/CTSRD-CHERI/llvm-project/issues/512 +; This test case was generated from the following C++ code: +; extern long foo(); +; int do_catch() { +; try { +; return foo(); +; } catch(int &i) { +; return 1; +; } catch(...) { +; return 2; +; } +; } + +@_ZTIi = external dso_local addrspace(200) constant ptr addrspace(200) +define dso_local noundef signext i32 @_Z8do_catchv() local_unnamed_addr addrspace(200) #0 personality ptr addrspace(200) @__gxx_personality_v0 { +; CHECK-LABEL: _Z8do_catchv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -48 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: sy cra, 32(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs0, 16(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs1, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -16 +; CHECK-NEXT: .cfi_offset s0, -32 +; CHECK-NEXT: .cfi_offset s1, -48 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: call _Z3foov +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: .LBB0_2: # %return +; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: ly cra, 32(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs0, 16(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs1, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 48 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %lpad +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: sext.w s1, a1 +; CHECK-NEXT: call __cxa_begin_catch +; CHECK-NEXT: li s0, 2 +; CHECK-NEXT: bne s1, s0, .LBB0_5 +; CHECK-NEXT: # %bb.4: # %catch1 +; CHECK-NEXT: call __cxa_end_catch +; CHECK-NEXT: li s0, 1 +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_5: # %catch +; CHECK-NEXT: call __cxa_end_catch +; CHECK-NEXT: j .LBB0_2 +entry: + %call = invoke noundef signext i32 @_Z3foov() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { ptr addrspace(200), i32 } + catch ptr addrspace(200) @_ZTIi + catch ptr addrspace(200) null + %1 = extractvalue { ptr addrspace(200), i32 } %0, 0 + %2 = extractvalue { ptr addrspace(200), i32 } %0, 1 + %3 = tail call i32 @llvm.eh.typeid.for(ptr addrspacecast (ptr addrspace(200) @_ZTIi to ptr)) nounwind + %matches = icmp eq i32 %2, %3 + %4 = tail call ptr addrspace(200) @__cxa_begin_catch(ptr addrspace(200) %1) nounwind + br i1 %matches, label %catch1, label %catch + +catch1: ; preds = %lpad + tail call void @__cxa_end_catch() nounwind + br label %return + +catch: ; preds = %lpad + tail call void @__cxa_end_catch() + br label %return + +return: ; preds = %entry, %catch1, %catch + %retval.0 = phi i32 [ 1, %catch1 ], [ 2, %catch ], [ %call, %entry ] + ret i32 %retval.0 +} + +declare dso_local i32 @_Z3foov() local_unnamed_addr addrspace(200) + +declare dso_local i32 
@__gxx_personality_v0(...) addrspace(200) + +declare i32 @llvm.eh.typeid.for(i8*) addrspace(200) nounwind readnone + +declare dso_local ptr addrspace(200) @__cxa_begin_catch(ptr addrspace(200)) local_unnamed_addr addrspace(200) + +declare dso_local void @__cxa_end_catch() local_unnamed_addr addrspace(200) + +; UTC_ARGS: --disable +; CHECK: .Lfunc_end0: +; CHECK-NEXT: .size _Z8do_catchv, .Lfunc_end0-_Z8do_catchv +; CHECK-NEXT: .size .L_Z8do_catchv$local, .Lfunc_end0-_Z8do_catchv + +; CHECK: GCC_except_table0: +; CHECK-NEXT: .Lexception0: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4 +; CHECK-NEXT: .uleb128 .Lttbase0-.Lttbaseref0 +; CHECK-NEXT: .Lttbaseref0: +; RISC-V uses DW_EH_PE_udata4 instead of uleb128 since uleb128 causes issues with linker relaxations. +; CHECK-NEXT: .byte 3 # Call site Encoding = udata4 +; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0 +; CHECK-NEXT: .Lcst_begin0: +; CHECK-NEXT: [[CS_DIRECTIVE:(\.uleb128)|(\.word)]] .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: [[CS_DIRECTIVE]] .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; Note: RISC-V uses DW_EH_PE_udata4, so the 0xc marker uses 4 bytes instead of 1 +; CHECK-NEXT: [[SMALL_CS_DIRECTIVE:(\.byte)|(\.word)]] 12 # (landing pad is a capability) +; Note: the following line should not be using _Z8do_catchv, but a local alias +; CHECK-NEXT: .chericap %code(.L_Z8do_catchv$local+(.Ltmp2-.Lfunc_begin0)) # jumps to .Ltmp2 +; CHECK-NEXT: .byte 3 # On action: 2 +; CHECK-NEXT: [[CS_DIRECTIVE]] .Ltmp1-.Lfunc_begin0 # >> Call Site 2 << +; CHECK-NEXT: [[CS_DIRECTIVE]] .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0 +; CHECK-NEXT: [[SMALL_CS_DIRECTIVE]] 0 # has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup +; CHECK-NEXT: .Lcst_end0: +; CHECK-NEXT: .byte 1 # >> Action Record 1 << +; CHECK-NEXT: # Catch TypeInfo 1 +; CHECK-NEXT: .byte 0 # No further actions +; CHECK-NEXT: .byte 2 # >> Action Record 2 << +; CHECK-NEXT: # Catch TypeInfo 2 +; CHECK-NEXT: .byte 125 # Continue to action 1 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # >> Catch TypeInfos << +; CHECK-NEXT: [[TI_LABEL:\.Ltmp[0-9]+]]: # TypeInfo 2 +; CHECK-NEXT: .{{4byte|word}} .L_ZTIi.DW.stub-[[TI_LABEL]] +; CHECK-NEXT: .{{4byte|word}} 0 # TypeInfo 1 +; CHECK-NEXT: .Lttbase0: +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: # -- End function + + + + +; RELOCS-LABEL: Relocations [ +; RELOCS-LABEL: Section ({{.+}}) .rela.gcc_except_table { +; RELOCS-NEXT: R_RISCV_CHERI_CAPABILITY_CODE .L_Z8do_catchv$local 0x34 +; RELOCS-NEXT: R_RISCV_ADD32 0x0 +; RELOCS-NEXT: R_RISCV_SUB32 0x0 +; RELOCS-NEXT: R_RISCV_ADD32 .L_ZTIi.DW.stub 0x0 +; RELOCS-NEXT: R_RISCV_SUB32 0x0 +; RELOCS-NEXT: } + +; The local alias should have the same type and non-zero size as the real function: +; RELOCS: Symbol { +; RELOCS-LABEL: Name: .L_Z8do_catchv$local ( +; RELOCS-NEXT: Value: 0x0 +; RELOCS-NEXT: Size: [[FN_SIZE:[1-9][0-9]*]] +; RELOCS-NEXT: Binding: Local (0x0) +; RELOCS-NEXT: Type: Function (0x2) +; RELOCS-NEXT: Other: 0 +; RELOCS-NEXT: Section: .text (0x2) +; RELOCS-NEXT: } +; RELOCS: Symbol { +; RELOCS-LABEL: Name: _Z8do_catchv ( +; RELOCS-NEXT: Value: 0x0 +; RELOCS-NEXT: Size: [[FN_SIZE]] +; RELOCS-NEXT: Binding: Global (0x1) +; RELOCS-NEXT: Type: Function (0x2) +; RELOCS-NEXT: Other: 0 +; RELOCS-NEXT: Section: .text (0x2) +; RELOCS-NEXT: } diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/machinelicm-hoist-csetbounds.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/machinelicm-hoist-csetbounds.ll new file 
mode 100644 index 0000000000000..fb9299fed9854 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/machinelicm-hoist-csetbounds.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/machinelicm-hoist-csetbounds.ll +; Previously LLVM would hoist CSetBounds instructions out of if conditions/loops +; even if the source pointer could be NULL. On MIPS and RISC-V this results in a +; tag violation so we must ensure that the CSetBounds happens after the NULL check. + +; Note: Opt correctly hoists the condition+csetbounds into a preheader, and LLC +; used to unconditionally hoist the csetbounds. +; RUN: opt -data-layout="e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200" -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d "-passes=default" -S < %s | FileCheck %s --check-prefix=HOIST-OPT +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O3 < %s | FileCheck %s + +; Generated from the following C code (with subobject bounds): +; struct foo { +; int src; +; int dst; +; }; +; +; void call(int* src, int* dst); +; +; void hoist_csetbounds(int cond, struct foo* f) { +; for (int i = 0; i < 100; i++) { +; if (f) { +; call(&f->src, &f->dst); +; } +; } +; } + +%struct.foo = type { i32, i32 } +declare dso_local void @call(ptr addrspace(200), ptr addrspace(200)) local_unnamed_addr addrspace(200) nounwind +declare ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200), i64) addrspace(200) nounwind readnone willreturn + +define dso_local void @hoist_csetbounds(i32 signext %cond, ptr addrspace(200) %f) local_unnamed_addr addrspace(200) nounwind { +; CHECK-LABEL: hoist_csetbounds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -96 +; CHECK-NEXT: sy cra, 80(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs0, 64(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs1, 48(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs2, 32(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs3, 16(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs4, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: ymv cs0, ca1 +; CHECK-NEXT: addiy ca0, ca1, 4 +; CHECK-NEXT: li s3, -1 +; CHECK-NEXT: li s4, 99 +; CHECK-NEXT: li a1, 4 +; CHECK-NEXT: ybndsrw cs1, cs0, a1 +; CHECK-NEXT: ybndsrw cs2, ca0, a1 +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %for.inc +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: addiw s3, s3, 1 +; CHECK-NEXT: bgeu s3, s4, .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: beqz s0, .LBB0_1 +; CHECK-NEXT: # %bb.3: # %if.then +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: ymv ca1, cs2 +; CHECK-NEXT: call call +; CHECK-NEXT: j .LBB0_1 +; CHECK-NEXT: .LBB0_4: # %for.cond.cleanup +; CHECK-NEXT: ly cra, 80(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs0, 64(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs1, 48(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs2, 32(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs3, 16(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs4, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 96 +; CHECK-NEXT: ret +; HOIST-OPT-LABEL: define dso_local void @hoist_csetbounds +; HOIST-OPT-SAME: (i32 signext [[COND:%.*]], ptr addrspace(200) [[F:%.*]]) local_unnamed_addr addrspace(200) 
#[[ATTR0:[0-9]+]] { +; HOIST-OPT-NEXT: entry: +; HOIST-OPT-NEXT: [[TOBOOL:%.*]] = icmp eq ptr addrspace(200) [[F]], null +; HOIST-OPT-NEXT: br i1 [[TOBOOL]], label [[FOR_COND_CLEANUP:%.*]], label [[ENTRY_SPLIT:%.*]] +; HOIST-OPT: entry.split: +; HOIST-OPT-NEXT: [[DST:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr addrspace(200) [[F]], i64 0, i32 1 +; HOIST-OPT-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[F]], i64 4) +; HOIST-OPT-NEXT: [[ADDRESS_WITH_BOUNDS1:%.*]] = tail call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[DST]], i64 4) +; HOIST-OPT-NEXT: br label [[FOR_BODY:%.*]] +; HOIST-OPT: for.cond.cleanup: +; HOIST-OPT-NEXT: ret void +; HOIST-OPT: for.body: +; HOIST-OPT-NEXT: [[I_06:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; HOIST-OPT-NEXT: tail call void @call(ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], ptr addrspace(200) [[ADDRESS_WITH_BOUNDS1]]) +; HOIST-OPT-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 +; HOIST-OPT-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_06]], 99 +; HOIST-OPT-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]] +; +entry: + %tobool = icmp eq ptr addrspace(200) %f, null + %dst = getelementptr inbounds %struct.foo, ptr addrspace(200) %f, i64 0, i32 1 + br label %for.body + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %for.inc, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %f, i64 4) + %address.with.bounds1 = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %dst, i64 4) + call void @call(ptr addrspace(200) %address.with.bounds, ptr addrspace(200) %address.with.bounds1) + br label %for.inc + +for.inc: ; preds = %if.then, %for.body + %inc = add nuw nsw i32 %i.06, 1 + %cmp = icmp ult i32 %i.06, 99 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-from-constant.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-from-constant.ll new file mode 100644 index 0000000000000..2fc06de8b1aae --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-from-constant.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-from-constant.ll +;; Copying from a zero constant can be converted to a memset (even with the tag preservation flags) +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s -o - | FileCheck %s + +@a = internal addrspace(200) constant ptr addrspace(200) null +@b = internal addrspace(200) constant ptr addrspace(200) null +@zero_constant = internal addrspace(200) constant [5 x ptr addrspace(200)] zeroinitializer +@constant_ptrs = internal addrspace(200) constant [2 x ptr addrspace(200)] [ptr addrspace(200) @a, ptr addrspace(200) @b] + +declare void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noalias nocapture writeonly, ptr addrspace(200) noalias nocapture readonly, i64, i1 immarg) addrspace(200) #0 + +define linkonce_odr void @copy_from_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant: +; CHECK: # %bb.0: # %do.body 
+; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sd zero, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 8, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB3_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB3_1)(ca1) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @constant_ptrs, i64 16, i1 false) + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB4_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB4_1)(ca1) +; CHECK-NEXT: ly ca1, 16(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) + ret void +} + +;; Run the same tests again this time with must_preserve_tags to check that we don't call memcpy(). 
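+;; For illustration only (not part of the generated checks): C along the lines
+;; of the sketch below would produce such a tag-preserving copy. The function
+;; name and the use of __builtin_memcpy over capability elements are
+;; assumptions for illustration, not taken from the test inputs:
+;;   void copy_two_caps(void *__capability *dst,
+;;                      const void *__capability *src) {
+;;     __builtin_memcpy(dst, src, 2 * sizeof(*dst)); /* tags must survive */
+;;   }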
+ +define linkonce_odr void @copy_from_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_zero_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_zero_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @zero_constant, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_large_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_large_zero_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: sd zero, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @zero_constant, i64 8, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB8_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB8_1)(ca1) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 @constant_ptrs, i64 16, i1 false) #1 + ret void +} + +define linkonce_odr void @copy_from_ptr_constant_with_offset_preserve(ptr addrspace(200) %dst) addrspace(200) { +; CHECK-LABEL: copy_from_ptr_constant_with_offset_preserve: +; CHECK: # %bb.0: # %do.body +; CHECK-NEXT: .LBB9_1: # %do.body +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(constant_ptrs) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB9_1)(ca1) +; CHECK-NEXT: ly ca1, 16(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret +do.body: + %src = getelementptr inbounds i8, ptr addrspace(200) @constant_ptrs, i64 16 + call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 %dst, ptr addrspace(200) align 16 %src, i64 16, i1 false) #1 + ret void +} + +;; Finally, check copying from a zero constant with insufficient known alignment. +;; We should be able to emit this inline since a zero constant source never has tags. 
+
+define linkonce_odr void @copy_from_underaligned_zero_constant(ptr addrspace(200) %dst) addrspace(200) {
+; CHECK-LABEL: copy_from_underaligned_zero_constant:
+; CHECK: # %bb.0: # %do.body
+; CHECK-NEXT: sd zero, 8(ca0)
+; CHECK-NEXT: sd zero, 0(ca0)
+; CHECK-NEXT: ret
+do.body:
+ call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1
+ ret void
+}
+
+define linkonce_odr void @copy_from_underaligned_zero_constant_preserve(ptr addrspace(200) %dst) addrspace(200) {
+; CHECK-LABEL: copy_from_underaligned_zero_constant_preserve:
+; CHECK: # %bb.0: # %do.body
+; CHECK-NEXT: sd zero, 8(ca0)
+; CHECK-NEXT: sd zero, 0(ca0)
+; CHECK-NEXT: ret
+do.body:
+ call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 8 %dst, ptr addrspace(200) align 8 @zero_constant, i64 16, i1 false) #1
+ ret void
+}
+
+attributes #0 = { argmemonly nocallback nofree nounwind willreturn }
+attributes #1 = { must_preserve_cheri_tags "frontend-memtransfer-type"="'const UChar * __capability' (aka 'const char16_t * __capability')" }
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-no-preserve-tags-attr.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-no-preserve-tags-attr.ll
new file mode 100644
index 0000000000000..b8c8ad318583c
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-no-preserve-tags-attr.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-no-preserve-tags-attr.ll
+; Check that the no_preserve_tags annotation on memcpy/memmove intrinsics allows
+; us to inline struct copies >= capability size.
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -o - < %s | FileCheck %s
+
+%struct.pair = type { i64, i64 }
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1)
+declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1)
+
+; Without a no_preserve_tags attribute we always call memcpy. In this case we
+; don't know whether the type might actually contain capabilities (e.g. unions).
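+; Illustrative sketch (assumed C, not taken from the test sources): a 16-byte
+; union where only one member carries a capability, so a plain copy must stay
+; conservative and go through memcpy:
+;   union maybe_cap { long words[2]; void *__capability cap; };
+;   void copy(union maybe_cap *a, const union maybe_cap *b) { *a = *b; }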
+define void @memcpy_no_attr(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memcpy_no_attr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: call memcpy +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 8 %a_i8, i8 addrspace(200)* align 8 %b_i8, i64 16, i1 false) + ret void +} + +define void @memmove_no_attr(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memmove_no_attr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: call memmove +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 8 %a_i8, i8 addrspace(200)* align 8 %b_i8, i64 16, i1 false) + ret void +} + +; We have to emit a call if the intrinsic has must_preserve_cheri_tags: +define void @memcpy_must_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memcpy_must_preserve: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: call memcpy +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 8 %a_i8, i8 addrspace(200)* align 8 %b_i8, i64 16, i1 false) must_preserve_cheri_tags + ret void +} + +define void @memmove_must_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memmove_must_preserve: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: call memmove +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 8 %a_i8, i8 addrspace(200)* align 8 %b_i8, i64 16, i1 false) must_preserve_cheri_tags + ret void +} + +; We should be able to inline the call memcpy/memmove if the intrinsic has no_preserve_cheri_tags: +define void @memcpy_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memcpy_no_preserve: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld a2, 8(ca1) +; CHECK-NEXT: sd a2, 8(ca0) +; CHECK-NEXT: ld a1, 0(ca1) +; CHECK-NEXT: sd a1, 0(ca0) +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 
addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 8 %a_i8, i8 addrspace(200)* align 8 %b_i8, i64 16, i1 false) no_preserve_cheri_tags + ret void +} + +define void @memmove_no_preserve(%struct.pair addrspace(200)* %a, %struct.pair addrspace(200)* %b) addrspace(200) nounwind { +; CHECK-LABEL: memmove_no_preserve: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld a2, 8(ca1) +; CHECK-NEXT: ld a1, 0(ca1) +; CHECK-NEXT: sd a2, 8(ca0) +; CHECK-NEXT: sd a1, 0(ca0) +; CHECK-NEXT: ret +entry: + %a_i8 = bitcast %struct.pair addrspace(200)* %a to i8 addrspace(200)* + %b_i8 = bitcast %struct.pair addrspace(200)* %b to i8 addrspace(200)* + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 8 %a_i8, i8 addrspace(200)* align 8 %b_i8, i64 16, i1 false) no_preserve_cheri_tags + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-assume-aligned.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-assume-aligned.ll new file mode 100644 index 0000000000000..0343452b01ab7 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-assume-aligned.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-preserve-tags-assume-aligned.ll +; Check that __builtin_assume_aligned does the right thing and allows us to elide the memcpy +; call even with must_preserve_cheri_tags attribute (run instcombine to propagate assume information) +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -S -passes=instcombine < %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O2 -o - | FileCheck %s +target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200" + +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1) +declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1) +declare void @llvm.assume(i1) addrspace(200) + +define void @memcpy_assume(i8 addrspace(200)* addrspace(200)* %local_cap_ptr, i8 addrspace(200)* %align1) addrspace(200) nounwind { +; CHECK-LABEL: memcpy_assume: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 16(ca0) +; CHECK-NEXT: sy ca2, 16(ca1) +; CHECK-NEXT: ly ca0, 0(ca0) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: ret + %ptrint = ptrtoint i8 addrspace(200)* %align1 to i64 + %maskedptr = and i64 %ptrint, 15 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %1 = bitcast i8 addrspace(200)* addrspace(200)* %local_cap_ptr to i8 addrspace(200)* + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 1 %align1, i8 addrspace(200)* align 16 %1, i64 32, i1 false) must_preserve_cheri_tags + ret void +} + +define void @memmove_assume(i8 addrspace(200)* addrspace(200)* %local_cap_ptr, i8 addrspace(200)* %align1) addrspace(200) nounwind { +; CHECK-LABEL: memmove_assume: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 16(ca0) +; CHECK-NEXT: ly ca0, 0(ca0) +; CHECK-NEXT: sy ca2, 16(ca1) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: ret + %ptrint = ptrtoint i8 addrspace(200)* %align1 to i64 + %maskedptr = and i64 %ptrint, 15 + %maskcond = icmp eq i64 %maskedptr, 0 
+ tail call void @llvm.assume(i1 %maskcond) + %1 = bitcast i8 addrspace(200)* addrspace(200)* %local_cap_ptr to i8 addrspace(200)* + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 1 %align1, i8 addrspace(200)* align 16 %1, i64 32, i1 false) must_preserve_cheri_tags + ret void +} + diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-size-not-multiple.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-size-not-multiple.ll new file mode 100644 index 0000000000000..e6777c137d1f9 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-preserve-tags-size-not-multiple.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-preserve-tags-size-not-multiple.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -o - -O0 -verify-machineinstrs %s | FileCheck %s -check-prefixes CHECK +; Check that we can inline memmove/memcpy despite having the must_preserve_cheri_tags property and the size not +; being a multiple of CAP_SIZE. Since the pointers are aligned we can start with capability copies and use +; word/byte copies for the trailing bytes. +declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* nocapture, i8 addrspace(200)* nocapture readonly, i64, i1) addrspace(200) +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nocapture, i8 addrspace(200)* nocapture readonly, i64, i1) addrspace(200) + +define void @test_string_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) addrspace(200) nounwind { + ; Note: has must_preserve_cheri_tags, but this memmove can still be inlined since it's aligned +; CHECK-LABEL: test_string_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: ymv ca5, ca1 +; CHECK-NEXT: ymv ca1, ca0 +; CHECK-NEXT: ly ca0, 0(ca5) +; CHECK-NEXT: ly ca2, 16(ca5) +; CHECK-NEXT: ld a3, 32(ca5) +; CHECK-NEXT: lw a4, 40(ca5) +; CHECK-NEXT: lb a5, 44(ca5) +; CHECK-NEXT: sb a5, 44(ca1) +; CHECK-NEXT: sw a4, 40(ca1) +; CHECK-NEXT: sd a3, 32(ca1) +; CHECK-NEXT: sy ca2, 16(ca1) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: ret + call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %dst, i8 addrspace(200)* align 16 %src, i64 45, i1 false) must_preserve_cheri_tags + ret void +} + +define void @test_string_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) addrspace(200) nounwind { + ; Note: has must_preserve_cheri_tags, but this memcpy can still be inlined since it's aligned +; CHECK-LABEL: test_string_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy ca1, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: ymv ca1, ca0 +; CHECK-NEXT: ly ca0, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: lb a2, 44(ca0) +; CHECK-NEXT: sb a2, 44(ca1) +; CHECK-NEXT: lw a2, 40(ca0) +; CHECK-NEXT: sw a2, 40(ca1) +; CHECK-NEXT: ld a2, 32(ca0) +; CHECK-NEXT: sd a2, 32(ca1) +; CHECK-NEXT: ly ca2, 16(ca0) +; CHECK-NEXT: sy ca2, 16(ca1) +; CHECK-NEXT: ly ca0, 0(ca0) +; CHECK-NEXT: sy ca0, 0(ca1) +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %dst, i8 addrspace(200)* align 16 %src, i64 45, i1 false) must_preserve_cheri_tags + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-zeroinit.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-zeroinit.ll new file mode 100644 index 
0000000000000..916c54cf3fb38 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/memcpy-zeroinit.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/memcpy-zeroinit.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s -o - | FileCheck %s +; Check that the copy from the zeroinitializer global is turned into a series of zero stores +; or memset() as long as the memcpy is not volatile: + +%struct.umutex = type { i32, i32, [2 x i32], i8 addrspace(200)*, i32, [2 x i32] } + +@_thr_umutex_init.default_mtx = internal addrspace(200) constant %struct.umutex zeroinitializer, align 16 + +define void @_thr_umutex_init(%struct.umutex addrspace(200)* %mtx) local_unnamed_addr addrspace(200) nounwind "frame-pointer"="none" { +; CHECK-LABEL: _thr_umutex_init: +; CHECK: # %bb.0: +; CHECK-NEXT: sy cnull, 32(ca0) +; CHECK-NEXT: sy cnull, 16(ca0) +; CHECK-NEXT: sy cnull, 0(ca0) +; CHECK-NEXT: ret + %1 = bitcast %struct.umutex addrspace(200)* %mtx to i8 addrspace(200)* + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %1, i8 addrspace(200)* align 16 bitcast (%struct.umutex addrspace(200)* @_thr_umutex_init.default_mtx to i8 addrspace(200)*), i64 48, i1 false) + ret void +} + +define void @_thr_umutex_init_volatile(%struct.umutex addrspace(200)* %mtx) local_unnamed_addr addrspace(200) nounwind "frame-pointer"="none" { +; CHECK-LABEL: _thr_umutex_init_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: .LBB1_1: # Label of block must be emitted +; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(_thr_umutex_init.default_mtx) +; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB1_1)(ca1) +; CHECK-NEXT: ly ca2, 32(ca1) +; CHECK-NEXT: sy ca2, 32(ca0) +; CHECK-NEXT: ly ca2, 16(ca1) +; CHECK-NEXT: sy ca2, 16(ca0) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret + %1 = bitcast %struct.umutex addrspace(200)* %mtx to i8 addrspace(200)* + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %1, i8 addrspace(200)* align 16 bitcast (%struct.umutex addrspace(200)* @_thr_umutex_init.default_mtx to i8 addrspace(200)*), i64 48, i1 true) + ret void +} + +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly %0, i8 addrspace(200)* noalias nocapture readonly %1, i64 %2, i1 immarg %3) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/optsize-preserve-tags-memcpy-crash.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/optsize-preserve-tags-memcpy-crash.ll new file mode 100644 index 0000000000000..b05409d8a4658 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/optsize-preserve-tags-memcpy-crash.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/optsize-preserve-tags-memcpy-crash.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s -o - | FileCheck %s +; The following code copying 31 bytes (with capability alignment) using the +; must_preserve_tags attribute used to trigger a "(Align < CapSize)" assertion +; inside diagnoseInefficientCheriMemOp() when compiling with -Oz. 
+; This function should not be called since the reason we are falling back to memcpy +; is that the load/store limit is reached (and not the alignment). +; However, the code was checking for limit reached using a simple `(CapSize * Limit) < Size` +; check which fails here since the last 15 bytes need four (8 + 4 + 2 + 1 bytes) copies on +; architectures where LLVM does not emit misaligned loads/stores. + +define hidden void @optnone_preserve_tags_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optnone noinline nounwind { +; CHECK-LABEL: optnone_preserve_tags_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: lb a2, 30(ca1) +; CHECK-NEXT: sb a2, 30(ca0) +; CHECK-NEXT: lh a2, 28(ca1) +; CHECK-NEXT: sh a2, 28(ca0) +; CHECK-NEXT: lw a2, 24(ca1) +; CHECK-NEXT: sw a2, 24(ca0) +; CHECK-NEXT: ld a2, 16(ca1) +; CHECK-NEXT: sd a2, 16(ca0) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @optsize_preserve_tags_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optsize nounwind { +; CHECK-LABEL: optsize_preserve_tags_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: li a2, 31 +; CHECK-NEXT: call memcpy +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @default_preserve_tags_memcpy(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) nounwind { +; CHECK-LABEL: default_preserve_tags_memcpy: +; CHECK: # %bb.0: +; CHECK-NEXT: lb a2, 30(ca1) +; CHECK-NEXT: sb a2, 30(ca0) +; CHECK-NEXT: lh a2, 28(ca1) +; CHECK-NEXT: sh a2, 28(ca0) +; CHECK-NEXT: lw a2, 24(ca1) +; CHECK-NEXT: sw a2, 24(ca0) +; CHECK-NEXT: ld a2, 16(ca1) +; CHECK-NEXT: sd a2, 16(ca0) +; CHECK-NEXT: ly ca1, 0(ca1) +; CHECK-NEXT: sy ca1, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @optnone_preserve_tags_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optnone noinline nounwind { +; CHECK-LABEL: optnone_preserve_tags_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 0(ca1) +; CHECK-NEXT: ld a3, 16(ca1) +; CHECK-NEXT: lw a4, 24(ca1) +; CHECK-NEXT: lh a5, 28(ca1) +; CHECK-NEXT: lb a1, 30(ca1) +; CHECK-NEXT: sb a1, 30(ca0) +; CHECK-NEXT: sh a5, 28(ca0) +; CHECK-NEXT: sw a4, 24(ca0) +; CHECK-NEXT: sd a3, 16(ca0) +; CHECK-NEXT: sy ca2, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @optsize_preserve_tags_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) optsize nounwind { +; CHECK-LABEL: optsize_preserve_tags_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: 
addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: li a2, 31 +; CHECK-NEXT: call memmove +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: ret + tail call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +define hidden void @default_preserve_tags_memmove(i8 addrspace(200)* %dst, i8 addrspace(200)* %src) nounwind{ +; CHECK-LABEL: default_preserve_tags_memmove: +; CHECK: # %bb.0: +; CHECK-NEXT: ly ca2, 0(ca1) +; CHECK-NEXT: lb a3, 30(ca1) +; CHECK-NEXT: lh a4, 28(ca1) +; CHECK-NEXT: lw a5, 24(ca1) +; CHECK-NEXT: ld a1, 16(ca1) +; CHECK-NEXT: sb a3, 30(ca0) +; CHECK-NEXT: sh a4, 28(ca0) +; CHECK-NEXT: sw a5, 24(ca0) +; CHECK-NEXT: sd a1, 16(ca0) +; CHECK-NEXT: sy ca2, 0(ca0) +; CHECK-NEXT: ret + tail call void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %dst, i8 addrspace(200)* noundef nonnull align 16 dereferenceable(31) %src, i64 31, i1 false) must_preserve_cheri_tags + ret void +} + +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly, i8 addrspace(200)* noalias nocapture readonly, i64, i1 immarg) addrspace(200) +declare void @llvm.memmove.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly, i8 addrspace(200)* noalias nocapture readonly, i64, i1 immarg) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptradd-immediate.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptradd-immediate.ll new file mode 100644 index 0000000000000..0b238446fd6aa --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptradd-immediate.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/ptradd-immediate.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s | FileCheck %s --check-prefix=PURECAP +;; Hybrid baseline to compare against +; RUN: sed 's/addrspace(200)//g' %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d | FileCheck %s --check-prefix=HYBRID + +;; If both offsets are known to be non-negative it is safe to commute them and +;; use an immediate load. +define i32 @nneg_nneg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: nneg_nneg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 48 +; PURECAP-NEXT: srli a1, a1, 48 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: nneg_nneg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 48 +; HYBRID-NEXT: srli a1, a1, 48 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, 4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 1, i64 %x.ext + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If both offsets are known to be negative it is safe to commute them and use +;; an immediate load. 
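+;; Worked example (our arithmetic, not generated output): the address below is
+;; p + 4*%x.neg + (-4) with both terms negative, so computing p + 4*%x.neg
+;; first and folding the remaining -4 into the load immediate yields the same
+;; address, as the PURECAP `lw a0, -4(ca0)` check shows.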
+define i32 @neg_neg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: neg_neg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: ori a1, a1, 1 +; PURECAP-NEXT: slli a1, a1, 48 +; PURECAP-NEXT: srli a1, a1, 48 +; PURECAP-NEXT: neg a1, a1 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, -4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: neg_neg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 48 +; HYBRID-NEXT: srli a1, a1, 48 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: ori a1, a1, 4 +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: lw a0, -4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %x.pos = or i64 %x.ext, 1 + %x.neg = sub i64 0, %x.pos + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 -1, i64 %x.neg + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If one offset is known to be non-negative and the other negative it is not in +;; general safe to commute them and use an immediate load. +define i32 @nneg_neg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: nneg_neg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 48 +; PURECAP-NEXT: srli a1, a1, 48 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: ori a1, a1, 4 +; PURECAP-NEXT: li a2, 4 +; PURECAP-NEXT: sub a2, a2, a1 +; PURECAP-NEXT: addy ca0, ca0, a2 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: nneg_neg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 48 +; HYBRID-NEXT: srli a1, a1, 48 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: ori a1, a1, 4 +; HYBRID-NEXT: sub a0, a0, a1 +; HYBRID-NEXT: lw a0, 4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %x.pos = or i64 %x.ext, 1 + %x.neg = sub i64 0, %x.pos + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 1, i64 %x.neg + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If one offset is known to be non-negative and the other negative it is not in +;; general safe to commute them and use an immediate load. +define i32 @neg_nneg(ptr addrspace(200) %p, i16 %x) { +; PURECAP-LABEL: neg_nneg: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 48 +; PURECAP-NEXT: srli a1, a1, 48 +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addi a1, a1, -4 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: neg_nneg: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 48 +; HYBRID-NEXT: srli a1, a1, 48 +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, -4(a0) +; HYBRID-NEXT: ret + %x.ext = zext i16 %x to i64 + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 -1, i64 %x.ext + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If we do not know the sign of one offset it is not in general safe to +;; commute them and use an immediate load. +define i32 @nneg_unknown(ptr addrspace(200) %p, i64 %x) { +; PURECAP-LABEL: nneg_unknown: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addi a1, a1, 4 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: nneg_unknown: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, 4(a0) +; HYBRID-NEXT: ret + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 1, i64 %x + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} + +;; If we do not know the sign of one offset it is not in general safe to +;; commute them and use an immediate load. 
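+;; A plausible reading (not stated in the generated file): with an unknown-sign
+;; index, commuting could move the intermediate capability outside the bounds
+;; representable for the object and invalidate it, even though the final
+;; address would be fine, so the constant is added up front instead of being
+;; folded into the load immediate.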
+define i32 @neg_unknown(ptr addrspace(200) %p, i64 %x) { +; PURECAP-LABEL: neg_unknown: +; PURECAP: # %bb.0: +; PURECAP-NEXT: slli a1, a1, 2 +; PURECAP-NEXT: addi a1, a1, -4 +; PURECAP-NEXT: addy ca0, ca0, a1 +; PURECAP-NEXT: lw a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: neg_unknown: +; HYBRID: # %bb.0: +; HYBRID-NEXT: slli a1, a1, 2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: lw a0, -4(a0) +; HYBRID-NEXT: ret + %q = getelementptr [1 x i32], ptr addrspace(200) %p, i64 -1, i64 %x + %ret = load i32, ptr addrspace(200) %q + ret i32 %ret +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptrtoint.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptrtoint.ll new file mode 100644 index 0000000000000..f7099d6f754e5 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/ptrtoint.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/ptrtoint.ll +;; Check that we can correctly generate code for ptrtoint and perform simple folds +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d < %s | FileCheck %s --check-prefix=HYBRID + +define internal i64 @ptrtoint(i8 addrspace(200)* %cap) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint: +; CHECK: # %bb.0: +; CHECK-NEXT: mv a0, a0 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, ca0 +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a0, a0, a1 +; HYBRID-NEXT: ret + %ret = ptrtoint i8 addrspace(200)* %cap to i64 + ret i64 %ret +} + +define internal i64 @ptrtoint_plus_const(i8 addrspace(200)* %cap) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_plus_const: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 2 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_plus_const: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, ca0 +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a0, a0, a1 +; HYBRID-NEXT: addi a0, a0, 2 +; HYBRID-NEXT: ret + %zero = ptrtoint i8 addrspace(200)* %cap to i64 + %ret = add i64 %zero, 2 + ret i64 %ret +} + +define internal i64 @ptrtoint_plus_var(i8 addrspace(200)* %cap, i64 %add) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_plus_var: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_plus_var: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a2, ca0 +; HYBRID-NEXT: neg a2, a2 +; HYBRID-NEXT: and a0, a0, a2 +; HYBRID-NEXT: add a0, a0, a1 +; HYBRID-NEXT: ret + %zero = ptrtoint i8 addrspace(200)* %cap to i64 + %ret = add i64 %zero, %add + ret i64 %ret +} + +define internal i64 @ptrtoint_null() addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_null: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_null: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, cnull +; HYBRID-NEXT: neg a0, a0 +; HYBRID-NEXT: and a0, zero, a0 +; HYBRID-NEXT: ret + %ret = ptrtoint i8 addrspace(200)* null to i64 + ret i64 %ret +} + +define internal i64 @ptrtoint_null_plus_const() addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_null_plus_const: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_null_plus_const: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a0, cnull +; HYBRID-NEXT: neg a0, a0 +; HYBRID-NEXT: and a0, zero, a0 +; HYBRID-NEXT: addi a0, a0, 2 +; HYBRID-NEXT: ret + 
%zero = ptrtoint i8 addrspace(200)* null to i64 + %ret = add i64 %zero, 2 + ret i64 %ret +} + +define internal i64 @ptrtoint_null_plus_var(i64 %add) addrspace(200) nounwind { +; CHECK-LABEL: ptrtoint_null_plus_var: +; CHECK: # %bb.0: +; CHECK-NEXT: add a0, zero, a0 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: ptrtoint_null_plus_var: +; HYBRID: # %bb.0: +; HYBRID-NEXT: ytagr a1, cnull +; HYBRID-NEXT: neg a1, a1 +; HYBRID-NEXT: and a1, zero, a1 +; HYBRID-NEXT: add a0, a1, a0 +; HYBRID-NEXT: ret + %zero = ptrtoint i8 addrspace(200)* null to i64 + %ret = add i64 %zero, %add + ret i64 %ret +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/purecap-jumptable.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/purecap-jumptable.ll new file mode 100644 index 0000000000000..09297e0b127ec --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/purecap-jumptable.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/purecap-jumptable.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d < %s -o - | FileCheck %s +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -relocation-model=static < %s -o - | FileCheck %s +; Check that we can generate jump tables for switch statements. +; TODO: this is currently not implemented for CHERI-RISC-V + +define void @below_threshold(i32 %in, i32 addrspace(200)* %out) nounwind { +; CHECK-LABEL: below_threshold: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: beq a0, a2, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: bne a0, a2, .LBB0_5 +; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: j .LBB0_4 +; CHECK-NEXT: .LBB0_3: # %bb2 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: .LBB0_4: # %exit +; CHECK-NEXT: sw a0, 0(ca1) +; CHECK-NEXT: .LBB0_5: # %exit +; CHECK-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + ] +bb1: + store i32 4, i32 addrspace(200)* %out + br label %exit +bb2: + store i32 3, i32 addrspace(200)* %out + br label %exit +exit: + ret void +} + +; For RISC-V the jump table threshold is set to 5 cases, but MIPS uses the default +; value of 4 (set in llvm/lib/CodeGen/TargetLoweringBase.cpp). 
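+; Roughly equivalent C for the four-case switch below (an assumed
+; reconstruction, not taken from the test inputs):
+;   void above_threshold_mips(int in, int *out) {
+;     switch (in) {
+;     case 1: *out = 4; break;
+;     case 2: *out = 3; break;
+;     case 3: *out = 2; break;
+;     case 4: *out = 1; break;
+;     }
+;   }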
+define void @above_threshold_mips(i32 %in, i32 addrspace(200)* %out) nounwind { +; CHECK-LABEL: above_threshold_mips: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: blt a2, a0, .LBB1_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: beq a0, a2, .LBB1_7 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: li a2, 2 +; CHECK-NEXT: bne a0, a2, .LBB1_10 +; CHECK-NEXT: # %bb.3: # %bb2 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: j .LBB1_9 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: li a2, 3 +; CHECK-NEXT: beq a0, a2, .LBB1_8 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: li a2, 4 +; CHECK-NEXT: bne a0, a2, .LBB1_10 +; CHECK-NEXT: # %bb.6: # %bb4 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: j .LBB1_9 +; CHECK-NEXT: .LBB1_7: # %bb1 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: j .LBB1_9 +; CHECK-NEXT: .LBB1_8: # %bb3 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: .LBB1_9: # %exit +; CHECK-NEXT: sw a0, 0(ca1) +; CHECK-NEXT: .LBB1_10: # %exit +; CHECK-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] +bb1: + store i32 4, i32 addrspace(200)* %out + br label %exit +bb2: + store i32 3, i32 addrspace(200)* %out + br label %exit +bb3: + store i32 2, i32 addrspace(200)* %out + br label %exit +bb4: + store i32 1, i32 addrspace(200)* %out + br label %exit +exit: + ret void +} + +; UTC_ARGS: --disable +; UTC_ARGS: --enable + +define void @above_threshold_all(i32 %in, i32 addrspace(200)* %out) nounwind { +; CHECK-LABEL: above_threshold_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: li a2, 5 +; CHECK-NEXT: bltu a2, a0, .LBB2_9 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addiy csp, csp, -16 +; CHECK-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: .LBB2_10: # %entry +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca2, %pcrel_hi(.LJTI2_0) +; CHECK-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB2_10) +; CHECK-NEXT: addy ca0, ca2, a0 +; CHECK-NEXT: lw a0, 0(ca0) +; CHECK-NEXT: .LBB2_11: # %entry +; CHECK-NEXT: # Label of block must be emitted +; CHECK-NEXT: auipcc ca2, %pcrel_hi(.Labove_threshold_all$jump_table_base) +; CHECK-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB2_11) +; CHECK-NEXT: addy ca0, ca2, a0 +; CHECK-NEXT: jr ca0 +; CHECK-NEXT: .LBB2_2: # %bb1 +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_3: # %bb2 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_4: # %bb3 +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_5: # %bb4 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_6: # %bb5 +; CHECK-NEXT: li a0, 100 +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_7: # %bb6 +; CHECK-NEXT: li a0, 200 +; CHECK-NEXT: .LBB2_8: +; CHECK-NEXT: sw a0, 0(ca1) +; CHECK-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 16 +; CHECK-NEXT: .LBB2_9: # %exit +; CHECK-NEXT: ret +entry: + switch i32 %in, label %exit [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + ] +bb1: + store i32 4, i32 addrspace(200)* %out + br label %exit +bb2: + store i32 3, i32 addrspace(200)* %out + br label %exit +bb3: + store i32 2, i32 addrspace(200)* %out + br label %exit +bb4: + store i32 1, i32 addrspace(200)* %out + br label %exit +bb5: + store i32 100, i32 addrspace(200)* %out + br label %exit +bb6: + store i32 200, i32 
addrspace(200)* %out
+ br label %exit
+exit:
+ ret void
+}
+
+; UTC_ARGS: --disable
+; CHECK-LABEL: .LJTI2_0:
+; CHECK-NEXT: .word .LBB2_2-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_3-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_4-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_5-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_6-.Labove_threshold_all$jump_table_base
+; CHECK-NEXT: .word .LBB2_7-.Labove_threshold_all$jump_table_base
+; UTC_ARGS: --enable
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/setoffset-multiple-uses.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/setoffset-multiple-uses.ll
new file mode 100644
index 0000000000000..895af7d17ce47
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/setoffset-multiple-uses.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/setoffset-multiple-uses.ll
+; RUN: opt -S -passes=instcombine -o - %s | FileCheck %s
+; RUN: opt -S -passes=instcombine -o - %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O1 - -o - | %cheri_FileCheck %s --check-prefix ASM
+
+target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200"
+; Reduced test case for a crash in the new optimization to fold multiple setoffset calls (originally found when compiling libunwind)
+
+declare i64 @check_fold(i64) addrspace(200)
+declare void @check_fold_i8ptr(ptr addrspace(200)) addrspace(200)
+declare i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200)) addrspace(200)
+declare ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200), i64) addrspace(200)
+
+define void @infer_values_from_null_set_offset() addrspace(200) nounwind {
+; ASM-LABEL: infer_values_from_null_set_offset:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -16
+; ASM-NEXT: sy cra, 0(csp) # 16-byte Folded Spill
+; ASM-NEXT: lui a0, 30
+; ASM-NEXT: addiw a0, a0, 576
+; ASM-NEXT: call check_fold
+; ASM-NEXT: ly cra, 0(csp) # 16-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 16
+; ASM-NEXT: ret
+; CHECK-LABEL: define void @infer_values_from_null_set_offset
+; CHECK-SAME: () addrspace(200) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[OFFSET_CHECK:%.*]] = call i64 @check_fold(i64 123456)
+; CHECK-NEXT: ret void
+;
+ %with_offset = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) null, i64 123456)
+ %offset = call i64 @llvm.cheri.cap.offset.get.i64(ptr addrspace(200) nonnull %with_offset)
+ %offset_check = call i64 @check_fold(i64 %offset)
+ ret void
+}
+
+define void @multiple_uses_big_constant() addrspace(200) nounwind {
+; ASM-LABEL: multiple_uses_big_constant:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -32
+; ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill
+; ASM-NEXT: sy cs0, 0(csp) # 16-byte Folded Spill
+; ASM-NEXT: lui a0, 30
+; ASM-NEXT: addiw a0, a0, 576
+; ASM-NEXT: addy cs0, cnull, a0
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call check_fold_i8ptr
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call check_fold_i8ptr
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call check_fold_i8ptr
+; ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload
+; ASM-NEXT: ly cs0, 0(csp) # 16-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 32
+; ASM-NEXT: ret
+; CHECK-LABEL: define void @multiple_uses_big_constant
+; CHECK-SAME: () addrspace(200) #[[ATTR1]] {
+; CHECK-NEXT: call void
@check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 123456)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 123456)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 123456)) +; CHECK-NEXT: ret void +; + %with_offset = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) null, i64 123456) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + ret void +} + +; Here we should use an immediate cincoffset: +define void @multiple_uses_small_constant() addrspace(200) nounwind { +; ASM-LABEL: multiple_uses_small_constant: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: sy cra, 0(csp) # 16-byte Folded Spill +; ASM-NEXT: addiy ca0, cnull, 123 +; ASM-NEXT: call check_fold_i8ptr +; ASM-NEXT: addiy ca0, cnull, 123 +; ASM-NEXT: call check_fold_i8ptr +; ASM-NEXT: addiy ca0, cnull, 123 +; ASM-NEXT: call check_fold_i8ptr +; ASM-NEXT: ly cra, 0(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define void @multiple_uses_small_constant +; CHECK-SAME: () addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 123)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 123)) +; CHECK-NEXT: call void @check_fold_i8ptr(ptr addrspace(200) getelementptr (i8, ptr addrspace(200) null, i64 123)) +; CHECK-NEXT: ret void +; + %with_offset = call ptr addrspace(200) @llvm.cheri.cap.offset.set.i64(ptr addrspace(200) null, i64 123) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + call void @check_fold_i8ptr(ptr addrspace(200) %with_offset) + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-dynamic-alloca.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-dynamic-alloca.ll new file mode 100644 index 0000000000000..a115d8b17574f --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-dynamic-alloca.ll @@ -0,0 +1,305 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-bounds-dynamic-alloca.ll +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -cheri-bound-allocas -o - -S %s | FileCheck %s +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O0 %s -o - | FileCheck %s -check-prefix ASM +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O2 %s -o - | FileCheck %s -check-prefix ASM-OPT + +; reduced C test case: +; __builtin_va_list a; +; char *b; +; void c() { +; while (__builtin_va_arg(a, char)) +; b = __builtin_alloca(sizeof(b)); +; d(b); +; } +target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200" + +declare i32 @use_alloca(ptr addrspace(200)) local_unnamed_addr addrspace(200) + +define i32 @alloca_in_entry(i1 %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: alloca_in_entry: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -32 +; 
ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: andi a0, a1, 1 +; ASM-NEXT: beqz a0, .LBB0_4 +; ASM-NEXT: j .LBB0_1 +; ASM-NEXT: .LBB0_1: # %do_alloca +; ASM-NEXT: j .LBB0_2 +; ASM-NEXT: .LBB0_2: # %use_alloca_no_bounds +; ASM-NEXT: li a0, 1234 +; ASM-NEXT: sd a0, 8(csp) +; ASM-NEXT: j .LBB0_3 +; ASM-NEXT: .LBB0_3: # %use_alloca_need_bounds +; ASM-NEXT: addiy ca0, csp, 0 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: call use_alloca +; ASM-NEXT: j .LBB0_4 +; ASM-NEXT: .LBB0_4: # %exit +; ASM-NEXT: li a0, 123 +; ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; +; ASM-OPT-LABEL: alloca_in_entry: +; ASM-OPT: # %bb.0: # %entry +; ASM-OPT-NEXT: andi a0, a0, 1 +; ASM-OPT-NEXT: beqz a0, .LBB0_2 +; ASM-OPT-NEXT: # %bb.1: # %do_alloca +; ASM-OPT-NEXT: addiy csp, csp, -32 +; ASM-OPT-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; ASM-OPT-NEXT: li a0, 1234 +; ASM-OPT-NEXT: sd a0, 8(csp) +; ASM-OPT-NEXT: addiy ca0, csp, 0 +; ASM-OPT-NEXT: ybndsiw ca0, ca0, 16 +; ASM-OPT-NEXT: call use_alloca +; ASM-OPT-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; ASM-OPT-NEXT: addiy csp, csp, 32 +; ASM-OPT-NEXT: .LBB0_2: # %exit +; ASM-OPT-NEXT: li a0, 123 +; ASM-OPT-NEXT: ret +; CHECK-LABEL: define i32 @alloca_in_entry +; CHECK-SAME: (i1 [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i8], align 16, addrspace(200) +; CHECK-NEXT: br i1 [[ARG]], label [[DO_ALLOCA:%.*]], label [[EXIT:%.*]] +; CHECK: do_alloca: +; CHECK-NEXT: br label [[USE_ALLOCA_NO_BOUNDS:%.*]] +; CHECK: use_alloca_no_bounds: +; CHECK-NEXT: [[PTR_PLUS_ONE:%.*]] = getelementptr i64, ptr addrspace(200) [[ALLOCA]], i64 1 +; CHECK-NEXT: store i64 1234, ptr addrspace(200) [[PTR_PLUS_ONE]], align 8 +; CHECK-NEXT: br label [[USE_ALLOCA_NEED_BOUNDS:%.*]] +; CHECK: use_alloca_need_bounds: +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[ALLOCA]], i64 16) +; CHECK-NEXT: [[DOTSUB_LE:%.*]] = getelementptr inbounds [16 x i8], ptr addrspace(200) [[TMP0]], i64 0, i64 0 +; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @use_alloca(ptr addrspace(200) [[DOTSUB_LE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 123 +; +entry: + %alloca = alloca [16 x i8], align 16, addrspace(200) + br i1 %arg, label %do_alloca, label %exit + +do_alloca: ; preds = %entry + br label %use_alloca_no_bounds + +use_alloca_no_bounds: ; preds = %do_alloca + %ptr_plus_one = getelementptr i64, ptr addrspace(200) %alloca, i64 1 + store i64 1234, ptr addrspace(200) %ptr_plus_one, align 8 + br label %use_alloca_need_bounds + +use_alloca_need_bounds: ; preds = %use_alloca_no_bounds + %.sub.le = getelementptr inbounds [16 x i8], ptr addrspace(200) %alloca, i64 0, i64 0 + %call = call signext i32 @use_alloca(ptr addrspace(200) %.sub.le) + br label %exit + +exit: ; preds = %use_alloca_need_bounds, %entry + ret i32 123 +} + +define i32 @alloca_not_in_entry(i1 %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: alloca_not_in_entry: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -64 +; ASM-NEXT: sy cra, 48(csp) # 16-byte Folded Spill +; ASM-NEXT: sy cs0, 32(csp) # 16-byte Folded Spill +; ASM-NEXT: addiy cs0, csp, 64 +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: andi a0, a1, 1 +; ASM-NEXT: beqz a0, .LBB1_4 +; ASM-NEXT: j .LBB1_1 +; ASM-NEXT: .LBB1_1: # %do_alloca +; ASM-NEXT: ymv ca0, csp +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: addi a1, a1, 
-16 +; ASM-NEXT: yaddrw ca1, ca0, a1 +; ASM-NEXT: li a0, 16 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: sy ca0, -64(cs0) # 16-byte Folded Spill +; ASM-NEXT: ymv csp, ca1 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: sy ca0, -48(cs0) # 16-byte Folded Spill +; ASM-NEXT: j .LBB1_2 +; ASM-NEXT: .LBB1_2: # %use_alloca_no_bounds +; ASM-NEXT: ly ca1, -64(cs0) # 16-byte Folded Reload +; ASM-NEXT: li a0, 1234 +; ASM-NEXT: sd a0, 8(ca1) +; ASM-NEXT: j .LBB1_3 +; ASM-NEXT: .LBB1_3: # %use_alloca_need_bounds +; ASM-NEXT: ly ca0, -48(cs0) # 16-byte Folded Reload +; ASM-NEXT: call use_alloca +; ASM-NEXT: j .LBB1_4 +; ASM-NEXT: .LBB1_4: # %exit +; ASM-NEXT: li a0, 123 +; ASM-NEXT: addiy csp, cs0, -64 +; ASM-NEXT: ly cra, 48(csp) # 16-byte Folded Reload +; ASM-NEXT: ly cs0, 32(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 64 +; ASM-NEXT: ret +; +; ASM-OPT-LABEL: alloca_not_in_entry: +; ASM-OPT: # %bb.0: # %entry +; ASM-OPT-NEXT: andi a0, a0, 1 +; ASM-OPT-NEXT: beqz a0, .LBB1_2 +; ASM-OPT-NEXT: # %bb.1: # %do_alloca +; ASM-OPT-NEXT: addiy csp, csp, -32 +; ASM-OPT-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; ASM-OPT-NEXT: sy cs0, 0(csp) # 16-byte Folded Spill +; ASM-OPT-NEXT: addiy cs0, csp, 32 +; ASM-OPT-NEXT: addi a0, sp, -16 +; ASM-OPT-NEXT: yaddrw ca0, csp, a0 +; ASM-OPT-NEXT: li a1, 16 +; ASM-OPT-NEXT: ybndsrw ca1, ca0, a1 +; ASM-OPT-NEXT: ymv csp, ca0 +; ASM-OPT-NEXT: ybndsiw ca0, ca1, 16 +; ASM-OPT-NEXT: li a2, 1234 +; ASM-OPT-NEXT: sd a2, 8(ca1) +; ASM-OPT-NEXT: call use_alloca +; ASM-OPT-NEXT: addiy csp, cs0, -32 +; ASM-OPT-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; ASM-OPT-NEXT: ly cs0, 0(csp) # 16-byte Folded Reload +; ASM-OPT-NEXT: addiy csp, csp, 32 +; ASM-OPT-NEXT: .LBB1_2: # %exit +; ASM-OPT-NEXT: li a0, 123 +; ASM-OPT-NEXT: ret +; CHECK-LABEL: define i32 @alloca_not_in_entry +; CHECK-SAME: (i1 [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ARG]], label [[DO_ALLOCA:%.*]], label [[EXIT:%.*]] +; CHECK: do_alloca: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i8], align 16, addrspace(200) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.dynamic.i64(ptr addrspace(200) [[ALLOCA]], i64 16) +; CHECK-NEXT: br label [[USE_ALLOCA_NO_BOUNDS:%.*]] +; CHECK: use_alloca_no_bounds: +; CHECK-NEXT: [[PTR_PLUS_ONE:%.*]] = getelementptr i64, ptr addrspace(200) [[ALLOCA]], i64 1 +; CHECK-NEXT: store i64 1234, ptr addrspace(200) [[PTR_PLUS_ONE]], align 8 +; CHECK-NEXT: br label [[USE_ALLOCA_NEED_BOUNDS:%.*]] +; CHECK: use_alloca_need_bounds: +; CHECK-NEXT: [[DOTSUB_LE:%.*]] = getelementptr inbounds [16 x i8], ptr addrspace(200) [[TMP0]], i64 0, i64 0 +; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @use_alloca(ptr addrspace(200) [[DOTSUB_LE]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 123 +; +entry: + br i1 %arg, label %do_alloca, label %exit + +do_alloca: ; preds = %entry + %alloca = alloca [16 x i8], align 16, addrspace(200) + br label %use_alloca_no_bounds + +use_alloca_no_bounds: ; preds = %do_alloca + %ptr_plus_one = getelementptr i64, ptr addrspace(200) %alloca, i64 1 + store i64 1234, ptr addrspace(200) %ptr_plus_one, align 8 + br label %use_alloca_need_bounds + +use_alloca_need_bounds: ; preds = %use_alloca_no_bounds + %.sub.le = getelementptr inbounds [16 x i8], ptr addrspace(200) %alloca, i64 0, i64 0 + %call = call signext i32 @use_alloca(ptr addrspace(200) %.sub.le) + br label %exit + +exit: ; preds = %use_alloca_need_bounds, %entry + ret i32 123 +} + +; The 
original reduced test case from libc/gen/exec.c +; We can't use llvm.cheri.bounded.stack.cap.i64 here, since that only works for static allocas: +define i32 @crash_reproducer(i1 %arg) local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: crash_reproducer: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -64 +; ASM-NEXT: sy cra, 48(csp) # 16-byte Folded Spill +; ASM-NEXT: sy cs0, 32(csp) # 16-byte Folded Spill +; ASM-NEXT: addiy cs0, csp, 64 +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: andi a0, a1, 1 +; ASM-NEXT: beqz a0, .LBB2_2 +; ASM-NEXT: j .LBB2_1 +; ASM-NEXT: .LBB2_1: # %entry.while.end_crit_edge +; ASM-NEXT: .LBB2_2: # %while.body +; ASM-NEXT: ymv ca0, csp +; ASM-NEXT: mv a1, a0 +; ASM-NEXT: addi a1, a1, -16 +; ASM-NEXT: yaddrw ca1, ca0, a1 +; ASM-NEXT: li a0, 16 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: ymv csp, ca1 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: sy ca0, -48(cs0) # 16-byte Folded Spill +; ASM-NEXT: j .LBB2_3 +; ASM-NEXT: .LBB2_3: # %while.end.loopexit +; ASM-NEXT: ly ca0, -48(cs0) # 16-byte Folded Reload +; ASM-NEXT: sy ca0, -64(cs0) # 16-byte Folded Spill +; ASM-NEXT: j .LBB2_4 +; ASM-NEXT: .LBB2_4: # %while.end +; ASM-NEXT: ly ca0, -64(cs0) # 16-byte Folded Reload +; ASM-NEXT: call use_alloca +; ASM-NEXT: addiw a0, a0, 1234 +; ASM-NEXT: addiy csp, cs0, -64 +; ASM-NEXT: ly cra, 48(csp) # 16-byte Folded Reload +; ASM-NEXT: ly cs0, 32(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 64 +; ASM-NEXT: ret +; +; ASM-OPT-LABEL: crash_reproducer: +; ASM-OPT: # %bb.0: # %entry +; ASM-OPT-NEXT: andi a0, a0, 1 +; ASM-OPT-NEXT: bnez a0, .LBB2_2 +; ASM-OPT-NEXT: # %bb.1: # %while.body +; ASM-OPT-NEXT: addiy csp, csp, -32 +; ASM-OPT-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; ASM-OPT-NEXT: sy cs0, 0(csp) # 16-byte Folded Spill +; ASM-OPT-NEXT: addiy cs0, csp, 32 +; ASM-OPT-NEXT: addi a0, sp, -16 +; ASM-OPT-NEXT: yaddrw ca0, csp, a0 +; ASM-OPT-NEXT: li a1, 16 +; ASM-OPT-NEXT: ybndsrw ca1, ca0, a1 +; ASM-OPT-NEXT: ymv csp, ca0 +; ASM-OPT-NEXT: ybndsiw ca0, ca1, 16 +; ASM-OPT-NEXT: call use_alloca +; ASM-OPT-NEXT: addiw a0, a0, 1234 +; ASM-OPT-NEXT: addiy csp, cs0, -32 +; ASM-OPT-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; ASM-OPT-NEXT: ly cs0, 0(csp) # 16-byte Folded Reload +; ASM-OPT-NEXT: addiy csp, csp, 32 +; ASM-OPT-NEXT: ret +; ASM-OPT-NEXT: .LBB2_2: # %entry.while.end_crit_edge +; CHECK-LABEL: define i32 @crash_reproducer +; CHECK-SAME: (i1 [[ARG:%.*]]) local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ARG]], label [[ENTRY_WHILE_END_CRIT_EDGE:%.*]], label [[WHILE_BODY:%.*]] +; CHECK: entry.while.end_crit_edge: +; CHECK-NEXT: unreachable +; CHECK: while.body: +; CHECK-NEXT: [[TMP0:%.*]] = alloca [16 x i8], align 16, addrspace(200) +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.dynamic.i64(ptr addrspace(200) [[TMP0]], i64 16) +; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] +; CHECK: while.end.loopexit: +; CHECK-NEXT: [[DOTSUB_LE:%.*]] = getelementptr inbounds [16 x i8], ptr addrspace(200) [[TMP1]], i64 0, i64 0 +; CHECK-NEXT: br label [[WHILE_END:%.*]] +; CHECK: while.end: +; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @use_alloca(ptr addrspace(200) [[DOTSUB_LE]]) +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[CALL]], 1234 +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + br i1 %arg, label %entry.while.end_crit_edge, label %while.body + +entry.while.end_crit_edge: ; preds = %entry + unreachable + +while.body: ; preds = %entry + %0 = alloca [16 x i8], align 16, addrspace(200) + br 
label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ %.sub.le = getelementptr inbounds [16 x i8], ptr addrspace(200) %0, i64 0, i64 0
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit
+ %call = call signext i32 @use_alloca(ptr addrspace(200) %.sub.le)
+ %result = add i32 %call, 1234
+ ret i32 %result
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-opaque-spill-too-early.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-opaque-spill-too-early.ll
new file mode 100644
index 0000000000000..7aecc6be24c15
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-opaque-spill-too-early.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-bounds-opaque-spill-too-early.ll
+;; After merging to LLVM 15, the switch to opaque pointers caused miscompilations
+;; in the stack bounding pass (the unbounded value was used instead of the
+;; bounded one due to the removal of the bitcast instructions).
+; REQUIRES: asserts
+; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -cheri-bound-allocas -o - -S %s -debug-only=cheri-bound-allocas 2>%t.dbg | FileCheck %s
+; RUN: FileCheck %s -input-file=%t.dbg -check-prefix DBG
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s -check-prefix ASM
+target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200"
+
+; DBG-LABEL: Checking function lazy_bind_args
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %0 = load ptr addrspace(200), ptr addrspace(200) %cap, align 16
+; DBG-NEXT: cheri-bound-allocas: -Load/store size=16, alloca size=16, current GEP offset=0 for ptr addrspace(200)
+; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %0 = load ptr addrspace(200), ptr addrspace(200) %cap, align 16
+; DBG-NEXT: cheri-bound-allocas: -Adding stack bounds since it is passed to call: %call = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull %cap)
+; DBG-NEXT: cheri-bound-allocas: Found alloca use that needs bounds: %call = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull %cap)
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 16
+; DBG-NEXT: cheri-bound-allocas: -Load/store size=16, alloca size=16, current GEP offset=0 for ptr addrspace(200)
+; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 16
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 16
+; DBG-NEXT: cheri-bound-allocas: -Stack slot used as value and not pointer -> must set bounds
+; DBG-NEXT: cheri-bound-allocas: Found alloca use that needs bounds: store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 16
+; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 16, ptr addrspace(200) nonnull %cap)
+; DBG-NEXT: cheri-bound-allocas: lazy_bind_args: 2 of 5 users need bounds for %cap
= alloca ptr addrspace(200), align 16, addrspace(200) +; DBG-NEXT: lazy_bind_args: setting bounds on stack alloca to 16 %cap = alloca ptr addrspace(200), align 16, addrspace(200) + +declare void @llvm.lifetime.start.p200(i64 immarg, ptr addrspace(200) nocapture) addrspace(200) + +declare ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef) addrspace(200) + +declare void @cheribsdtest_check_cap_eq(ptr addrspace(200) noundef, ptr addrspace(200) noundef) addrspace(200) + +define dso_local void @lazy_bind_args() addrspace(200) nounwind { +; ASM-LABEL: lazy_bind_args: +; ASM: # %bb.0: # %entry +; ASM-NEXT: addiy csp, csp, -32 +; ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; ASM-NEXT: addiy ca0, csp, 0 +; ASM-NEXT: ybndsiw ca0, ca0, 16 +; ASM-NEXT: sy ca0, 0(csp) +; ASM-NEXT: call cheribsdtest_dynamic_identity_cap +; ASM-NEXT: ly ca1, 0(csp) +; ASM-NEXT: ymv ca2, ca0 +; ASM-NEXT: ymv ca0, ca1 +; ASM-NEXT: ymv ca1, ca2 +; ASM-NEXT: call cheribsdtest_check_cap_eq +; ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; CHECK-LABEL: define dso_local void @lazy_bind_args +; CHECK-SAME: () addrspace(200) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CAP:%.*]] = alloca ptr addrspace(200), align 16, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 16, ptr addrspace(200) nonnull [[CAP]]) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[CAP]], i64 16) +; CHECK-NEXT: store ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[CAP]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[CAP]], i64 16) +; CHECK-NEXT: [[CALL:%.*]] = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[CAP]], align 16 +; CHECK-NEXT: call void @cheribsdtest_check_cap_eq(ptr addrspace(200) noundef [[TMP2]], ptr addrspace(200) noundef [[CALL]]) +; CHECK-NEXT: ret void +; +entry: + %cap = alloca ptr addrspace(200), align 16, addrspace(200) + call void @llvm.lifetime.start.p200(i64 16, ptr addrspace(200) nonnull %cap) + store ptr addrspace(200) %cap, ptr addrspace(200) %cap, align 16 + %call = call ptr addrspace(200) @cheribsdtest_dynamic_identity_cap(ptr addrspace(200) noundef nonnull %cap) + %0 = load ptr addrspace(200), ptr addrspace(200) %cap, align 16 + call void @cheribsdtest_check_cap_eq(ptr addrspace(200) noundef %0, ptr addrspace(200) noundef %call) + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-pass-phi.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-pass-phi.ll new file mode 100644 index 0000000000000..3a501f62c1e97 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-bounds-pass-phi.ll @@ -0,0 +1,169 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-bounds-pass-phi.ll +; REQUIRES: asserts +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -cheri-bound-allocas %s -o - -S -cheri-stack-bounds=if-needed \ +; RUN: -cheri-stack-bounds-single-intrinsic-threshold=10 -debug-only=cheri-bound-allocas 2>%t.dbg | FileCheck %s +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d 
-mattr=+y,+cap-mode,+f,+d -cheri-stack-bounds=if-needed -O2 -cheri-stack-bounds-single-intrinsic-threshold=10 < %s | %cheri_FileCheck %s -check-prefix ASM
+; RUN: FileCheck %s -check-prefix DBG -input-file=%t.dbg
+target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200"
+
+declare void @foo(ptr addrspace(200)) addrspace(200)
+
+; Check that we don't attempt to insert stack bounds intrinsics before the PHI at the start of a basic block:
+define void @test_phi(i1 %cond) addrspace(200) nounwind {
+; ASM-LABEL: test_phi:
+; ASM: # %bb.0: # %entry
+; ASM-NEXT: addiy csp, csp, -48
+; ASM-NEXT: sy cra, 32(csp) # 16-byte Folded Spill
+; ASM-NEXT: sy cs0, 16(csp) # 16-byte Folded Spill
+; ASM-NEXT: andi a0, a0, 1
+; ASM-NEXT: beqz a0, .LBB0_2
+; ASM-NEXT: # %bb.1: # %block1
+; ASM-NEXT: ymv ca0, cnull
+; ASM-NEXT: li a1, 1
+; ASM-NEXT: sw a1, 12(csp)
+; ASM-NEXT: li a1, 2
+; ASM-NEXT: sw a1, 8(csp)
+; ASM-NEXT: li a1, 3
+; ASM-NEXT: sw a1, 4(csp)
+; ASM-NEXT: addiy ca1, csp, 8
+; ASM-NEXT: j .LBB0_3
+; ASM-NEXT: .LBB0_2: # %block2
+; ASM-NEXT: li a0, 4
+; ASM-NEXT: sw a0, 12(csp)
+; ASM-NEXT: li a0, 5
+; ASM-NEXT: sw a0, 8(csp)
+; ASM-NEXT: li a0, 6
+; ASM-NEXT: sw a0, 4(csp)
+; ASM-NEXT: addiy ca0, csp, 12
+; ASM-NEXT: ybndsiw ca0, ca0, 4
+; ASM-NEXT: addiy ca1, csp, 4
+; ASM-NEXT: .LBB0_3: # %phi_block
+; ASM-NEXT: ybndsiw cs0, ca1, 4
+; ASM-NEXT: call foo
+; ASM-NEXT: ymv ca0, cs0
+; ASM-NEXT: call foo
+; ASM-NEXT: ly cra, 32(csp) # 16-byte Folded Reload
+; ASM-NEXT: ly cs0, 16(csp) # 16-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 48
+; ASM-NEXT: ret
+; CHECK-LABEL: define void @test_phi
+; CHECK-SAME: (i1 [[COND:%.*]]) addrspace(200) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: [[ALLOCA2:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: [[ALLOCA3:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: br i1 [[COND]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
+; CHECK: block1:
+; CHECK-NEXT: store i32 1, ptr addrspace(200) [[ALLOCA1]], align 4
+; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ALLOCA2]], align 4
+; CHECK-NEXT: store i32 3, ptr addrspace(200) [[ALLOCA3]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[ALLOCA2]], i64 4)
+; CHECK-NEXT: br label [[PHI_BLOCK:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: store i32 4, ptr addrspace(200) [[ALLOCA1]], align 4
+; CHECK-NEXT: store i32 5, ptr addrspace(200) [[ALLOCA2]], align 4
+; CHECK-NEXT: store i32 6, ptr addrspace(200) [[ALLOCA3]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[ALLOCA1]], i64 4)
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[ALLOCA3]], i64 4)
+; CHECK-NEXT: br label [[PHI_BLOCK]]
+; CHECK: phi_block:
+; CHECK-NEXT: [[VAL1:%.*]] = phi ptr addrspace(200) [ null, [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ]
+; CHECK-NEXT: [[VAL2:%.*]] = phi ptr addrspace(200) [ [[TMP0]], [[BLOCK1]] ], [ [[TMP2]], [[BLOCK2]] ]
+; CHECK-NEXT: call void @foo(ptr addrspace(200) [[VAL1]])
+; CHECK-NEXT: call void @foo(ptr addrspace(200) [[VAL2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %alloca1 = alloca i32, align 4, addrspace(200)
+ %alloca2 = alloca i32, align 4, addrspace(200)
+ %alloca3 = alloca i32, align 4, addrspace(200)
+ br i1 %cond, label %block1, label %block2
+
+block1:
+ store i32 1, ptr addrspace(200) %alloca1,
align 4
+ store i32 2, ptr addrspace(200) %alloca2, align 4
+ store i32 3, ptr addrspace(200) %alloca3, align 4
+ br label %phi_block
+
+block2:
+ store i32 4, ptr addrspace(200) %alloca1, align 4
+ store i32 5, ptr addrspace(200) %alloca2, align 4
+ store i32 6, ptr addrspace(200) %alloca3, align 4
+ br label %phi_block
+
+phi_block:
+ %val1 = phi ptr addrspace(200) [ null, %block1 ], [ %alloca1, %block2 ]
+ %val2 = phi ptr addrspace(200) [ %alloca2, %block1 ], [ %alloca3, %block2 ]
+ call void @foo(ptr addrspace(200) %val1)
+ call void @foo(ptr addrspace(200) %val2)
+ ret void
+}
+
+; Check that we don't place all bounded allocas in the entry block but instead only create them in the predecessor block
+define void @test_only_created_in_predecessor_block(i1 %cond) addrspace(200) nounwind {
+; ASM-LABEL: test_only_created_in_predecessor_block:
+; ASM: # %bb.0: # %entry
+; ASM-NEXT: addiy csp, csp, -32
+; ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill
+; ASM-NEXT: andi a0, a0, 1
+; ASM-NEXT: beqz a0, .LBB1_2
+; ASM-NEXT: # %bb.1: # %block1
+; ASM-NEXT: li a0, 1
+; ASM-NEXT: sw a0, 12(csp)
+; ASM-NEXT: addiy ca0, csp, 12
+; ASM-NEXT: j .LBB1_3
+; ASM-NEXT: .LBB1_2: # %block2
+; ASM-NEXT: li a0, 5
+; ASM-NEXT: sw a0, 8(csp)
+; ASM-NEXT: addiy ca0, csp, 8
+; ASM-NEXT: .LBB1_3: # %phi_block
+; ASM-NEXT: ybndsiw ca0, ca0, 4
+; ASM-NEXT: call foo
+; ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 32
+; ASM-NEXT: ret
+; CHECK-LABEL: define void @test_only_created_in_predecessor_block
+; CHECK-SAME: (i1 [[COND:%.*]]) addrspace(200) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: [[ALLOCA2:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: br i1 [[COND]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
+; CHECK: block1:
+; CHECK-NEXT: store i32 1, ptr addrspace(200) [[ALLOCA1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[ALLOCA1]], i64 4)
+; CHECK-NEXT: br label [[PHI_BLOCK:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: store i32 5, ptr addrspace(200) [[ALLOCA2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[ALLOCA2]], i64 4)
+; CHECK-NEXT: br label [[PHI_BLOCK]]
+; CHECK: phi_block:
+; CHECK-NEXT: [[VAL1:%.*]] = phi ptr addrspace(200) [ [[TMP0]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ]
+; CHECK-NEXT: call void @foo(ptr addrspace(200) [[VAL1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %alloca1 = alloca i32, align 4, addrspace(200)
+ %alloca2 = alloca i32, align 4, addrspace(200)
+ br i1 %cond, label %block1, label %block2
+
+block1:
+ store i32 1, ptr addrspace(200) %alloca1, align 4
+ br label %phi_block
+
+block2:
+ store i32 5, ptr addrspace(200) %alloca2, align 4
+ br label %phi_block
+
+phi_block:
+ %val1 = phi ptr addrspace(200) [ %alloca1, %block1 ], [ %alloca2, %block2 ]
+ call void @foo(ptr addrspace(200) %val1)
+ ret void
+}
+
+; DBG: -Adding stack bounds since phi user needs bounds: call void @foo(ptr addrspace(200) %val1)
+; DBG: test_phi: 1 of 3 users need bounds for %alloca1 = alloca i32, align 4, addrspace(200)
+; DBG: -Adding stack bounds since phi user needs bounds: call void @foo(ptr addrspace(200) %val2)
+; DBG: test_phi: 1 of 3 users need bounds for %alloca2 = alloca i32, align 4, addrspace(200)
+; DBG: -Adding stack bounds since phi user needs bounds: call void @foo(ptr addrspace(200) %val2)
+; DBG: test_phi: 1 of 3 users need bounds for %alloca3 = alloca
i32, align 4, addrspace(200)
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-spill-unnecessary.c.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-spill-unnecessary.c.ll
new file mode 100644
index 0000000000000..df4f3327d7947
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stack-spill-unnecessary.c.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stack-spill-unnecessary.c.ll
+; The new CheriBoundedStackPseudo instruction lets us pretend that the incoffset+csetbounds
+; is a single trivially rematerializable instruction, so that it can be freely moved around to avoid stack spills.
+; Previously we were moving the allocation of the register that is only used later to the beginning of
+; the function and saving+restoring it instead of materializing it just before its use.
+
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O2 --cheri-stack-bounds-single-intrinsic-threshold=0 < %s | %cheri_FileCheck %s --check-prefixes=CHECK
+; Always use a single intrinsic for the calls (should result in same codegen)
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -O2 --cheri-stack-bounds-single-intrinsic-threshold=0 < %s | %cheri_FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/addrspace(200)/addrspace(0)/g' %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d | FileCheck --check-prefix HYBRID %s
+
+
+declare void @foo() addrspace(200)
+declare void @one_arg(i32 addrspace(200)*) addrspace(200)
+declare void @multi_arg(i32 addrspace(200)* %start, i32 addrspace(200)* %end, i8 addrspace(200)* %buf) addrspace(200)
+
+define void @use_after_call() addrspace(200) nounwind {
+; CHECK-LABEL: use_after_call:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addiy csp, csp, -48
+; CHECK-NEXT: sy cra, 32(csp) # 16-byte Folded Spill
+; CHECK-NEXT: sy cs0, 16(csp) # 16-byte Folded Spill
+; CHECK-NEXT: addiy ca0, csp, 12
+; CHECK-NEXT: ybndsiw cs0, ca0, 4
+; CHECK-NEXT: li a0, 123
+; CHECK-NEXT: sw a0, 12(csp)
+; CHECK-NEXT: call foo
+; CHECK-NEXT: ymv ca0, cs0
+; CHECK-NEXT: call one_arg
+; CHECK-NEXT: ly cra, 32(csp) # 16-byte Folded Reload
+; CHECK-NEXT: ly cs0, 16(csp) # 16-byte Folded Reload
+; CHECK-NEXT: addiy csp, csp, 48
+; CHECK-NEXT: ret
+;
+; HYBRID-LABEL: use_after_call:
+; HYBRID: # %bb.0:
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: li a0, 123
+; HYBRID-NEXT: sw a0, 4(sp)
+; HYBRID-NEXT: call foo@plt
+; HYBRID-NEXT: addi a0, sp, 4
+; HYBRID-NEXT: call one_arg@plt
+; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+ %x = alloca i32, align 4, addrspace(200)
+ store i32 123, i32 addrspace(200)* %x, align 4
+ call void @foo()
+ call void @one_arg(i32 addrspace(200)* %x)
+ ret void
+}
+
+define void @use_after_call_no_store() addrspace(200) nounwind {
+; CHECK-LABEL: use_after_call_no_store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addiy csp, csp, -64
+; CHECK-NEXT: sy cra, 48(csp) # 16-byte Folded Spill
+; CHECK-NEXT: sy cs0, 32(csp) # 16-byte Folded Spill
+; CHECK-NEXT: sy cs1, 16(csp) # 16-byte Folded Spill
+; CHECK-NEXT: addiy ca0, csp, 12
+; CHECK-NEXT: ybndsiw cs0, ca0, 4
+; CHECK-NEXT: addiy ca0, csp, 8
+; CHECK-NEXT: ybndsiw cs1, ca0, 4
+; CHECK-NEXT: call foo
+; CHECK-NEXT: ymv ca0, cs0
+; CHECK-NEXT:
call one_arg +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ly cra, 48(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs0, 32(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs1, 16(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 64 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: use_after_call_no_store: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: call foo@plt +; HYBRID-NEXT: addi a0, sp, 4 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: mv a0, sp +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %x = alloca i32, align 4, addrspace(200) + %y = alloca i32, align 4, addrspace(200) + call void @foo() + call void @one_arg(i32 addrspace(200)* %x) + call void @one_arg(i32 addrspace(200)* %y) + ret void +} + +define void @multi_use() addrspace(200) nounwind { +; CHECK-LABEL: multi_use: +; CHECK: # %bb.0: +; CHECK-NEXT: addiy csp, csp, -64 +; CHECK-NEXT: sy cra, 48(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs0, 32(csp) # 16-byte Folded Spill +; CHECK-NEXT: sy cs1, 16(csp) # 16-byte Folded Spill +; CHECK-NEXT: addiy ca0, csp, 12 +; CHECK-NEXT: ybndsiw cs0, ca0, 4 +; CHECK-NEXT: addiy ca0, csp, 8 +; CHECK-NEXT: ybndsiw cs1, ca0, 4 +; CHECK-NEXT: call foo +; CHECK-NEXT: addiy ca1, cs1, 4 +; CHECK-NEXT: addiy ca2, cs1, 1 +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: call multi_arg +; CHECK-NEXT: ymv ca0, cs0 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ymv ca0, cs1 +; CHECK-NEXT: call one_arg +; CHECK-NEXT: ly cra, 48(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs0, 32(csp) # 16-byte Folded Reload +; CHECK-NEXT: ly cs1, 16(csp) # 16-byte Folded Reload +; CHECK-NEXT: addiy csp, csp, 64 +; CHECK-NEXT: ret +; +; HYBRID-LABEL: multi_use: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addi sp, sp, -16 +; HYBRID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; HYBRID-NEXT: call foo@plt +; HYBRID-NEXT: addi a1, sp, 4 +; HYBRID-NEXT: addi a2, sp, 1 +; HYBRID-NEXT: mv a0, sp +; HYBRID-NEXT: call multi_arg@plt +; HYBRID-NEXT: addi a0, sp, 4 +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: mv a0, sp +; HYBRID-NEXT: call one_arg@plt +; HYBRID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; HYBRID-NEXT: addi sp, sp, 16 +; HYBRID-NEXT: ret + %y = alloca i32, align 4, addrspace(200) + %x = alloca i32, align 4, addrspace(200) + call void @foo() + %x_plus0 = getelementptr inbounds i32, i32 addrspace(200)* %x, i32 0 + %x_plus1 = getelementptr i32, i32 addrspace(200)* %x, i32 1 + %x_i8 = bitcast i32 addrspace(200)* %x to i8 addrspace(200)* + %x_i8_plus_1 = getelementptr inbounds i8, i8 addrspace(200)* %x_i8, i32 1 + call void @multi_arg(i32 addrspace(200)* %x_plus0, i32 addrspace(200)* %x_plus1, i8 addrspace(200)* %x_i8_plus_1) + call void @one_arg(i32 addrspace(200)* %y) + call void @one_arg(i32 addrspace(200)* %x) + ret void +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stackframe-intrinsics.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stackframe-intrinsics.ll new file mode 100644 index 0000000000000..1a535e17502f3 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/stackframe-intrinsics.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/stackframe-intrinsics.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s 
-o - < %s | FileCheck %s --check-prefix=PURECAP
+; RUN: sed 's/addrspace(200)/addrspace(0)/g' %s | llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d | FileCheck %s --check-prefix HYBRID
+; Check that we can lower llvm.frameaddress/llvm.returnaddress
+
+; Capability-Inspection Instructions
+
+define dso_local ptr addrspace(200) @frameaddr() addrspace(200) nounwind {
+; PURECAP-LABEL: frameaddr:
+; PURECAP: # %bb.0: # %entry
+; PURECAP-NEXT: addiy csp, csp, -48
+; PURECAP-NEXT: sy cra, 32(csp) # 16-byte Folded Spill
+; PURECAP-NEXT: sy cs0, 16(csp) # 16-byte Folded Spill
+; PURECAP-NEXT: addiy cs0, csp, 48
+; PURECAP-NEXT: sy cs0, -48(cs0)
+; PURECAP-NEXT: ymv ca0, cs0
+; PURECAP-NEXT: ly cra, 32(csp) # 16-byte Folded Reload
+; PURECAP-NEXT: ly cs0, 16(csp) # 16-byte Folded Reload
+; PURECAP-NEXT: addiy csp, csp, 48
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: frameaddr:
+; HYBRID: # %bb.0: # %entry
+; HYBRID-NEXT: addi sp, sp, -32
+; HYBRID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; HYBRID-NEXT: addi s0, sp, 32
+; HYBRID-NEXT: sd s0, -32(s0)
+; HYBRID-NEXT: mv a0, s0
+; HYBRID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; HYBRID-NEXT: addi sp, sp, 32
+; HYBRID-NEXT: ret
+entry:
+ %b = alloca ptr addrspace(200), align 16, addrspace(200)
+ %0 = call ptr addrspace(200) @llvm.frameaddress.p200(i32 0)
+ store ptr addrspace(200) %0, ptr addrspace(200) %b, align 16
+ %1 = load ptr addrspace(200), ptr addrspace(200) %b, align 16
+ ret ptr addrspace(200) %1
+}
+
+declare ptr addrspace(200) @llvm.frameaddress.p200(i32 immarg) addrspace(200)
+
+define dso_local ptr addrspace(200) @retaddr() addrspace(200) nounwind {
+; PURECAP-LABEL: retaddr:
+; PURECAP: # %bb.0: # %entry
+; PURECAP-NEXT: addiy csp, csp, -16
+; PURECAP-NEXT: sy cra, 0(csp)
+; PURECAP-NEXT: ymv ca0, cra
+; PURECAP-NEXT: addiy csp, csp, 16
+; PURECAP-NEXT: ret
+;
+; HYBRID-LABEL: retaddr:
+; HYBRID: # %bb.0: # %entry
+; HYBRID-NEXT: addi sp, sp, -16
+; HYBRID-NEXT: sd ra, 0(sp)
+; HYBRID-NEXT: mv a0, ra
+; HYBRID-NEXT: addi sp, sp, 16
+; HYBRID-NEXT: ret
+entry:
+ %b = alloca ptr addrspace(200), align 16, addrspace(200)
+ %0 = call ptr addrspace(200) @llvm.returnaddress.p200(i32 0)
+ store ptr addrspace(200) %0, ptr addrspace(200) %b, align 16
+ %1 = load ptr addrspace(200), ptr addrspace(200) %b, align 16
+ ret ptr addrspace(200) %1
+}
+
+declare ptr addrspace(200) @llvm.returnaddress.p200(i32 immarg) addrspace(200)
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/strcpy-to-memcpy-no-tags.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/strcpy-to-memcpy-no-tags.ll
new file mode 100644
index 0000000000000..ac2cb507db707
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/strcpy-to-memcpy-no-tags.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/strcpy-to-memcpy-no-tags.ll
+; Check that we can inline the loads/stores generated when simplifying
+; string libcalls to memcpy() (since it should be marked as non-tag-preserving).
+; CHERI-GENERIC-UTC: llc
+; Note: unlike other tests we do want to test attributes in this one.
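+; As a rough sketch of the idea (illustrative only, not generated checks, and
+; the attribute id N below is a placeholder): SimplifyLibcalls is expected to
+; rewrite a call such as
+;   %call = call ptr addrspace(200) @strcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str)
+; into a fixed-size memcpy carrying the no_preserve_cheri_tags attribute:
+;   call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) %dst, ptr addrspace(200) @str, i64 17, i1 false) #N
+;   attributes #N = { no_preserve_cheri_tags nounwind }
+; Since such a copy provably never carries valid capabilities, the backend can
+; expand it inline with plain integer loads/stores instead of tag-preserving
+; capability loads/stores.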
+; CHERI-GENERIC-UTC: opt --function-signature +; RUN: opt < %s -passes=instcombine -S | FileCheck %s --check-prefix=CHECK-IR +; RUN: opt < %s -passes=instcombine -S | llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d - -o - | FileCheck %s --check-prefix=CHECK-ASM +target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200" + +@str = private unnamed_addr addrspace(200) constant [17 x i8] c"exactly 16 chars\00", align 8 + +declare ptr addrspace(200) @strcpy(ptr addrspace(200), ptr addrspace(200)) addrspace(200) +declare ptr addrspace(200) @stpcpy(ptr addrspace(200), ptr addrspace(200)) addrspace(200) +declare ptr addrspace(200) @strcat(ptr addrspace(200), ptr addrspace(200)) addrspace(200) +declare ptr addrspace(200) @strncpy(ptr addrspace(200), ptr addrspace(200), i64) addrspace(200) +declare ptr addrspace(200) @stpncpy(ptr addrspace(200), ptr addrspace(200), i64) addrspace(200) + +define void @test_strcpy_to_memcpy(ptr addrspace(200) align 8 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_strcpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: .LBB0_1: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca1, %pcrel_hi(.LCPI0_0) +; CHECK-ASM-NEXT: addiy ca1, ca1, %pcrel_lo(.LBB0_1) +; CHECK-ASM-NEXT: ld a1, 0(ca1) +; CHECK-ASM-NEXT: .LBB0_2: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca2, %pcrel_hi(.LCPI0_1) +; CHECK-ASM-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB0_2) +; CHECK-ASM-NEXT: ld a2, 0(ca2) +; CHECK-ASM-NEXT: sd a1, 8(ca0) +; CHECK-ASM-NEXT: sb zero, 16(ca0) +; CHECK-ASM-NEXT: sd a2, 0(ca0) +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_strcpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 8 [[DST:%.*]]) addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) [[DST]], ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) @str, i64 17, i1 false) #[[ATTR4:[0-9]+]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @strcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str) + ret void +} + +define void @test_stpcpy_to_memcpy(ptr addrspace(200) align 8 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_stpcpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: .LBB1_1: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca1, %pcrel_hi(.LCPI1_0) +; CHECK-ASM-NEXT: addiy ca1, ca1, %pcrel_lo(.LBB1_1) +; CHECK-ASM-NEXT: ld a1, 0(ca1) +; CHECK-ASM-NEXT: .LBB1_2: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca2, %pcrel_hi(.LCPI1_1) +; CHECK-ASM-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB1_2) +; CHECK-ASM-NEXT: ld a2, 0(ca2) +; CHECK-ASM-NEXT: sd a1, 8(ca0) +; CHECK-ASM-NEXT: sb zero, 16(ca0) +; CHECK-ASM-NEXT: sd a2, 0(ca0) +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_stpcpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 8 [[DST:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) [[DST]], ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) @str, i64 17, i1 false) #[[ATTR5:[0-9]+]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @stpcpy(ptr addrspace(200) %dst, ptr addrspace(200) @str) + 
ret void
+}
+
+define void @test_strcat_to_memcpy(ptr addrspace(200) align 8 %dst) addrspace(200) nounwind {
+; CHECK-ASM-LABEL: test_strcat_to_memcpy:
+; CHECK-ASM: # %bb.0: # %entry
+; CHECK-ASM-NEXT: addiy csp, csp, -32
+; CHECK-ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill
+; CHECK-ASM-NEXT: sy cs0, 0(csp) # 16-byte Folded Spill
+; CHECK-ASM-NEXT: ymv cs0, ca0
+; CHECK-ASM-NEXT: call strlen
+; CHECK-ASM-NEXT: addy ca0, cs0, a0
+; CHECK-ASM-NEXT: .LBB2_1: # %entry
+; CHECK-ASM-NEXT: # Label of block must be emitted
+; CHECK-ASM-NEXT: auipcc ca1, %got_pcrel_hi(.Lstr)
+; CHECK-ASM-NEXT: ly ca1, %pcrel_lo(.LBB2_1)(ca1)
+; CHECK-ASM-NEXT: li a2, 17
+; CHECK-ASM-NEXT: call memcpy
+; CHECK-ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload
+; CHECK-ASM-NEXT: ly cs0, 0(csp) # 16-byte Folded Reload
+; CHECK-ASM-NEXT: addiy csp, csp, 32
+; CHECK-ASM-NEXT: ret
+; CHECK-IR-LABEL: define void @test_strcat_to_memcpy
+; CHECK-IR-SAME: (ptr addrspace(200) align 8 [[DST:%.*]]) addrspace(200) #[[ATTR1]] {
+; CHECK-IR-NEXT: entry:
+; CHECK-IR-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr addrspace(200) noundef nonnull dereferenceable(1) [[DST]])
+; CHECK-IR-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(200) [[DST]], i64 [[STRLEN]]
+; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 1 dereferenceable(17) [[ENDPTR]], ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) @str, i64 17, i1 false) #[[ATTR5]]
+; CHECK-IR-NEXT: ret void
+;
+entry:
+ %call = call ptr addrspace(200) @strcat(ptr addrspace(200) %dst, ptr addrspace(200) @str)
+ ret void
+}
+
+
+define void @test_strncpy_to_memcpy(ptr addrspace(200) align 8 %dst) addrspace(200) nounwind {
+; CHECK-ASM-LABEL: test_strncpy_to_memcpy:
+; CHECK-ASM: # %bb.0: # %entry
+; CHECK-ASM-NEXT: .LBB3_1: # %entry
+; CHECK-ASM-NEXT: # Label of block must be emitted
+; CHECK-ASM-NEXT: auipcc ca1, %pcrel_hi(.LCPI3_0)
+; CHECK-ASM-NEXT: addiy ca1, ca1, %pcrel_lo(.LBB3_1)
+; CHECK-ASM-NEXT: ld a1, 0(ca1)
+; CHECK-ASM-NEXT: .LBB3_2: # %entry
+; CHECK-ASM-NEXT: # Label of block must be emitted
+; CHECK-ASM-NEXT: auipcc ca2, %pcrel_hi(.LCPI3_1)
+; CHECK-ASM-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB3_2)
+; CHECK-ASM-NEXT: ld a2, 0(ca2)
+; CHECK-ASM-NEXT: sd a1, 8(ca0)
+; CHECK-ASM-NEXT: sb zero, 16(ca0)
+; CHECK-ASM-NEXT: sd a2, 0(ca0)
+; CHECK-ASM-NEXT: ret
+; CHECK-IR-LABEL: define void @test_strncpy_to_memcpy
+; CHECK-IR-SAME: (ptr addrspace(200) align 8 [[DST:%.*]]) addrspace(200) #[[ATTR1]] {
+; CHECK-IR-NEXT: entry:
+; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) [[DST]], ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) @str, i64 17, i1 false) #[[ATTR4]]
+; CHECK-IR-NEXT: ret void
+;
+entry:
+ %call = call ptr addrspace(200) @strncpy(ptr addrspace(200) %dst, ptr addrspace(200) @str, i64 17)
+ ret void
+}
+
+; Note: stpncpy used to not be handled by SimplifyLibcalls; the generated checks below show that it is now also folded to a memcpy().
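+; Worked example of why 17 bytes is the right copy size (illustration only):
+; @str is [17 x i8] c"exactly 16 chars\00", i.e. 16 characters plus the NUL
+; terminator, and the strncpy/stpncpy bound is also 17, so
+;   call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) %dst, ptr addrspace(200) @str, i64 17, i1 false)
+; copies the whole string including its terminator and no zero padding is needed.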
+define void @test_stpncpy_to_memcpy(ptr addrspace(200) align 8 %dst) addrspace(200) nounwind { +; CHECK-ASM-LABEL: test_stpncpy_to_memcpy: +; CHECK-ASM: # %bb.0: # %entry +; CHECK-ASM-NEXT: .LBB4_1: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca1, %pcrel_hi(.LCPI4_0) +; CHECK-ASM-NEXT: addiy ca1, ca1, %pcrel_lo(.LBB4_1) +; CHECK-ASM-NEXT: ld a1, 0(ca1) +; CHECK-ASM-NEXT: .LBB4_2: # %entry +; CHECK-ASM-NEXT: # Label of block must be emitted +; CHECK-ASM-NEXT: auipcc ca2, %pcrel_hi(.LCPI4_1) +; CHECK-ASM-NEXT: addiy ca2, ca2, %pcrel_lo(.LBB4_2) +; CHECK-ASM-NEXT: ld a2, 0(ca2) +; CHECK-ASM-NEXT: sd a1, 8(ca0) +; CHECK-ASM-NEXT: sb zero, 16(ca0) +; CHECK-ASM-NEXT: sd a2, 0(ca0) +; CHECK-ASM-NEXT: ret +; CHECK-IR-LABEL: define void @test_stpncpy_to_memcpy +; CHECK-IR-SAME: (ptr addrspace(200) align 8 [[DST:%.*]]) addrspace(200) #[[ATTR1]] { +; CHECK-IR-NEXT: entry: +; CHECK-IR-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) [[DST]], ptr addrspace(200) noundef nonnull align 8 dereferenceable(17) @str, i64 17, i1 false) #[[ATTR4]] +; CHECK-IR-NEXT: ret void +; +entry: + %call = call ptr addrspace(200) @stpncpy(ptr addrspace(200) %dst, ptr addrspace(200) @str, i64 17) + ret void +} + +; UTC_ARGS: --disable +; CHECK-IR: attributes #[[ATTR1]] = { nounwind } +; The no_preserve_cheri_tags should be attribute 4/5 in all cases +; CHECK-IR: attributes #[[ATTR4]] = { no_preserve_cheri_tags nounwind } +; CHECK-IR: attributes #[[ATTR5]] = { no_preserve_cheri_tags } diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/subobject-bounds-redundant-setbounds.c.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/subobject-bounds-redundant-setbounds.c.ll new file mode 100644 index 0000000000000..65af35735c106 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/subobject-bounds-redundant-setbounds.c.ll @@ -0,0 +1,317 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/subobject-bounds-redundant-setbounds.c.ll +; REQUIRES: asserts +; RUN: rm -f %t.dbg-opt %t.dbg-llc +; RUN: opt -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -cheri-bound-allocas -debug-only=cheri-bound-allocas -S -o - %s 2>%t.dbg-opt | FileCheck %s +; RUN: FileCheck %s -input-file=%t.dbg-opt -check-prefix DBG +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d -debug-only=cheri-bound-allocas -o - %s 2>%t.dbg-llc | FileCheck %s -check-prefix ASM +; RUN: FileCheck %s -input-file=%t.dbg-llc -check-prefix DBG +target datalayout = "e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128-A200-P200-G200" + +; created from the following C source code (when compiled with subobject bounds): +; void use(void* arg); +; +;void use_inline(int* arg) { +; *arg = 2; +;} +; +;int stack_array() { +; int array[10]; +; use(array); +; return array[5]; +;} +; +;int stack_int() { +; int value = 1; +; use(&value); +; return value; +;} +; +;int stack_int_inlined() { +; int value = 1; +; use_inline(&value); +; return value; +;} + + +define void @use_inline(ptr addrspace(200) nocapture %arg) local_unnamed_addr addrspace(200) { +; ASM-LABEL: use_inline: +; ASM: # %bb.0: +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: ret +; CHECK-LABEL: define void @use_inline +; CHECK-SAME: (ptr addrspace(200) nocapture [[ARG:%.*]]) 
local_unnamed_addr addrspace(200) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ARG]], align 4 +; CHECK-NEXT: ret void +; + store i32 2, ptr addrspace(200) %arg, align 4 + ret void +} + +define signext i32 @stack_array() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: stack_array: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -80 +; ASM-NEXT: sy cra, 64(csp) # 16-byte Folded Spill +; ASM-NEXT: sy cs0, 48(csp) # 16-byte Folded Spill +; ASM-NEXT: li a0, 40 +; ASM-NEXT: addiy ca1, csp, 8 +; ASM-NEXT: ybndsrw cs0, ca1, a0 +; ASM-NEXT: ymv ca0, cs0 +; ASM-NEXT: call use +; ASM-NEXT: lw a0, 20(cs0) +; ASM-NEXT: ly cra, 64(csp) # 16-byte Folded Reload +; ASM-NEXT: ly cs0, 48(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 80 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @stack_array +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [10 x i32], align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 40, ptr addrspace(200) nonnull [[ARRAY]]) +; CHECK-NEXT: [[BOUNDED:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[ARRAY]], i64 40) +; CHECK-NEXT: call void @use(ptr addrspace(200) [[BOUNDED]]) +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr addrspace(200) [[BOUNDED]], i64 20 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[ARRAYIDX]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p200(i64 40, ptr addrspace(200) nonnull [[ARRAY]]) +; CHECK-NEXT: ret i32 [[LD]] +; + %array = alloca [10 x i32], align 4, addrspace(200) + call void @llvm.lifetime.start.p200(i64 40, ptr addrspace(200) nonnull %array) + %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %array, i64 40) + call void @use(ptr addrspace(200) %bounded) + %arrayidx = getelementptr inbounds i8, ptr addrspace(200) %bounded, i64 20 + %ld = load i32, ptr addrspace(200) %arrayidx, align 4 + call void @llvm.lifetime.end.p200(i64 40, ptr addrspace(200) nonnull %array) + ret i32 %ld +} + +; DBG-LABEL: Checking function stack_array +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.end.p200(i64 40, ptr addrspace(200) nonnull %array) +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=40, setbounds size=40 current offset=0: %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %array, i64 40) +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 40, ptr addrspace(200) nonnull %array) +; DBG-NEXT: cheri-bound-allocas: stack_array: 0 of 3 users need bounds for %array = alloca [10 x i32], align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %array = alloca [10 x i32], align 4, addrspace(200) +; DBG-EMPTY: + +declare void @llvm.lifetime.start.p200(i64 immarg, ptr addrspace(200) nocapture) addrspace(200) + +declare void @use(ptr addrspace(200)) local_unnamed_addr addrspace(200) + +declare ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200), i64) addrspace(200) + +declare void @llvm.lifetime.end.p200(i64 immarg, ptr addrspace(200) nocapture) addrspace(200) + +define signext i32 @stack_int() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: stack_int: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -32 +; 
ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: li a0, 4 +; ASM-NEXT: addiy ca1, csp, 12 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: call use +; ASM-NEXT: lw a0, 12(csp) +; ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload +; ASM-NEXT: addiy csp, csp, 32 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @stack_int +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: [[BOUNDED:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[VALUE]], i64 4) +; CHECK-NEXT: call void @use(ptr addrspace(200) [[BOUNDED]]) +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: ret i32 [[LD]] +; + %value = alloca i32, align 4, addrspace(200) + call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) + store i32 1, ptr addrspace(200) %value, align 4 + %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 4) + call void @use(ptr addrspace(200) %bounded) + %ld = load i32, ptr addrspace(200) %value, align 4 + call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) + ret i32 %ld +} + +; DBG-LABEL: Checking function stack_int +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=4, setbounds size=4 current offset=0: %bounded = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 4) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: stack_int: 0 of 5 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +define signext i32 @stack_int_inlined() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: stack_int_inlined: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: li a0, 4 +; ASM-NEXT: addiy ca1, csp, 12 +; ASM-NEXT: ybndsrw ca0, ca1, a0 +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: 
lw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @stack_int_inlined +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[VALUE]], i64 4) +; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull [[VALUE]]) +; CHECK-NEXT: ret i32 [[LD]] +; + %value = alloca i32, align 4, addrspace(200) + call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) + store i32 1, ptr addrspace(200) %value, align 4 + %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 4) + store i32 2, ptr addrspace(200) %address.with.bounds, align 4 + %ld = load i32, ptr addrspace(200) %value, align 4 + call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) + ret i32 %ld +} + +; DBG-LABEL: Checking function stack_int_inlined +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.end.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=4, setbounds size=4 current offset=0: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 4) +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store i32 1, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for lifetime_{start,end}: call void @llvm.lifetime.start.p200(i64 4, ptr addrspace(200) nonnull %value) +; DBG-NEXT: cheri-bound-allocas: stack_int_inlined: 0 of 5 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +define signext i32 @out_of_bounds_setbounds() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: out_of_bounds_setbounds: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: addiy ca0, csp, 12 +; ASM-NEXT: ybndsiw ca0, ca0, 4 +; ASM-NEXT: li a1, 5 +; ASM-NEXT: ybndsrw ca0, ca0, a1 +; ASM-NEXT: li a1, 2 +; ASM-NEXT: sw a1, 0(ca0) +; ASM-NEXT: lw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define signext i32 @out_of_bounds_setbounds +; CHECK-SAME: () local_unnamed_addr addrspace(200) 
#[[ATTR1]] {
+; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(200) @llvm.cheri.bounded.stack.cap.i64(ptr addrspace(200) [[VALUE]], i64 4)
+; CHECK-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[TMP1]], i64 5)
+; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4
+; CHECK-NEXT: ret i32 [[LD]]
+;
+ %value = alloca i32, align 4, addrspace(200)
+ ; Too big, cannot elide the setbounds:
+ %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 5)
+ store i32 2, ptr addrspace(200) %address.with.bounds, align 4
+ %ld = load i32, ptr addrspace(200) %value, align 4
+ ret i32 %ld
+}
+
+; DBG-NEXT: Checking function out_of_bounds_setbounds
+; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4
+; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32
+; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4
+; DBG-NEXT: cheri-bound-allocas: -out_of_bounds_setbounds: setbounds use offset OUT OF BOUNDS and will trap -> adding csetbounds: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 5)
+; DBG-NEXT: cheri-bound-allocas: Found alloca use that needs bounds: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 5)
+; DBG-NEXT: cheri-bound-allocas: out_of_bounds_setbounds: 1 of 2 users need bounds for %value = alloca i32, align 4, addrspace(200)
+; DBG-NEXT: out_of_bounds_setbounds: setting bounds on stack alloca to 4 %value = alloca i32, align 4, addrspace(200)
+; DBG-EMPTY:
+
+define signext i32 @setbounds_escapes() local_unnamed_addr addrspace(200) nounwind {
+; ASM-LABEL: setbounds_escapes:
+; ASM: # %bb.0:
+; ASM-NEXT: addiy csp, csp, -32
+; ASM-NEXT: sy cra, 16(csp) # 16-byte Folded Spill
+; ASM-NEXT: li a0, 4
+; ASM-NEXT: addiy ca1, csp, 12
+; ASM-NEXT: ybndsrw ca0, ca1, a0
+; ASM-NEXT: li a1, 2
+; ASM-NEXT: sw a1, 0(ca0)
+; ASM-NEXT: call use
+; ASM-NEXT: lw a0, 12(csp)
+; ASM-NEXT: ly cra, 16(csp) # 16-byte Folded Reload
+; ASM-NEXT: addiy csp, csp, 32
+; ASM-NEXT: ret
+; CHECK-LABEL: define signext i32 @setbounds_escapes
+; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] {
+; CHECK-NEXT: [[VALUE:%.*]] = alloca i32, align 4, addrspace(200)
+; CHECK-NEXT: [[ADDRESS_WITH_BOUNDS:%.*]] = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull [[VALUE]], i64 4)
+; CHECK-NEXT: store i32 2, ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]], align 4
+; CHECK-NEXT: call void @use(ptr addrspace(200) [[ADDRESS_WITH_BOUNDS]])
+; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(200) [[VALUE]], align 4
+; CHECK-NEXT: ret i32 [[LD]]
+;
+ %value = alloca i32, align 4, addrspace(200)
+ ; Same size as the alloca, so no additional stack bounds are needed:
+ %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 4)
+ store i32 2, ptr addrspace(200) %address.with.bounds, align 4
+ call void @use(ptr addrspace(200) %address.with.bounds)
+ %ld = load i32, ptr addrspace(200) %value, align 4
+ ret i32 %ld
+}
+
+; DBG-NEXT: Checking
function setbounds_escapes +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for %ld = load i32, ptr addrspace(200) %value, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for use in setbounds with smaller or equal size: original size=4, setbounds size=4 current offset=0: %address.with.bounds = call ptr addrspace(200) @llvm.cheri.cap.bounds.set.i64(ptr addrspace(200) nonnull %value, i64 4) +; DBG-NEXT: cheri-bound-allocas: setbounds_escapes: 0 of 2 users need bounds for %value = alloca i32, align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %value = alloca i32, align 4, addrspace(200) +; DBG-EMPTY: + +; llvm.assume() should not add bounds: +define void @assume_aligned() local_unnamed_addr addrspace(200) nounwind { +; ASM-LABEL: assume_aligned: +; ASM: # %bb.0: +; ASM-NEXT: addiy csp, csp, -16 +; ASM-NEXT: li a0, 1 +; ASM-NEXT: sw a0, 12(csp) +; ASM-NEXT: addiy csp, csp, 16 +; ASM-NEXT: ret +; CHECK-LABEL: define void @assume_aligned +; CHECK-SAME: () local_unnamed_addr addrspace(200) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca [4 x i8], align 4, addrspace(200) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(200) [[TMP1]], i64 4) ] +; CHECK-NEXT: store i32 1, ptr addrspace(200) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %1 = alloca [4 x i8], align 4, addrspace(200) + call void @llvm.assume(i1 true) [ "align"([4 x i8] addrspace(200)* %1, i64 4) ] + store i32 1, ptr addrspace(200) %1 + ret void +} + +; DBG-NEXT: Checking function assume_aligned +; DBG-NEXT: cheri-bound-allocas: -Checking if load/store needs bounds (GEP offset is 0): store i32 1, ptr addrspace(200) %1, align 4 +; DBG-NEXT: cheri-bound-allocas: -Load/store size=4, alloca size=4, current GEP offset=0 for i32 +; DBG-NEXT: cheri-bound-allocas: -Load/store is in bounds -> can reuse $csp for store i32 1, ptr addrspace(200) %1, align 4 +; DBG-NEXT: cheri-bound-allocas: -No need for stack bounds for assume: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(200) %1, i64 4) ] +; DBG-NEXT: cheri-bound-allocas: assume_aligned: 0 of 2 users need bounds for %1 = alloca [4 x i8], align 4, addrspace(200) +; DBG-NEXT: cheri-bound-allocas: No need to set bounds on stack alloca %1 = alloca [4 x i8], align 4, addrspace(200) +; DBG-EMPTY: + +declare void @llvm.assume(i1) addrspace(200) diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/trunc-load.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/trunc-load.ll new file mode 100644 index 0000000000000..2d5554df5c129 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/trunc-load.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/trunc-load.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - < %s | FileCheck %s --check-prefix=PURECAP +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d -o - < %s | FileCheck %s --check-prefix=HYBRID + +define zeroext i16 @trunc_load_zext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_zext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: 
lhu a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_zext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lhu a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define signext i16 @trunc_load_sext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_sext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lh a0, 0(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_sext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lh a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define zeroext i16 @trunc_load_gep_zext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_gep_zext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lhu a0, 4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_gep_zext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 4 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lhu a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = getelementptr i32, i32 addrspace(200)* %p, i64 1 + %2 = load i32, i32 addrspace(200)* %1 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} + +define signext i16 @trunc_load_gep_sext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_load_gep_sext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lh a0, 4(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_load_gep_sext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 4 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lh a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = getelementptr i32, i32 addrspace(200)* %p, i64 1 + %2 = load i32, i32 addrspace(200)* %1 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} + +define zeroext i16 @trunc_lshr_load_zext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_lshr_load_zext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lhu a0, 2(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_lshr_load_zext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 2 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lhu a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = lshr i32 %1, 16 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} + +define signext i16 @trunc_lshr_load_sext(i32 addrspace(200)* %p) { +; PURECAP-LABEL: trunc_lshr_load_sext: +; PURECAP: # %bb.0: +; PURECAP-NEXT: lh a0, 2(ca0) +; PURECAP-NEXT: ret +; +; HYBRID-LABEL: trunc_lshr_load_sext: +; HYBRID: # %bb.0: +; HYBRID-NEXT: addiy ca0, ca0, 2 +; HYBRID-NEXT: .option capmode +; HYBRID-NEXT: modesw.cap +; HYBRID-NEXT: lh a0, 0(ca0) +; HYBRID-NEXT: .option nocapmode +; HYBRID-NEXT: modesw.int +; HYBRID-NEXT: ret + %1 = load i32, i32 addrspace(200)* %p + %2 = lshr i32 %1, 16 + %3 = trunc i32 %2 to i16 + ret i16 %3 +} diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-hybrid.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-hybrid.ll new file mode 100644 index 0000000000000..2f2611d5fb292 --- /dev/null +++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-hybrid.ll @@ -0,0 +1,371 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes 
--version 2 +; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/unaligned-loads-stores-hybrid.ll +; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi lp64d -mattr=+y,+zyhybrid,+f,+d %s -o - | FileCheck %s + +@a1 = global i64 0, align 1 +@a2 = global i64 0, align 2 +@a4 = global i64 0, align 4 +@a8 = global i64 0, align 8 + +define i64 @load_global_i64_align_1(i64 %y) addrspace(200) nounwind { +; CHECK-LABEL: load_global_i64_align_1: +; CHECK: # %bb.0: +; CHECK-NEXT: .Lpcrel_hi0: +; CHECK-NEXT: auipc a0, %got_pcrel_hi(a1) +; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0) +; CHECK-NEXT: csrrc ca1, ddc, zero +; CHECK-NEXT: bnez a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ymv ca0, cnull +; CHECK-NEXT: j .LBB0_3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: yaddrw ca0, ca1, a0 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a1, 0(ca0) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca2, ca0, 1 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a2, 0(ca2) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca3, ca0, 2 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a3, 0(ca3) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca4, ca0, 3 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a4, 0(ca4) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: slli a2, a2, 8 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: slli a3, a3, 16 +; CHECK-NEXT: slli a4, a4, 24 +; CHECK-NEXT: or a3, a4, a3 +; CHECK-NEXT: or a1, a3, a1 +; CHECK-NEXT: addiy ca2, ca0, 4 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a2, 0(ca2) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca3, ca0, 5 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a3, 0(ca3) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca4, ca0, 6 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a4, 0(ca4) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca0, ca0, 7 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lbu a0, 0(ca0) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: slli a3, a3, 8 +; CHECK-NEXT: or a2, a3, a2 +; CHECK-NEXT: slli a4, a4, 16 +; CHECK-NEXT: slli a0, a0, 24 +; CHECK-NEXT: or a0, a0, a4 +; CHECK-NEXT: or a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ret + %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a1 to i64 addrspace(200)*), align 1 + ret i64 %ret +} + +define i64 @load_global_i64_align_2(i64 %y) addrspace(200) nounwind { +; CHECK-LABEL: load_global_i64_align_2: +; CHECK: # %bb.0: +; CHECK-NEXT: .Lpcrel_hi1: +; CHECK-NEXT: auipc a0, %got_pcrel_hi(a2) +; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi1)(a0) +; CHECK-NEXT: csrrc ca1, ddc, zero +; CHECK-NEXT: bnez a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ymv ca0, cnull +; CHECK-NEXT: j .LBB1_3 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: yaddrw ca0, ca1, a0 +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lhu a1, 0(ca0) +; CHECK-NEXT: .option nocapmode +; CHECK-NEXT: modesw.int +; CHECK-NEXT: addiy ca2, ca0, 2 +; CHECK-NEXT: .option capmode +; CHECK-NEXT: modesw.cap +; CHECK-NEXT: lhu a2, 0(ca2) +; 
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca3, ca0, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lhu a3, 0(ca3)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca0, ca0, 6
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lhu a0, 0(ca0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: slli a2, a2, 16
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: slli a3, a3, 32
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: or a0, a0, a3
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a2 to i64 addrspace(200)*), align 2
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi2:
+; CHECK-NEXT: auipc a0, %got_pcrel_hi(a4)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi2)(a0)
+; CHECK-NEXT: csrrc ca1, ddc, zero
+; CHECK-NEXT: bnez a0, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca0, cnull
+; CHECK-NEXT: j .LBB2_3
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: yaddrw ca0, ca1, a0
+; CHECK-NEXT: .LBB2_3:
+; CHECK-NEXT: addiy ca1, ca0, 4
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lwu a1, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: lwu a0, 0(ca0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: slli a1, a1, 32
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a4 to i64 addrspace(200)*), align 4
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi3:
+; CHECK-NEXT: auipc a0, %got_pcrel_hi(a8)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0)
+; CHECK-NEXT: csrrc ca1, ddc, zero
+; CHECK-NEXT: bnez a0, .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: ld a0, 0(cnull)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: yaddrw ca0, ca1, a0
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: ld a0, 0(ca0)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* addrspacecast(i64* @a8 to i64 addrspace(200)*), align 8
+ ret i64 %ret
+}
+
+define void @store_global_i64_align_1(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi4:
+; CHECK-NEXT: auipc a1, %got_pcrel_hi(a1)
+; CHECK-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi4)(a1)
+; CHECK-NEXT: csrrc ca2, ddc, zero
+; CHECK-NEXT: bnez a1, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca1, cnull
+; CHECK-NEXT: j .LBB4_3
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: yaddrw ca1, ca2, a1
+; CHECK-NEXT: .LBB4_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 7
+; CHECK-NEXT: srli a3, a0, 56
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 6
+; CHECK-NEXT: srli a3, a0, 48
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 5
+; CHECK-NEXT: srli a3, a0, 40
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 4
+; CHECK-NEXT: srli a3, a0, 32
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 3
+; CHECK-NEXT: srli a3, a0, 24
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 2
+; CHECK-NEXT: srli a3, a0, 16
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca1, 1
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sb a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a1 to i64 addrspace(200)*), align 1
+ ret void
+}
+
+define void @store_global_i64_align_2(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi5:
+; CHECK-NEXT: auipc a1, %got_pcrel_hi(a2)
+; CHECK-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi5)(a1)
+; CHECK-NEXT: csrrc ca2, ddc, zero
+; CHECK-NEXT: bnez a1, .LBB5_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca1, cnull
+; CHECK-NEXT: j .LBB5_3
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT: yaddrw ca1, ca2, a1
+; CHECK-NEXT: .LBB5_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 6
+; CHECK-NEXT: srli a3, a0, 48
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca2, ca1, 4
+; CHECK-NEXT: srli a3, a0, 32
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh a3, 0(ca2)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca1, 2
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sh a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a2 to i64 addrspace(200)*), align 2
+ ret void
+}
+
+define void @store_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi6:
+; CHECK-NEXT: auipc a1, %got_pcrel_hi(a4)
+; CHECK-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi6)(a1)
+; CHECK-NEXT: csrrc ca2, ddc, zero
+; CHECK-NEXT: bnez a1, .LBB6_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ymv ca1, cnull
+; CHECK-NEXT: j .LBB6_3
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: yaddrw ca1, ca2, a1
+; CHECK-NEXT: .LBB6_3:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sw a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: addiy ca1, ca1, 4
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sw a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a4 to i64 addrspace(200)*), align 4
+ ret void
+}
+
+define void @store_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .Lpcrel_hi7:
+; CHECK-NEXT: auipc a1, %got_pcrel_hi(a8)
+; CHECK-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi7)(a1)
+; CHECK-NEXT: csrrc ca2, ddc, zero
+; CHECK-NEXT: bnez a1, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sd a0, 0(cnull)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: yaddrw ca1, ca2, a1
+; CHECK-NEXT: .option capmode
+; CHECK-NEXT: modesw.cap
+; CHECK-NEXT: sd a0, 0(ca1)
+; CHECK-NEXT: .option nocapmode
+; CHECK-NEXT: modesw.int
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* addrspacecast(i64* @a8 to i64 addrspace(200)*), align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-purecap.ll b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-purecap.ll
new file mode 100644
index 0000000000000..ba87ce3190902
--- /dev/null
+++ b/llvm/test/CodeGen/CHERI-Generic/RISCV64Y/unaligned-loads-stores-purecap.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --scrub-attributes --version 2
+; DO NOT EDIT -- This file was generated from test/CodeGen/CHERI-Generic/Inputs/unaligned-loads-stores-purecap.ll
+; RUN: llc -mtriple=riscv64 --relocation-model=pic -target-abi l64pc128d -mattr=+y,+cap-mode,+f,+d %s -o - | FileCheck %s
+
+@a1 = addrspace(200) global i64 0, align 1
+@a2 = addrspace(200) global i64 0, align 2
+@a4 = addrspace(200) global i64 0, align 4
+@a8 = addrspace(200) global i64 0, align 8
+
+define i64 @load_global_i64_align_1(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB0_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca0, %got_pcrel_hi(a1)
+; CHECK-NEXT: ly ca0, %pcrel_lo(.LBB0_1)(ca0)
+; CHECK-NEXT: lbu a1, 1(ca0)
+; CHECK-NEXT: lbu a2, 0(ca0)
+; CHECK-NEXT: lbu a3, 2(ca0)
+; CHECK-NEXT: lbu a4, 3(ca0)
+; CHECK-NEXT: slli a1, a1, 8
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: slli a3, a3, 16
+; CHECK-NEXT: slli a4, a4, 24
+; CHECK-NEXT: or a3, a4, a3
+; CHECK-NEXT: or a1, a3, a1
+; CHECK-NEXT: lbu a2, 5(ca0)
+; CHECK-NEXT: lbu a3, 4(ca0)
+; CHECK-NEXT: lbu a4, 6(ca0)
+; CHECK-NEXT: lbu a0, 7(ca0)
+; CHECK-NEXT: slli a2, a2, 8
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: slli a4, a4, 16
+; CHECK-NEXT: slli a0, a0, 24
+; CHECK-NEXT: or a0, a0, a4
+; CHECK-NEXT: or a0, a0, a2
+; CHECK-NEXT: slli a0, a0, 32
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a1, align 1
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_2(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB1_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca0, %got_pcrel_hi(a2)
+; CHECK-NEXT: ly ca0, %pcrel_lo(.LBB1_1)(ca0)
+; CHECK-NEXT: lhu a1, 2(ca0)
+; CHECK-NEXT: lhu a2, 0(ca0)
+; CHECK-NEXT: lhu a3, 4(ca0)
+; CHECK-NEXT: lhu a0, 6(ca0)
+; CHECK-NEXT: slli a1, a1, 16
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: slli a3, a3, 32
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: or a0, a0, a3
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a2, align 2
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB2_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca0, %got_pcrel_hi(a4)
+; CHECK-NEXT: ly ca0, %pcrel_lo(.LBB2_1)(ca0)
+; CHECK-NEXT: lwu a1, 4(ca0)
+; CHECK-NEXT: lwu a0, 0(ca0)
+; CHECK-NEXT: slli a1, a1, 32
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a4, align 4
+ ret i64 %ret
+}
+
+define i64 @load_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: load_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB3_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca0, %got_pcrel_hi(a8)
+; CHECK-NEXT: ly ca0, %pcrel_lo(.LBB3_1)(ca0)
+; CHECK-NEXT: ld a0, 0(ca0)
+; CHECK-NEXT: ret
+ %ret = load i64, i64 addrspace(200)* @a8, align 8
+ ret i64 %ret
+}
+
+define void @store_global_i64_align_1(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB4_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a1)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB4_1)(ca1)
+; CHECK-NEXT: sb a0, 0(ca1)
+; CHECK-NEXT: srli a2, a0, 56
+; CHECK-NEXT: sb a2, 7(ca1)
+; CHECK-NEXT: srli a2, a0, 48
+; CHECK-NEXT: sb a2, 6(ca1)
+; CHECK-NEXT: srli a2, a0, 40
+; CHECK-NEXT: sb a2, 5(ca1)
+; CHECK-NEXT: srli a2, a0, 32
+; CHECK-NEXT: sb a2, 4(ca1)
+; CHECK-NEXT: srli a2, a0, 24
+; CHECK-NEXT: sb a2, 3(ca1)
+; CHECK-NEXT: srli a2, a0, 16
+; CHECK-NEXT: sb a2, 2(ca1)
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: sb a0, 1(ca1)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a1, align 1
+ ret void
+}
+
+define void @store_global_i64_align_2(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB5_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a2)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB5_1)(ca1)
+; CHECK-NEXT: sh a0, 0(ca1)
+; CHECK-NEXT: srli a2, a0, 48
+; CHECK-NEXT: sh a2, 6(ca1)
+; CHECK-NEXT: srli a2, a0, 32
+; CHECK-NEXT: sh a2, 4(ca1)
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: sh a0, 2(ca1)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a2, align 2
+ ret void
+}
+
+define void @store_global_i64_align_4(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB6_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a4)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB6_1)(ca1)
+; CHECK-NEXT: sw a0, 0(ca1)
+; CHECK-NEXT: srli a0, a0, 32
+; CHECK-NEXT: sw a0, 4(ca1)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a4, align 4
+ ret void
+}
+
+define void @store_global_i64_align_8(i64 %y) addrspace(200) nounwind {
+; CHECK-LABEL: store_global_i64_align_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB7_1: # Label of block must be emitted
+; CHECK-NEXT: auipcc ca1, %got_pcrel_hi(a8)
+; CHECK-NEXT: ly ca1, %pcrel_lo(.LBB7_1)(ca1)
+; CHECK-NEXT: sd a0, 0(ca1)
+; CHECK-NEXT: ret
+ store i64 %y, i64 addrspace(200)* @a8, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/CHERI-Generic/regenerate-all.py b/llvm/test/CodeGen/CHERI-Generic/regenerate-all.py
index 7c7c738ca16f4..80cf5b93622d8 100755
--- a/llvm/test/CodeGen/CHERI-Generic/regenerate-all.py
+++ b/llvm/test/CodeGen/CHERI-Generic/regenerate-all.py
@@ -59,8 +59,25 @@ def __init__(self, architecture: str, *, cap_range, cap_width,
 purecap_sf_args=["-target-abi", "l64pc128", "-mattr=+xcheri,+cap-mode,-f,-d"],
 purecap_hf_args=["-target-abi", "l64pc128d", "-mattr=+xcheri,+cap-mode,+f,+d"],
 datalayout=b"e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128")
+RISCV32YConfig = ArchSpecificValues(
+ "RISCV32Y", base_architecture="RISCV", cap_range=32, cap_width=64,
+ common_args=["-mtriple=riscv32", "--relocation-model=pic"],
+ hybrid_sf_args=["-target-abi", "ilp32", "-mattr=+y,+zyhybrid,-f"],
+ hybrid_hf_args=["-target-abi", "ilp32f", "-mattr=+y,+zyhybrid,+f"],
+ purecap_sf_args=["-target-abi", "il32pc64", "-mattr=+y,+cap-mode,-f"],
+ purecap_hf_args=["-target-abi", "il32pc64f", "-mattr=+y,+cap-mode,+f"],
+ datalayout=b"e-m:e-pf200:64:64:64:32-p:32:32-i64:64-n32-S128")
+RISCV64YConfig = ArchSpecificValues(
+ "RISCV64Y", base_architecture="RISCV", cap_range=64, cap_width=128,
+ common_args=["-mtriple=riscv64", "--relocation-model=pic"],
+ hybrid_sf_args=["-target-abi", "lp64", "-mattr=+y,+zyhybrid,-f,-d"],
+ hybrid_hf_args=["-target-abi", "lp64d", "-mattr=+y,+zyhybrid,+f,+d"],
+ purecap_sf_args=["-target-abi", "l64pc128", "-mattr=+y,+cap-mode,-f,-d"],
+ purecap_hf_args=["-target-abi", "l64pc128d", "-mattr=+y,+cap-mode,+f,+d"],
+ datalayout=b"e-m:e-pf200:128:128:128:64-p:64:64-i64:64-i128:128-n64-S128")
 
-ALL_ARCHITECTURES = [MIPSConfig, RISCV32Config, RISCV64Config]
+ALL_ARCHITECTURES = [MIPSConfig, RISCV32Config, RISCV64Config, RISCV32YConfig,
+ RISCV64YConfig]
 ALL_ARCHITECTURE_IF_STRS = set([b"@IF-" + arch_def.name.encode() + b"@" for arch_def in ALL_ARCHITECTURES] + [
 b"@IF-" + arch_def.base_name.encode() + b"@" for arch_def in ALL_ARCHITECTURES])
 ALL_ARCHITECTURE_IFNOT_STRS = set([b"@IFNOT-" + arch_def.name.encode() + b"@" for arch_def in ALL_ARCHITECTURES] + [
From 33a598e63c74ba501d7627bc0d439e9586b934cc Mon Sep 17 00:00:00 2001
From: Petr Vesely
Date: Wed, 20 Aug 2025 11:14:08 +0100
Subject: [PATCH 13/13] [RISCV] Fix compression of the ymv instruction

---
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td      | 10 +++
 llvm/test/CodeGen/RISCV/cheri/rvy/compress.ll | 64 +++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/cheri/rvy/compress.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index a539e11b95006..dd1ce43c8112b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -1024,7 +1024,16 @@ def : CompressPat<(JALR X0, GPRNoX0:$rs1, 0),
 (C_JR GPRNoX0:$rs1)>;
 } // Predicates = [HasStdExtC, NotCapMode]
 
+def NotStdExtYCapMode
+ : Predicate<"!(Subtarget->hasStdExtY() && Subtarget->hasCapMode())">,
+ AssemblerPredicate<
+ (any_of (not FeatureStdExtY), (not FeatureCapMode)),
+ "NotHasStdExtYAndCapMode">;
+
 let Predicates = [HasStdExtCOrZca] in {
+// In capability mode `c.mv` decompresses to a capability move, so we must not
+// compress an integer move into it.
+let Predicates = [HasStdExtCOrZca, NotStdExtYCapMode] in {
 let isCompressOnly = true in {
 def : CompressPat<(ADD GPRNoX0:$rs1, X0, GPRNoX0:$rs2),
 (C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
@@ -1033,6 +1042,7 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, X0),
 }
 def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs2, 0),
 (C_MV GPRNoX0:$rs1, GPRNoX0:$rs2)>;
+}
 def : CompressPat<(EBREAK), (C_EBREAK)>;
 def : CompressPat<(UNIMP), (C_UNIMP)>;
 } // Predicates = [HasStdExtCOrZca]
diff --git a/llvm/test/CodeGen/RISCV/cheri/rvy/compress.ll b/llvm/test/CodeGen/RISCV/cheri/rvy/compress.ll
new file mode 100644
index 0000000000000..b2e3fe9076e6e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/cheri/rvy/compress.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: cat %s > %t.tgtattr
+; RUN: echo 'attributes #0 = { nounwind }' >> %t.tgtattr
+; RUN: %riscv32y_purecap_llc -mattr=+c,+y,+cap-mode -filetype=obj < %t.tgtattr \
+; RUN: | llvm-objdump -d -M no-aliases - | FileCheck %s
+; RUN: %riscv64y_purecap_llc -mattr=+c,+y,+cap-mode -filetype=obj < %t.tgtattr \
+; RUN: | llvm-objdump -d -M no-aliases - | FileCheck %s
+; RUN: %riscv64y_purecap_llc -mattr=+c,+y,+cap-mode,+xcheri-norvc -filetype=obj < %t.tgtattr \
+; RUN: | llvm-objdump -d -M no-aliases --mattr=+y - | FileCheck %s --check-prefix=CHECK-NORVC
+
+; RUN: cat %s > %t.fnattr
+; RUN: echo 'attributes #0 = { nounwind "target-features"="+c,+y,+cap-mode" }' >> %t.fnattr
+; RUN: %riscv32y_purecap_llc -filetype=obj < %t.fnattr \
+; RUN: | llvm-objdump -d --mattr=+c -M no-aliases - | FileCheck %s
+; RUN: %riscv64y_purecap_llc -filetype=obj < %t.fnattr \
+; RUN: | llvm-objdump -d --mattr=+c -M no-aliases - | FileCheck %s
+; RUN: cat %s > %t.fnattr
+; RUN: echo 'attributes #0 = { nounwind "target-features"="+c,+y,+cap-mode,+xcheri-norvc" }' >> %t.fnattr
+; RUN: %riscv64y_purecap_llc -filetype=obj < %t.fnattr \
+; RUN: | llvm-objdump -d --mattr=+c -M no-aliases --mattr=+y - | FileCheck %s --check-prefix=CHECK-NORVC
+
+
+define i32 @loadstore(ptr addrspace(200) %intptrarg, ptr addrspace(200) %ptrptrarg) addrspace(200) #0 {
+; CHECK-LABEL: <loadstore>:
+; CHECK-NEXT: c.cincoffset16csp csp, -32
+; CHECK-NEXT: c.lw a2, 0(ca0)
+; CHECK-NEXT: c.li a3, 1
+; CHECK-NEXT: c.sw a3, 0(ca0)
+; CHECK-NEXT: c.lc ca0, 0(ca1)
+; CHECK-NEXT: c.sc ca0, 0(ca1)
+; CHECK-NEXT: c.scsp ca0, 16(csp)
+; CHECK-NEXT: c.lcsp ca0, 16(csp)
+; CHECK-NEXT: c.swsp a2, 0(csp)
+; CHECK-NEXT: c.lwsp a0, 0(csp)
+; CHECK-NEXT: addi a0, a2, 0
+; CHECK-NEXT: c.cincoffset16csp csp, 32
+; CHECK-NEXT: c.jr cra
+; CHECK-NORVC-LABEL: <loadstore>:
+; CHECK-NORVC-NEXT: {{[^a-z.]}}addiy csp, csp, -32
+; CHECK-NORVC-NEXT: {{[^a-z.]}}lw a2, 0(ca0)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}c.li a3, 1
+; CHECK-NORVC-NEXT: {{[^a-z.]}}sw a3, 0(ca0)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}ly ca0, 0(ca1)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}sy ca0, 0(ca1)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}sy ca0, 16(csp)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}ly ca0, 16(csp)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}sw a2, 0(csp)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}lw a0, 0(csp)
+; CHECK-NORVC-NEXT: {{[^a-z.]}}addi a0, a2, 0
+; CHECK-NORVC-NEXT: {{[^a-z.]}}addiy csp, csp, 32
+; CHECK-NORVC-NEXT: {{[^a-z.]}}jalr cnull, 0(cra)
+ %stackptr = alloca ptr addrspace(200), align 16, addrspace(200)
+ %stackint = alloca i32, align 16, addrspace(200)
+ %val = load volatile i32, ptr addrspace(200) %intptrarg
+ store volatile i32 1, ptr addrspace(200) %intptrarg
+ %ptrval = load volatile ptr addrspace(200), ptr addrspace(200) %ptrptrarg
+ store volatile ptr addrspace(200) %ptrval, ptr addrspace(200) %ptrptrarg
+ store volatile ptr addrspace(200) %ptrval, ptr addrspace(200) %stackptr
+ %stackptrval = load volatile ptr addrspace(200), ptr addrspace(200) %stackptr
+ store volatile i32 %val, ptr addrspace(200) %stackint
+ %stackintval = load volatile i32, ptr addrspace(200) %stackint
+ ret i32 %val
+}
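
For reference, the hazard guarded by NotStdExtYCapMode in the final patch can be illustrated with a short assembly sketch. The fragment below is illustrative only; the register choices are hypothetical, but the behaviour restates what the comment in RISCVInstrInfoC.td and the compress.ll expectations establish for -mattr=+c,+y,+cap-mode:

    addi a0, a2, 0    # integer move: copies only the integer value of a2
    c.mv a0, a2       # the compressed form; with Y and capability mode
                      # enabled this encoding decodes as a capability move
                      # (ymv ca0, ca2), which also copies the tag, bounds
                      # and permissions of ca2

    # Because the two forms differ in semantics, the CompressPat from
    # ADDI rd, rs, 0 to C.MV is disabled under Y + capability mode, and
    # compress.ll checks that the integer move is emitted as the
    # uncompressed "addi a0, a2, 0" in the objdump output.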