From d52b5e6df586ceb3bc10aef79940fa21f2ce1341 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 14 May 2025 13:51:02 +0200 Subject: [PATCH 01/10] Merge commit '6ba33f5e1189a5ae58fb96ce3546e76b13d090f5' into subtree-update_cg_gcc_2025-05-14 --- .gitattributes | 1 + .github/workflows/stdarch.yml | 3 +- CONTRIBUTING.md | 101 +++++++ Cargo.lock | 8 +- Cargo.toml | 4 +- Readme.md | 44 ++- build_system/Cargo.toml | 2 +- build_system/src/main.rs | 4 +- build_system/src/utils.rs | 2 +- example/mini_core.rs | 54 +++- libgccjit.version | 2 +- rust-toolchain | 2 +- src/abi.rs | 65 ++-- src/attributes.rs | 64 +++- src/back/lto.rs | 2 +- src/back/write.rs | 2 +- src/builder.rs | 25 +- src/consts.rs | 6 +- src/debuginfo.rs | 5 +- src/declare.rs | 1 + src/intrinsic/archs.rs | 554 +++++++++++++++++++++++++--------- src/intrinsic/mod.rs | 81 ++++- src/lib.rs | 4 +- tests/failing-ui-tests.txt | 11 +- tests/lang_tests_common.rs | 9 +- tests/run/always_inline.rs | 53 ++++ tests/run/switchint_128bit.rs | 37 +++ tools/generate_intrinsics.py | 5 +- triagebot.toml | 7 + 29 files changed, 910 insertions(+), 248 deletions(-) create mode 100644 .gitattributes create mode 100644 CONTRIBUTING.md create mode 100644 tests/run/always_inline.rs create mode 100644 tests/run/switchint_128bit.rs create mode 100644 triagebot.toml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..b9cd1111c8d --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +Cargo.lock linguist-generated=false \ No newline at end of file diff --git a/.github/workflows/stdarch.yml b/.github/workflows/stdarch.yml index 4b9f48e7b18..f26ac3b755f 100644 --- a/.github/workflows/stdarch.yml +++ b/.github/workflows/stdarch.yml @@ -101,9 +101,8 @@ jobs: if: ${{ matrix.cargo_runner }} run: | # FIXME: these tests fail when the sysroot is compiled with LTO because of a missing symbol in proc-macro. - # TODO: remove --skip test_mm512_stream_ps when stdarch is updated in rustc. # TODO: remove --skip test_tile_ when it's implemented. - STDARCH_TEST_EVERYTHING=1 CHANNEL=release CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="${{ matrix.cargo_runner }}" TARGET=x86_64-unknown-linux-gnu CG_RUSTFLAGS="-Ainternal_features --cfg stdarch_intel_sde" ./y.sh cargo test --manifest-path build/build_sysroot/sysroot_src/library/stdarch/Cargo.toml -- --skip rtm --skip tbm --skip sse4a --skip test_mm512_stream_ps --skip test_tile_ + STDARCH_TEST_SKIP_FUNCTION="xsave,xsaveopt,xsave64,xsaveopt64" STDARCH_TEST_EVERYTHING=1 CHANNEL=release CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="${{ matrix.cargo_runner }}" TARGET=x86_64-unknown-linux-gnu CG_RUSTFLAGS="-Ainternal_features" ./y.sh cargo test --manifest-path build/build_sysroot/sysroot_src/library/stdarch/Cargo.toml -- --skip rtm --skip tbm --skip sse4a --skip test_tile_ # Summary job for the merge queue. # ALL THE PREVIOUS JOBS NEED TO BE ADDED TO THE `needs` SECTION OF THIS JOB! diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..8e313ab08b5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,101 @@ +# Contributing to rustc_codegen_gcc + +Welcome to the `rustc_codegen_gcc` project! This guide will help you get started as a contributor. The project aims to provide a GCC codegen backend for rustc, allowing Rust compilation on platforms unsupported by LLVM and potentially improving runtime performance through GCC's optimizations. + +## Getting Started + +### Setting Up Your Development Environment + +For detailed setup instructions including dependencies, build steps, and initial testing, please refer to our [README](Readme.md). The README contains the most up-to-date information on: + +- Required dependencies and system packages +- Repository setup and configuration +- Build process +- Basic test verification + +Once you've completed the setup process outlined in the README, you can proceed with the contributor-specific information below. + +## Communication Channels + +- Matrix: Join our [Matrix channel](https://matrix.to/#/#rustc_codegen_gcc:matrix.org) +- IRC: Join us on [IRC](https://web.libera.chat/#rustc_codegen_gcc) +- [GitHub Issues](https://github.com/rust-lang/rustc_codegen_gcc/issues): For bug reports and feature discussions + +We encourage new contributors to join our communication channels and introduce themselves. Feel free to ask questions about where to start or discuss potential contributions. + +## Understanding Core Concepts + +### Common Development Tasks + +#### Running Specific Tests + +To run specific tests, use appropriate flags such as: + +- `./y.sh test --test-libcore` +- `./y.sh test --std-tests` +- `cargo test -- ` + +Additionally, you can run the tests of `libgccjit`: + +```bash +# libgccjit tests +cd gcc-build/gcc +make check-jit +# For a specific test: +make check-jit RUNTESTFLAGS="-v -v -v jit.exp=jit.dg/test-asm.cc" +``` + +#### Debugging Tools + +The project provides several environment variables for debugging: + +- `CG_GCCJIT_DUMP_GIMPLE`: Dumps the GIMPLE IR +- `CG_RUSTFLAGS`: Additional Rust flags +- `CG_GCCJIT_DUMP_MODULE`: Dumps a specific module +- `CG_GCCJIT_DUMP_TO_FILE`: Creates C-like representation + +Full list of debugging options can be found in the [README](Readme.md#env-vars). + +## Making Contributions + +### Finding Issues to Work On + +1. Look for issues labeled with [`good first issue`](https://github.com/rust-lang/rustc_codegen_gcc/issues?q=is%3Aissue%20state%3Aopen%20label%3A"good%20first%20issue") or [`help wanted`](https://github.com/rust-lang/rustc_codegen_gcc/issues?q=is%3Aissue%20state%3Aopen%20label%3A"help%20wanted") +2. Check the [progress report](https://blog.antoyo.xyz/rustc_codegen_gcc-progress-report-34#state_of_rustc_codegen_gcc) for larger initiatives +3. Consider improving documentation or investigating [failing tests](https://github.com/rust-lang/rustc_codegen_gcc/tree/master/tests) (except `failing-ui-tests12.txt`) + +### Pull Request Process + +1. Fork the repository and create a new branch +2. Make your changes with clear commit messages +3. Add tests for new functionality +4. Update documentation as needed +5. Submit a PR with a description of your changes + +### Code Style Guidelines + +- Follow Rust standard coding conventions +- Ensure your code passes `rustfmt` and `clippy` +- Add comments explaining complex logic, especially in GCC interface code + +## Additional Resources + +- [Rustc Dev Guide](https://rustc-dev-guide.rust-lang.org/) +- [GCC Internals Documentation](https://gcc.gnu.org/onlinedocs/gccint/) +- Project-specific documentation in the `doc/` directory: + - [Common errors](doc/errors.md) + - [Debugging](doc/debugging.md) + - [Debugging libgccjit](doc/debugging-libgccjit.md) + - [Git subtree sync](doc/subtree.md) + - [List of useful commands](doc/tips.md) + - [Send a patch to GCC](doc/sending-gcc-patch.md) + +## Getting Help + +If you're stuck or unsure about anything: +1. Check the existing documentation in the `doc/` directory +2. Ask in the IRC or Matrix channels +3. Open a GitHub issue for technical problems +4. Comment on the issue you're working on if you need guidance + +Remember that all contributions, including documentation improvements, bug reports, and feature requests, are valuable to the project. diff --git a/Cargo.lock b/Cargo.lock index 832603aa792..967a51a1cc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,18 +56,18 @@ dependencies = [ [[package]] name = "gccjit" -version = "2.5.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2895ddec764de7ac76fe6c056050c4801a80109c066f177a00a9cc8dee02b29b" +checksum = "ae99a89184220d967dd300139f2d2ae7d52c1a69d632b24aacc57c54625254ce" dependencies = [ "gccjit_sys", ] [[package]] name = "gccjit_sys" -version = "0.6.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac133db68db8a6a8b2c51ef4b18d8ea16682d5814c4641272fe37bbbc223d5f3" +checksum = "24edb7bfe2b7b27c6d09ed23eebfcab0b359c8fe978433f902943e6f127a0f1b" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index b50f2a626d5..c692a90f0a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "rustc_codegen_gcc" version = "0.1.0" authors = ["Antoni Boucher "] -edition = "2018" +edition = "2024" license = "MIT OR Apache-2.0" [lib] @@ -22,7 +22,7 @@ master = ["gccjit/master"] default = ["master"] [dependencies] -gccjit = "2.5" +gccjit = "2.7" #gccjit = { git = "https://github.com/rust-lang/gccjit.rs" } # Local copy. diff --git a/Readme.md b/Readme.md index d0e4dbba6d3..859bb1568f4 100644 --- a/Readme.md +++ b/Readme.md @@ -12,22 +12,38 @@ This is a GCC codegen for rustc, which means it can be loaded by the existing ru The primary goal of this project is to be able to compile Rust code on platforms unsupported by LLVM. A secondary goal is to check if using the gcc backend will provide any run-time speed improvement for the programs compiled using rustc. -### Dependencies - -**rustup:** Follow the instructions on the official [website](https://www.rust-lang.org/tools/install) - -**DejaGnu:** Consider to install DejaGnu which is necessary for running the libgccjit test suite. [website](https://www.gnu.org/software/dejagnu/#downloading) - - +## Getting Started -## Building - -**This requires a patched libgccjit in order to work. +Note: **This requires a patched libgccjit in order to work. You need to use my [fork of gcc](https://github.com/rust-lang/gcc) which already includes these patches.** +The default configuration (see below in the [Quick start](#quick-start) section) will download a `libgccjit` built in the CI that already contains these patches, so you don't need to build this fork yourself if you use the default configuration. -```bash -$ cp config.example.toml config.toml -``` +### Dependencies + +- rustup: follow instructions on the [official website](https://rustup.rs) +- consider to install DejaGnu which is necessary for running the libgccjit test suite. [website](https://www.gnu.org/software/dejagnu/#downloading) +- additional packages: `flex`, `libmpfr-dev`, `libgmp-dev`, `libmpc3`, `libmpc-dev` + +### Quick start + +1. Clone and configure the repository: + ```bash + git clone https://github.com/rust-lang/rustc_codegen_gcc + cd rustc_codegen_gcc + cp config.example.toml config.toml + ``` + +2. Build and test: + ```bash + ./y.sh prepare # downloads and patches sysroot + ./y.sh build --sysroot --release + + # Verify setup with a simple test + ./y.sh cargo build --manifest-path tests/hello-world/Cargo.toml + + # Run full test suite (expect ~100 failing UI tests) + ./y.sh test --release + ``` If don't need to test GCC patches you wrote in our GCC fork, then the default configuration should be all you need. You can update the `rustc_codegen_gcc` without worrying about GCC. @@ -143,7 +159,7 @@ You can do the same manually (although we don't recommend it): $ LIBRARY_PATH="[gcc-path value]" LD_LIBRARY_PATH="[gcc-path value]" rustc +$(cat $CG_GCCJIT_DIR/rust-toolchain | grep 'channel' | cut -d '=' -f 2 | sed 's/"//g' | sed 's/ //g') -Cpanic=abort -Zcodegen-backend=$CG_GCCJIT_DIR/target/release/librustc_codegen_gcc.so --sysroot $CG_GCCJIT_DIR/build_sysroot/sysroot my_crate.rs ``` -## Env vars +## Environment variables * _**CG_GCCJIT_DUMP_ALL_MODULES**_: Enables dumping of all compilation modules. When set to "1", a dump is created for each module during compilation and stored in `/tmp/reproducers/`. * _**CG_GCCJIT_DUMP_MODULE**_: Enables dumping of a specific module. When set with the module name, e.g., `CG_GCCJIT_DUMP_MODULE=module_name`, a dump of that specific module is created in `/tmp/reproducers/`. diff --git a/build_system/Cargo.toml b/build_system/Cargo.toml index d2600ed5a03..540d82369fd 100644 --- a/build_system/Cargo.toml +++ b/build_system/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "y" version = "0.1.0" -edition = "2021" +edition = "2024" [dependencies] boml = "0.3.1" diff --git a/build_system/src/main.rs b/build_system/src/main.rs index 39361718306..c70b00e09ae 100644 --- a/build_system/src/main.rs +++ b/build_system/src/main.rs @@ -60,7 +60,9 @@ pub enum Command { fn main() { if env::var("RUST_BACKTRACE").is_err() { - env::set_var("RUST_BACKTRACE", "1"); + unsafe { + env::set_var("RUST_BACKTRACE", "1"); + } } let command = match env::args().nth(1).as_deref() { diff --git a/build_system/src/utils.rs b/build_system/src/utils.rs index 401c23948e5..ca177a5feb8 100644 --- a/build_system/src/utils.rs +++ b/build_system/src/utils.rs @@ -10,7 +10,7 @@ use std::path::{Path, PathBuf}; use std::process::{Command, ExitStatus, Output}; #[cfg(unix)] -extern "C" { +unsafe extern "C" { fn raise(signal: c_int) -> c_int; } diff --git a/example/mini_core.rs b/example/mini_core.rs index c554a87b825..d1d8e8fd5bc 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -1,6 +1,14 @@ #![feature( - no_core, lang_items, intrinsics, unboxed_closures, extern_types, - decl_macro, rustc_attrs, transparent_unions, auto_traits, freeze_impls, + no_core, + lang_items, + intrinsics, + unboxed_closures, + extern_types, + decl_macro, + rustc_attrs, + transparent_unions, + auto_traits, + freeze_impls, thread_local )] #![no_core] @@ -35,13 +43,13 @@ impl, U: ?Sized> CoerceUnsized<*mut U> for *mut T {} pub trait DispatchFromDyn {} // &T -> &U -impl<'a, T: ?Sized+Unsize, U: ?Sized> DispatchFromDyn<&'a U> for &'a T {} +impl<'a, T: ?Sized + Unsize, U: ?Sized> DispatchFromDyn<&'a U> for &'a T {} // &mut T -> &mut U -impl<'a, T: ?Sized+Unsize, U: ?Sized> DispatchFromDyn<&'a mut U> for &'a mut T {} +impl<'a, T: ?Sized + Unsize, U: ?Sized> DispatchFromDyn<&'a mut U> for &'a mut T {} // *const T -> *const U -impl, U: ?Sized> DispatchFromDyn<*const U> for *const T {} +impl, U: ?Sized> DispatchFromDyn<*const U> for *const T {} // *mut T -> *mut U -impl, U: ?Sized> DispatchFromDyn<*mut U> for *mut T {} +impl, U: ?Sized> DispatchFromDyn<*mut U> for *mut T {} impl, U: ?Sized> DispatchFromDyn> for Box {} #[lang = "legacy_receiver"] @@ -52,8 +60,7 @@ impl LegacyReceiver for &mut T {} impl LegacyReceiver for Box {} #[lang = "receiver"] -trait Receiver { -} +trait Receiver {} #[lang = "copy"] pub trait Copy {} @@ -67,10 +74,13 @@ impl Copy for u16 {} impl Copy for u32 {} impl Copy for u64 {} impl Copy for usize {} +impl Copy for u128 {} impl Copy for i8 {} impl Copy for i16 {} impl Copy for i32 {} +impl Copy for i64 {} impl Copy for isize {} +impl Copy for i128 {} impl Copy for f32 {} impl Copy for f64 {} impl Copy for char {} @@ -336,7 +346,6 @@ impl PartialEq for u32 { } } - impl PartialEq for u64 { fn eq(&self, other: &u64) -> bool { (*self) == (*other) @@ -523,7 +532,11 @@ fn panic_in_cleanup() -> ! { #[track_caller] fn panic_bounds_check(index: usize, len: usize) -> ! { unsafe { - libc::printf("index out of bounds: the len is %d but the index is %d\n\0" as *const str as *const i8, len, index); + libc::printf( + "index out of bounds: the len is %d but the index is %d\n\0" as *const str as *const i8, + len, + index, + ); intrinsics::abort(); } } @@ -551,8 +564,7 @@ pub trait Deref { fn deref(&self) -> &Self::Target; } -pub trait Allocator { -} +pub trait Allocator {} impl Allocator for () {} @@ -634,6 +646,8 @@ pub union MaybeUninit { } pub mod intrinsics { + #[rustc_intrinsic] + pub const fn black_box(_dummy: T) -> T; #[rustc_intrinsic] pub fn abort() -> !; #[rustc_intrinsic] @@ -711,19 +725,27 @@ pub struct VaList<'a>(&'a mut VaListImpl); #[rustc_builtin_macro] #[rustc_macro_transparency = "semitransparent"] -pub macro stringify($($t:tt)*) { /* compiler built-in */ } +pub macro stringify($($t:tt)*) { + /* compiler built-in */ +} #[rustc_builtin_macro] #[rustc_macro_transparency = "semitransparent"] -pub macro file() { /* compiler built-in */ } +pub macro file() { + /* compiler built-in */ +} #[rustc_builtin_macro] #[rustc_macro_transparency = "semitransparent"] -pub macro line() { /* compiler built-in */ } +pub macro line() { + /* compiler built-in */ +} #[rustc_builtin_macro] #[rustc_macro_transparency = "semitransparent"] -pub macro cfg() { /* compiler built-in */ } +pub macro cfg() { + /* compiler built-in */ +} pub static A_STATIC: u8 = 42; diff --git a/libgccjit.version b/libgccjit.version index 125b04004b0..d06646dacc3 100644 --- a/libgccjit.version +++ b/libgccjit.version @@ -1 +1 @@ -0ea98a1365b81f7488073512c850e8ee951a4afd +8b194529188f9d3a98cc211caa805a5355bfa8f0 diff --git a/rust-toolchain b/rust-toolchain index 452d3f22dc5..a8cda28688c 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2025-04-25" +channel = "nightly-2025-05-12" components = ["rust-src", "rustc-dev", "llvm-tools-preview"] diff --git a/src/abi.rs b/src/abi.rs index a96b18e01c0..d882d3eecf4 100644 --- a/src/abi.rs +++ b/src/abi.rs @@ -9,9 +9,9 @@ use rustc_middle::ty::Ty; use rustc_middle::ty::layout::LayoutOf; #[cfg(feature = "master")] use rustc_session::config; -#[cfg(feature = "master")] -use rustc_target::callconv::Conv; use rustc_target::callconv::{ArgAttributes, CastTarget, FnAbi, PassMode}; +#[cfg(feature = "master")] +use rustc_target::callconv::{Conv, RiscvInterruptKind}; use crate::builder::Builder; use crate::context::CodegenCx; @@ -240,38 +240,57 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { #[cfg(feature = "master")] pub fn conv_to_fn_attribute<'gcc>(conv: Conv, arch: &str) -> Option> { - // TODO: handle the calling conventions returning None. let attribute = match conv { - Conv::C - | Conv::Rust - | Conv::CCmseNonSecureCall - | Conv::CCmseNonSecureEntry - | Conv::RiscvInterrupt { .. } => return None, - Conv::Cold => return None, + Conv::C | Conv::Rust => return None, + Conv::CCmseNonSecureCall => { + if arch == "arm" { + FnAttribute::ArmCmseNonsecureCall + } else { + return None; + } + } + Conv::CCmseNonSecureEntry => { + if arch == "arm" { + FnAttribute::ArmCmseNonsecureEntry + } else { + return None; + } + } + Conv::Cold => FnAttribute::Cold, + // NOTE: the preserve attributes are not yet implemented in GCC: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110899 Conv::PreserveMost => return None, Conv::PreserveAll => return None, Conv::GpuKernel => { - // TODO(antoyo): remove clippy allow attribute when this is implemented. - #[allow(clippy::if_same_then_else)] if arch == "amdgpu" { - return None; + FnAttribute::GcnAmdGpuHsaKernel } else if arch == "nvptx64" { - return None; + FnAttribute::NvptxKernel } else { panic!("Architecture {} does not support GpuKernel calling convention", arch); } } - Conv::AvrInterrupt => return None, - Conv::AvrNonBlockingInterrupt => return None, - Conv::ArmAapcs => return None, - Conv::Msp430Intr => return None, - Conv::X86Fastcall => return None, - Conv::X86Intr => return None, - Conv::X86Stdcall => return None, - Conv::X86ThisCall => return None, + // TODO(antoyo): check if those AVR attributes are mapped correctly. + Conv::AvrInterrupt => FnAttribute::AvrSignal, + Conv::AvrNonBlockingInterrupt => FnAttribute::AvrInterrupt, + Conv::ArmAapcs => FnAttribute::ArmPcs("aapcs"), + Conv::Msp430Intr => FnAttribute::Msp430Interrupt, + Conv::RiscvInterrupt { kind } => { + let kind = match kind { + RiscvInterruptKind::Machine => "machine", + RiscvInterruptKind::Supervisor => "supervisor", + }; + FnAttribute::RiscvInterrupt(kind) + } + Conv::X86Fastcall => FnAttribute::X86FastCall, + Conv::X86Intr => FnAttribute::X86Interrupt, + Conv::X86Stdcall => FnAttribute::X86Stdcall, + Conv::X86ThisCall => FnAttribute::X86ThisCall, + // NOTE: the vectorcall calling convention is not yet implemented in GCC: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89485 Conv::X86VectorCall => return None, - Conv::X86_64SysV => FnAttribute::SysvAbi, - Conv::X86_64Win64 => FnAttribute::MsAbi, + Conv::X86_64SysV => FnAttribute::X86SysvAbi, + Conv::X86_64Win64 => FnAttribute::X86MsAbi, }; Some(attribute) } diff --git a/src/attributes.rs b/src/attributes.rs index 69b04dd5796..e63091c6082 100644 --- a/src/attributes.rs +++ b/src/attributes.rs @@ -6,21 +6,69 @@ use rustc_attr_parsing::InlineAttr; use rustc_attr_parsing::InstructionSetAttr; #[cfg(feature = "master")] use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; +#[cfg(feature = "master")] +use rustc_middle::mir::TerminatorKind; use rustc_middle::ty; use crate::context::CodegenCx; use crate::gcc_util::to_gcc_features; -/// Get GCC attribute for the provided inline heuristic. +/// Checks if the function `instance` is recursively inline. +/// Returns `false` if a functions is guaranteed to be non-recursive, and `true` if it *might* be recursive. +#[cfg(feature = "master")] +fn resursively_inline<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + instance: ty::Instance<'tcx>, +) -> bool { + // No body, so we can't check if this is recursively inline, so we assume it is. + if !cx.tcx.is_mir_available(instance.def_id()) { + return true; + } + // `expect_local` ought to never fail: we should be checking a function within this codegen unit. + let body = cx.tcx.optimized_mir(instance.def_id()); + for block in body.basic_blocks.iter() { + let Some(ref terminator) = block.terminator else { continue }; + // I assume that the recursive-inline issue applies only to functions, and not to drops. + // In principle, a recursive, `#[inline(always)]` drop could(?) exist, but I don't think it does. + let TerminatorKind::Call { ref func, .. } = terminator.kind else { continue }; + let Some((def, _args)) = func.const_fn_def() else { continue }; + // Check if the called function is recursively inline. + if matches!( + cx.tcx.codegen_fn_attrs(def).inline, + InlineAttr::Always | InlineAttr::Force { .. } + ) { + return true; + } + } + false +} + +/// Get GCC attribute for the provided inline heuristic, attached to `instance`. #[cfg(feature = "master")] #[inline] fn inline_attr<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, inline: InlineAttr, + instance: ty::Instance<'tcx>, ) -> Option> { match inline { + InlineAttr::Always => { + // We can't simply always return `always_inline` unconditionally. + // It is *NOT A HINT* and does not work for recursive functions. + // + // So, it can only be applied *if*: + // The current function does not call any functions marked `#[inline(always)]`. + // + // That prevents issues steming from recursive `#[inline(always)]` at a *relatively* small cost. + // We *only* need to check all the terminators of a function marked with this attribute. + if resursively_inline(cx, instance) { + Some(FnAttribute::Inline) + } else { + Some(FnAttribute::AlwaysInline) + } + } InlineAttr::Hint => Some(FnAttribute::Inline), - InlineAttr::Always | InlineAttr::Force { .. } => Some(FnAttribute::AlwaysInline), + InlineAttr::Force { .. } => Some(FnAttribute::AlwaysInline), InlineAttr::Never => { if cx.sess().target.arch != "amdgpu" { Some(FnAttribute::NoInline) @@ -52,7 +100,7 @@ pub fn from_fn_attrs<'gcc, 'tcx>( } else { codegen_fn_attrs.inline }; - if let Some(attr) = inline_attr(cx, inline) { + if let Some(attr) = inline_attr(cx, inline, instance) { if let FnAttribute::AlwaysInline = attr { func.add_attribute(FnAttribute::Inline); } @@ -88,14 +136,8 @@ pub fn from_fn_attrs<'gcc, 'tcx>( let target_features = function_features .iter() .filter_map(|feature| { - // FIXME(antoyo): for some reasons, disabling SSE results in the following error when - // compiling Rust for Linux: - // SSE register return with SSE disabled - // TODO(antoyo): support soft-float and retpoline-external-thunk. - if feature.contains("soft-float") - || feature.contains("retpoline-external-thunk") - || *feature == "-sse" - { + // TODO(antoyo): support soft-float. + if feature.contains("soft-float") { return None; } diff --git a/src/back/lto.rs b/src/back/lto.rs index faeb2643ecb..e9c87f35779 100644 --- a/src/back/lto.rs +++ b/src/back/lto.rs @@ -593,7 +593,7 @@ fn thin_lto( Ok((opt_jobs, copy_jobs)) } -pub unsafe fn optimize_thin_module( +pub fn optimize_thin_module( thin_module: ThinModule, _cgcx: &CodegenContext, ) -> Result, FatalError> { diff --git a/src/back/write.rs b/src/back/write.rs index 16c895322e8..09e955acf39 100644 --- a/src/back/write.rs +++ b/src/back/write.rs @@ -14,7 +14,7 @@ use crate::base::add_pic_option; use crate::errors::CopyBitcode; use crate::{GccCodegenBackend, GccContext}; -pub(crate) unsafe fn codegen( +pub(crate) fn codegen( cgcx: &CodegenContext, dcx: DiagCtxtHandle<'_>, module: ModuleCodegen, diff --git a/src/builder.rs b/src/builder.rs index 9e5ebf3a9a4..4e2163201fd 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -568,11 +568,28 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { ) { let mut gcc_cases = vec![]; let typ = self.val_ty(value); - for (on_val, dest) in cases { - let on_val = self.const_uint_big(typ, on_val); - gcc_cases.push(self.context.new_case(on_val, on_val, dest)); + // FIXME(FractalFir): This is a workaround for a libgccjit limitation. + // Currently, libgccjit can't directly create 128 bit integers. + // Since switch cases must be values, and casts are not constant, we can't use 128 bit switch cases. + // In such a case, we will simply fall back to an if-ladder. + // This *may* be slower than a native switch, but a slow working solution is better than none at all. + if typ.is_i128(self) || typ.is_u128(self) { + for (on_val, dest) in cases { + let on_val = self.const_uint_big(typ, on_val); + let is_case = + self.context.new_comparison(self.location, ComparisonOp::Equals, value, on_val); + let next_block = self.current_func().new_block("case"); + self.block.end_with_conditional(self.location, is_case, dest, next_block); + self.block = next_block; + } + self.block.end_with_jump(self.location, default_block); + } else { + for (on_val, dest) in cases { + let on_val = self.const_uint_big(typ, on_val); + gcc_cases.push(self.context.new_case(on_val, on_val, dest)); + } + self.block.end_with_switch(self.location, value, default_block, &gcc_cases); } - self.block.end_with_switch(self.location, value, default_block, &gcc_cases); } #[cfg(feature = "master")] diff --git a/src/consts.rs b/src/consts.rs index 0a67bd7bc71..033afc0f8fb 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -191,13 +191,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { // TODO(antoyo): check if it's okay that no link_section is set. let typ = self.val_ty(cv).get_aligned(align.bytes()); - let global = self.declare_private_global(&name[..], typ); - global + self.declare_private_global(&name[..], typ) } _ => { let typ = self.val_ty(cv).get_aligned(align.bytes()); - let global = self.declare_unnamed_global(typ); - global + self.declare_unnamed_global(typ) } }; global.global_set_initializer_rvalue(cv); diff --git a/src/debuginfo.rs b/src/debuginfo.rs index f3ced864395..e0597d0030d 100644 --- a/src/debuginfo.rs +++ b/src/debuginfo.rs @@ -289,7 +289,7 @@ impl<'gcc, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> { ) -> Self::DILocation { let pos = span.lo(); let DebugLoc { file, line, col } = self.lookup_debug_loc(pos); - let loc = match file.name { + match file.name { rustc_span::FileName::Real(ref name) => match *name { rustc_span::RealFileName::LocalPath(ref name) => { if let Some(name) = name.to_str() { @@ -314,7 +314,6 @@ impl<'gcc, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> { } }, _ => Location::null(), - }; - loc + } } } diff --git a/src/declare.rs b/src/declare.rs index c1ca3eb849e..bed82073e2c 100644 --- a/src/declare.rs +++ b/src/declare.rs @@ -157,6 +157,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { /// /// If there’s a value with the same name already declared, the function will /// update the declaration and return existing Value instead. +#[allow(clippy::let_and_return)] fn declare_raw_fn<'gcc>( cx: &CodegenCx<'gcc, '_>, name: &str, diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs index b8d1cde1d5d..5ada535aa41 100644 --- a/src/intrinsic/archs.rs +++ b/src/intrinsic/archs.rs @@ -38,6 +38,7 @@ match name { "llvm.aarch64.gcsss" => "__builtin_arm_gcsss", "llvm.aarch64.isb" => "__builtin_arm_isb", "llvm.aarch64.prefetch" => "__builtin_arm_prefetch", + "llvm.aarch64.sme.in.streaming.mode" => "__builtin_arm_in_streaming_mode", "llvm.aarch64.sve.aesd" => "__builtin_sve_svaesd_u8", "llvm.aarch64.sve.aese" => "__builtin_sve_svaese_u8", "llvm.aarch64.sve.aesimc" => "__builtin_sve_svaesimc_u8", @@ -55,6 +56,8 @@ match name { "llvm.aarch64.ttest" => "__builtin_arm_ttest", // amdgcn "llvm.amdgcn.alignbyte" => "__builtin_amdgcn_alignbyte", + "llvm.amdgcn.ashr.pk.i8.i32" => "__builtin_amdgcn_ashr_pk_i8_i32", + "llvm.amdgcn.ashr.pk.u8.i32" => "__builtin_amdgcn_ashr_pk_u8_i32", "llvm.amdgcn.buffer.wbinvl1" => "__builtin_amdgcn_buffer_wbinvl1", "llvm.amdgcn.buffer.wbinvl1.sc" => "__builtin_amdgcn_buffer_wbinvl1_sc", "llvm.amdgcn.buffer.wbinvl1.vol" => "__builtin_amdgcn_buffer_wbinvl1_vol", @@ -64,6 +67,7 @@ match name { "llvm.amdgcn.cubetc" => "__builtin_amdgcn_cubetc", "llvm.amdgcn.cvt.f32.bf8" => "__builtin_amdgcn_cvt_f32_bf8", "llvm.amdgcn.cvt.f32.fp8" => "__builtin_amdgcn_cvt_f32_fp8", + "llvm.amdgcn.cvt.off.f32.i4" => "__builtin_amdgcn_cvt_off_f32_i4", "llvm.amdgcn.cvt.pk.bf8.f32" => "__builtin_amdgcn_cvt_pk_bf8_f32", "llvm.amdgcn.cvt.pk.f32.bf8" => "__builtin_amdgcn_cvt_pk_f32_bf8", "llvm.amdgcn.cvt.pk.f32.fp8" => "__builtin_amdgcn_cvt_pk_f32_fp8", @@ -74,7 +78,58 @@ match name { "llvm.amdgcn.cvt.pknorm.i16" => "__builtin_amdgcn_cvt_pknorm_i16", "llvm.amdgcn.cvt.pknorm.u16" => "__builtin_amdgcn_cvt_pknorm_u16", "llvm.amdgcn.cvt.pkrtz" => "__builtin_amdgcn_cvt_pkrtz", + "llvm.amdgcn.cvt.scalef32.2xpk16.bf6.f32" => "__builtin_amdgcn_cvt_scalef32_2xpk16_bf6_f32", + "llvm.amdgcn.cvt.scalef32.2xpk16.fp6.f32" => "__builtin_amdgcn_cvt_scalef32_2xpk16_fp6_f32", + "llvm.amdgcn.cvt.scalef32.f16.bf8" => "__builtin_amdgcn_cvt_scalef32_f16_bf8", + "llvm.amdgcn.cvt.scalef32.f16.fp8" => "__builtin_amdgcn_cvt_scalef32_f16_fp8", + "llvm.amdgcn.cvt.scalef32.f32.bf8" => "__builtin_amdgcn_cvt_scalef32_f32_bf8", + "llvm.amdgcn.cvt.scalef32.f32.fp8" => "__builtin_amdgcn_cvt_scalef32_f32_fp8", + "llvm.amdgcn.cvt.scalef32.pk.bf16.bf8" => "__builtin_amdgcn_cvt_scalef32_pk_bf16_bf8", + "llvm.amdgcn.cvt.scalef32.pk.bf16.fp4" => "__builtin_amdgcn_cvt_scalef32_pk_bf16_fp4", + "llvm.amdgcn.cvt.scalef32.pk.bf16.fp8" => "__builtin_amdgcn_cvt_scalef32_pk_bf16_fp8", + "llvm.amdgcn.cvt.scalef32.pk.bf8.bf16" => "__builtin_amdgcn_cvt_scalef32_pk_bf8_bf16", + "llvm.amdgcn.cvt.scalef32.pk.bf8.f16" => "__builtin_amdgcn_cvt_scalef32_pk_bf8_f16", + "llvm.amdgcn.cvt.scalef32.pk.bf8.f32" => "__builtin_amdgcn_cvt_scalef32_pk_bf8_f32", + "llvm.amdgcn.cvt.scalef32.pk.f16.bf8" => "__builtin_amdgcn_cvt_scalef32_pk_f16_bf8", + "llvm.amdgcn.cvt.scalef32.pk.f16.fp4" => "__builtin_amdgcn_cvt_scalef32_pk_f16_fp4", + "llvm.amdgcn.cvt.scalef32.pk.f16.fp8" => "__builtin_amdgcn_cvt_scalef32_pk_f16_fp8", + "llvm.amdgcn.cvt.scalef32.pk.f32.bf8" => "__builtin_amdgcn_cvt_scalef32_pk_f32_bf8", + "llvm.amdgcn.cvt.scalef32.pk.f32.fp4" => "__builtin_amdgcn_cvt_scalef32_pk_f32_fp4", + "llvm.amdgcn.cvt.scalef32.pk.f32.fp8" => "__builtin_amdgcn_cvt_scalef32_pk_f32_fp8", + "llvm.amdgcn.cvt.scalef32.pk.fp4.bf16" => "__builtin_amdgcn_cvt_scalef32_pk_fp4_bf16", + "llvm.amdgcn.cvt.scalef32.pk.fp4.f16" => "__builtin_amdgcn_cvt_scalef32_pk_fp4_f16", + "llvm.amdgcn.cvt.scalef32.pk.fp4.f32" => "__builtin_amdgcn_cvt_scalef32_pk_fp4_f32", + "llvm.amdgcn.cvt.scalef32.pk.fp8.bf16" => "__builtin_amdgcn_cvt_scalef32_pk_fp8_bf16", + "llvm.amdgcn.cvt.scalef32.pk.fp8.f16" => "__builtin_amdgcn_cvt_scalef32_pk_fp8_f16", + "llvm.amdgcn.cvt.scalef32.pk.fp8.f32" => "__builtin_amdgcn_cvt_scalef32_pk_fp8_f32", + "llvm.amdgcn.cvt.scalef32.pk32.bf16.bf6" => "__builtin_amdgcn_cvt_scalef32_pk32_bf16_bf6", + "llvm.amdgcn.cvt.scalef32.pk32.bf16.fp6" => "__builtin_amdgcn_cvt_scalef32_pk32_bf16_fp6", + "llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16" => "__builtin_amdgcn_cvt_scalef32_pk32_bf6_bf16", + "llvm.amdgcn.cvt.scalef32.pk32.bf6.f16" => "__builtin_amdgcn_cvt_scalef32_pk32_bf6_f16", + "llvm.amdgcn.cvt.scalef32.pk32.f16.bf6" => "__builtin_amdgcn_cvt_scalef32_pk32_f16_bf6", + "llvm.amdgcn.cvt.scalef32.pk32.f16.fp6" => "__builtin_amdgcn_cvt_scalef32_pk32_f16_fp6", + "llvm.amdgcn.cvt.scalef32.pk32.f32.bf6" => "__builtin_amdgcn_cvt_scalef32_pk32_f32_bf6", + "llvm.amdgcn.cvt.scalef32.pk32.f32.fp6" => "__builtin_amdgcn_cvt_scalef32_pk32_f32_fp6", + "llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16" => "__builtin_amdgcn_cvt_scalef32_pk32_fp6_bf16", + "llvm.amdgcn.cvt.scalef32.pk32.fp6.f16" => "__builtin_amdgcn_cvt_scalef32_pk32_fp6_f16", + "llvm.amdgcn.cvt.scalef32.sr.bf8.bf16" => "__builtin_amdgcn_cvt_scalef32_sr_bf8_bf16", + "llvm.amdgcn.cvt.scalef32.sr.bf8.f16" => "__builtin_amdgcn_cvt_scalef32_sr_bf8_f16", + "llvm.amdgcn.cvt.scalef32.sr.bf8.f32" => "__builtin_amdgcn_cvt_scalef32_sr_bf8_f32", + "llvm.amdgcn.cvt.scalef32.sr.fp8.bf16" => "__builtin_amdgcn_cvt_scalef32_sr_fp8_bf16", + "llvm.amdgcn.cvt.scalef32.sr.fp8.f16" => "__builtin_amdgcn_cvt_scalef32_sr_fp8_f16", + "llvm.amdgcn.cvt.scalef32.sr.fp8.f32" => "__builtin_amdgcn_cvt_scalef32_sr_fp8_f32", + "llvm.amdgcn.cvt.scalef32.sr.pk.fp4.bf16" => "__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_bf16", + "llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16" => "__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f16", + "llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f32" => "__builtin_amdgcn_cvt_scalef32_sr_pk_fp4_f32", + "llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.bf16" => "__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16", + "llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f16" => "__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f16", + "llvm.amdgcn.cvt.scalef32.sr.pk32.bf6.f32" => "__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32", + "llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.bf16" => "__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16", + "llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f16" => "__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16", + "llvm.amdgcn.cvt.scalef32.sr.pk32.fp6.f32" => "__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32", + "llvm.amdgcn.cvt.sr.bf16.f32" => "__builtin_amdgcn_cvt_sr_bf16_f32", "llvm.amdgcn.cvt.sr.bf8.f32" => "__builtin_amdgcn_cvt_sr_bf8_f32", + "llvm.amdgcn.cvt.sr.f16.f32" => "__builtin_amdgcn_cvt_sr_f16_f32", "llvm.amdgcn.cvt.sr.fp8.f32" => "__builtin_amdgcn_cvt_sr_fp8_f32", "llvm.amdgcn.dispatch.id" => "__builtin_amdgcn_dispatch_id", "llvm.amdgcn.dot4.f32.bf8.bf8" => "__builtin_amdgcn_dot4_f32_bf8_bf8", @@ -83,6 +138,7 @@ match name { "llvm.amdgcn.dot4.f32.fp8.fp8" => "__builtin_amdgcn_dot4_f32_fp8_fp8", "llvm.amdgcn.ds.add.gs.reg.rtn" => "__builtin_amdgcn_ds_add_gs_reg_rtn", "llvm.amdgcn.ds.bpermute" => "__builtin_amdgcn_ds_bpermute", + "llvm.amdgcn.ds.bpermute.fi.b32" => "__builtin_amdgcn_ds_bpermute_fi_b32", "llvm.amdgcn.ds.gws.barrier" => "__builtin_amdgcn_ds_gws_barrier", "llvm.amdgcn.ds.gws.init" => "__builtin_amdgcn_ds_gws_init", "llvm.amdgcn.ds.gws.sema.br" => "__builtin_amdgcn_ds_gws_sema_br", @@ -97,6 +153,7 @@ match name { "llvm.amdgcn.fdot2.bf16.bf16" => "__builtin_amdgcn_fdot2_bf16_bf16", "llvm.amdgcn.fdot2.f16.f16" => "__builtin_amdgcn_fdot2_f16_f16", "llvm.amdgcn.fdot2.f32.bf16" => "__builtin_amdgcn_fdot2_f32_bf16", + "llvm.amdgcn.fdot2c.f32.bf16" => "__builtin_amdgcn_fdot2c_f32_bf16", "llvm.amdgcn.fmul.legacy" => "__builtin_amdgcn_fmul_legacy", "llvm.amdgcn.global.load.lds" => "__builtin_amdgcn_global_load_lds", "llvm.amdgcn.groupstaticsize" => "__builtin_amdgcn_groupstaticsize", @@ -118,8 +175,10 @@ match name { "llvm.amdgcn.mfma.f32.16x16x16f16" => "__builtin_amdgcn_mfma_f32_16x16x16f16", "llvm.amdgcn.mfma.f32.16x16x1f32" => "__builtin_amdgcn_mfma_f32_16x16x1f32", "llvm.amdgcn.mfma.f32.16x16x2bf16" => "__builtin_amdgcn_mfma_f32_16x16x2bf16", + "llvm.amdgcn.mfma.f32.16x16x32.bf16" => "__builtin_amdgcn_mfma_f32_16x16x32_bf16", "llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8" => "__builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8", "llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8" => "__builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8", + "llvm.amdgcn.mfma.f32.16x16x32.f16" => "__builtin_amdgcn_mfma_f32_16x16x32_f16", "llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8" => "__builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8", "llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8" => "__builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8", "llvm.amdgcn.mfma.f32.16x16x4bf16.1k" => "__builtin_amdgcn_mfma_f32_16x16x4bf16_1k", @@ -127,8 +186,10 @@ match name { "llvm.amdgcn.mfma.f32.16x16x4f32" => "__builtin_amdgcn_mfma_f32_16x16x4f32", "llvm.amdgcn.mfma.f32.16x16x8.xf32" => "__builtin_amdgcn_mfma_f32_16x16x8_xf32", "llvm.amdgcn.mfma.f32.16x16x8bf16" => "__builtin_amdgcn_mfma_f32_16x16x8bf16", + "llvm.amdgcn.mfma.f32.32x32x16.bf16" => "__builtin_amdgcn_mfma_f32_32x32x16_bf16", "llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8" => "__builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8", "llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8" => "__builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8", + "llvm.amdgcn.mfma.f32.32x32x16.f16" => "__builtin_amdgcn_mfma_f32_32x32x16_f16", "llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8" => "__builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8", "llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8" => "__builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8", "llvm.amdgcn.mfma.f32.32x32x1f32" => "__builtin_amdgcn_mfma_f32_32x32x1f32", @@ -149,7 +210,9 @@ match name { "llvm.amdgcn.mfma.i32.16x16x16i8" => "__builtin_amdgcn_mfma_i32_16x16x16i8", "llvm.amdgcn.mfma.i32.16x16x32.i8" => "__builtin_amdgcn_mfma_i32_16x16x32_i8", "llvm.amdgcn.mfma.i32.16x16x4i8" => "__builtin_amdgcn_mfma_i32_16x16x4i8", + "llvm.amdgcn.mfma.i32.16x16x64.i8" => "__builtin_amdgcn_mfma_i32_16x16x64_i8", "llvm.amdgcn.mfma.i32.32x32x16.i8" => "__builtin_amdgcn_mfma_i32_32x32x16_i8", + "llvm.amdgcn.mfma.i32.32x32x32.i8" => "__builtin_amdgcn_mfma_i32_32x32x32_i8", "llvm.amdgcn.mfma.i32.32x32x4i8" => "__builtin_amdgcn_mfma_i32_32x32x4i8", "llvm.amdgcn.mfma.i32.32x32x8i8" => "__builtin_amdgcn_mfma_i32_32x32x8i8", "llvm.amdgcn.mfma.i32.4x4x4i8" => "__builtin_amdgcn_mfma_i32_4x4x4i8", @@ -159,25 +222,25 @@ match name { "llvm.amdgcn.perm" => "__builtin_amdgcn_perm", "llvm.amdgcn.permlane16.var" => "__builtin_amdgcn_permlane16_var", "llvm.amdgcn.permlanex16.var" => "__builtin_amdgcn_permlanex16_var", + "llvm.amdgcn.prng.b32" => "__builtin_amdgcn_prng_b32", "llvm.amdgcn.qsad.pk.u16.u8" => "__builtin_amdgcn_qsad_pk_u16_u8", "llvm.amdgcn.queue.ptr" => "__builtin_amdgcn_queue_ptr", + "llvm.amdgcn.raw.ptr.buffer.load.lds" => "__builtin_amdgcn_raw_ptr_buffer_load_lds", "llvm.amdgcn.rcp.legacy" => "__builtin_amdgcn_rcp_legacy", "llvm.amdgcn.rsq.legacy" => "__builtin_amdgcn_rsq_legacy", "llvm.amdgcn.s.barrier" => "__builtin_amdgcn_s_barrier", - "llvm.amdgcn.s.barrier.init" => "__builtin_amdgcn_s_barrier_init", - "llvm.amdgcn.s.barrier.join" => "__builtin_amdgcn_s_barrier_join", - "llvm.amdgcn.s.barrier.leave" => "__builtin_amdgcn_s_barrier_leave", "llvm.amdgcn.s.barrier.signal" => "__builtin_amdgcn_s_barrier_signal", "llvm.amdgcn.s.barrier.signal.isfirst" => "__builtin_amdgcn_s_barrier_signal_isfirst", - "llvm.amdgcn.s.barrier.signal.isfirst.var" => "__builtin_amdgcn_s_barrier_signal_isfirst_var", "llvm.amdgcn.s.barrier.signal.var" => "__builtin_amdgcn_s_barrier_signal_var", "llvm.amdgcn.s.barrier.wait" => "__builtin_amdgcn_s_barrier_wait", + "llvm.amdgcn.s.buffer.prefetch.data" => "__builtin_amdgcn_s_buffer_prefetch_data", "llvm.amdgcn.s.dcache.inv" => "__builtin_amdgcn_s_dcache_inv", "llvm.amdgcn.s.dcache.inv.vol" => "__builtin_amdgcn_s_dcache_inv_vol", "llvm.amdgcn.s.dcache.wb" => "__builtin_amdgcn_s_dcache_wb", "llvm.amdgcn.s.dcache.wb.vol" => "__builtin_amdgcn_s_dcache_wb_vol", "llvm.amdgcn.s.decperflevel" => "__builtin_amdgcn_s_decperflevel", "llvm.amdgcn.s.get.barrier.state" => "__builtin_amdgcn_s_get_barrier_state", + "llvm.amdgcn.s.get.named.barrier.state" => "__builtin_amdgcn_s_get_named_barrier_state", "llvm.amdgcn.s.get.waveid.in.workgroup" => "__builtin_amdgcn_s_get_waveid_in_workgroup", "llvm.amdgcn.s.getpc" => "__builtin_amdgcn_s_getpc", "llvm.amdgcn.s.getreg" => "__builtin_amdgcn_s_getreg", @@ -194,7 +257,6 @@ match name { "llvm.amdgcn.s.ttracedata.imm" => "__builtin_amdgcn_s_ttracedata_imm", "llvm.amdgcn.s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready", "llvm.amdgcn.s.waitcnt" => "__builtin_amdgcn_s_waitcnt", - "llvm.amdgcn.s.wakeup.barrier" => "__builtin_amdgcn_s_wakeup_barrier", "llvm.amdgcn.sad.hi.u8" => "__builtin_amdgcn_sad_hi_u8", "llvm.amdgcn.sad.u16" => "__builtin_amdgcn_sad_u16", "llvm.amdgcn.sad.u8" => "__builtin_amdgcn_sad_u8", @@ -203,20 +265,34 @@ match name { "llvm.amdgcn.sdot2" => "__builtin_amdgcn_sdot2", "llvm.amdgcn.sdot4" => "__builtin_amdgcn_sdot4", "llvm.amdgcn.sdot8" => "__builtin_amdgcn_sdot8", + "llvm.amdgcn.smfmac.f32.16x16x128.bf8.bf8" => "__builtin_amdgcn_smfmac_f32_16x16x128_bf8_bf8", + "llvm.amdgcn.smfmac.f32.16x16x128.bf8.fp8" => "__builtin_amdgcn_smfmac_f32_16x16x128_bf8_fp8", + "llvm.amdgcn.smfmac.f32.16x16x128.fp8.bf8" => "__builtin_amdgcn_smfmac_f32_16x16x128_fp8_bf8", + "llvm.amdgcn.smfmac.f32.16x16x128.fp8.fp8" => "__builtin_amdgcn_smfmac_f32_16x16x128_fp8_fp8", "llvm.amdgcn.smfmac.f32.16x16x32.bf16" => "__builtin_amdgcn_smfmac_f32_16x16x32_bf16", "llvm.amdgcn.smfmac.f32.16x16x32.f16" => "__builtin_amdgcn_smfmac_f32_16x16x32_f16", + "llvm.amdgcn.smfmac.f32.16x16x64.bf16" => "__builtin_amdgcn_smfmac_f32_16x16x64_bf16", "llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8" => "__builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8", "llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8" => "__builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8", + "llvm.amdgcn.smfmac.f32.16x16x64.f16" => "__builtin_amdgcn_smfmac_f32_16x16x64_f16", "llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8" => "__builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8", "llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8" => "__builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8", "llvm.amdgcn.smfmac.f32.32x32x16.bf16" => "__builtin_amdgcn_smfmac_f32_32x32x16_bf16", "llvm.amdgcn.smfmac.f32.32x32x16.f16" => "__builtin_amdgcn_smfmac_f32_32x32x16_f16", + "llvm.amdgcn.smfmac.f32.32x32x32.bf16" => "__builtin_amdgcn_smfmac_f32_32x32x32_bf16", "llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8" => "__builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8", "llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8" => "__builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8", + "llvm.amdgcn.smfmac.f32.32x32x32.f16" => "__builtin_amdgcn_smfmac_f32_32x32x32_f16", "llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8" => "__builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8", "llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8" => "__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8", + "llvm.amdgcn.smfmac.f32.32x32x64.bf8.bf8" => "__builtin_amdgcn_smfmac_f32_32x32x64_bf8_bf8", + "llvm.amdgcn.smfmac.f32.32x32x64.bf8.fp8" => "__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8", + "llvm.amdgcn.smfmac.f32.32x32x64.fp8.bf8" => "__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8", + "llvm.amdgcn.smfmac.f32.32x32x64.fp8.fp8" => "__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8", + "llvm.amdgcn.smfmac.i32.16x16x128.i8" => "__builtin_amdgcn_smfmac_i32_16x16x128_i8", "llvm.amdgcn.smfmac.i32.16x16x64.i8" => "__builtin_amdgcn_smfmac_i32_16x16x64_i8", "llvm.amdgcn.smfmac.i32.32x32x32.i8" => "__builtin_amdgcn_smfmac_i32_32x32x32_i8", + "llvm.amdgcn.smfmac.i32.32x32x64.i8" => "__builtin_amdgcn_smfmac_i32_32x32x64_i8", "llvm.amdgcn.sudot4" => "__builtin_amdgcn_sudot4", "llvm.amdgcn.sudot8" => "__builtin_amdgcn_sudot8", "llvm.amdgcn.udot2" => "__builtin_amdgcn_udot2", @@ -227,6 +303,9 @@ match name { "llvm.amdgcn.workgroup.id.x" => "__builtin_amdgcn_workgroup_id_x", "llvm.amdgcn.workgroup.id.y" => "__builtin_amdgcn_workgroup_id_y", "llvm.amdgcn.workgroup.id.z" => "__builtin_amdgcn_workgroup_id_z", + "llvm.amdgcn.workitem.id.x" => "__builtin_amdgcn_workitem_id_x", + "llvm.amdgcn.workitem.id.y" => "__builtin_amdgcn_workitem_id_y", + "llvm.amdgcn.workitem.id.z" => "__builtin_amdgcn_workitem_id_z", // arm "llvm.arm.cdp" => "__builtin_arm_cdp", "llvm.arm.cdp2" => "__builtin_arm_cdp2", @@ -342,8 +421,6 @@ match name { "llvm.bpf.pseudo" => "__builtin_bpf_pseudo", // cuda "llvm.cuda.syncthreads" => "__syncthreads", - // dx - "llvm.dx.create.handle" => "__builtin_hlsl_create_handle", // hexagon "llvm.hexagon.A2.abs" => "__builtin_HEXAGON_A2_abs", "llvm.hexagon.A2.absp" => "__builtin_HEXAGON_A2_absp", @@ -1255,6 +1332,10 @@ match name { "llvm.hexagon.SI.to.SXTHI.asrh" => "__builtin_SI_to_SXTHI_asrh", "llvm.hexagon.V6.extractw" => "__builtin_HEXAGON_V6_extractw", "llvm.hexagon.V6.extractw.128B" => "__builtin_HEXAGON_V6_extractw_128B", + "llvm.hexagon.V6.get.qfext" => "__builtin_HEXAGON_V6_get_qfext", + "llvm.hexagon.V6.get.qfext.128B" => "__builtin_HEXAGON_V6_get_qfext_128B", + "llvm.hexagon.V6.get.qfext.oracc" => "__builtin_HEXAGON_V6_get_qfext_oracc", + "llvm.hexagon.V6.get.qfext.oracc.128B" => "__builtin_HEXAGON_V6_get_qfext_oracc_128B", "llvm.hexagon.V6.hi" => "__builtin_HEXAGON_V6_hi", "llvm.hexagon.V6.hi.128B" => "__builtin_HEXAGON_V6_hi_128B", "llvm.hexagon.V6.lo" => "__builtin_HEXAGON_V6_lo", @@ -1281,6 +1362,8 @@ match name { "llvm.hexagon.V6.pred.scalar2v2.128B" => "__builtin_HEXAGON_V6_pred_scalar2v2_128B", "llvm.hexagon.V6.pred.xor" => "__builtin_HEXAGON_V6_pred_xor", "llvm.hexagon.V6.pred.xor.128B" => "__builtin_HEXAGON_V6_pred_xor_128B", + "llvm.hexagon.V6.set.qfext" => "__builtin_HEXAGON_V6_set_qfext", + "llvm.hexagon.V6.set.qfext.128B" => "__builtin_HEXAGON_V6_set_qfext_128B", "llvm.hexagon.V6.shuffeqh" => "__builtin_HEXAGON_V6_shuffeqh", "llvm.hexagon.V6.shuffeqh.128B" => "__builtin_HEXAGON_V6_shuffeqh_128B", "llvm.hexagon.V6.shuffeqw" => "__builtin_HEXAGON_V6_shuffeqw", @@ -1301,6 +1384,8 @@ match name { "llvm.hexagon.V6.vS32b.nt.qpred.ai.128B" => "__builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B", "llvm.hexagon.V6.vS32b.qpred.ai" => "__builtin_HEXAGON_V6_vS32b_qpred_ai", "llvm.hexagon.V6.vS32b.qpred.ai.128B" => "__builtin_HEXAGON_V6_vS32b_qpred_ai_128B", + "llvm.hexagon.V6.vabs.f8" => "__builtin_HEXAGON_V6_vabs_f8", + "llvm.hexagon.V6.vabs.f8.128B" => "__builtin_HEXAGON_V6_vabs_f8_128B", "llvm.hexagon.V6.vabs.hf" => "__builtin_HEXAGON_V6_vabs_hf", "llvm.hexagon.V6.vabs.hf.128B" => "__builtin_HEXAGON_V6_vabs_hf_128B", "llvm.hexagon.V6.vabs.sf" => "__builtin_HEXAGON_V6_vabs_sf", @@ -1327,6 +1412,8 @@ match name { "llvm.hexagon.V6.vabsw.sat.128B" => "__builtin_HEXAGON_V6_vabsw_sat_128B", "llvm.hexagon.V6.vadd.hf" => "__builtin_HEXAGON_V6_vadd_hf", "llvm.hexagon.V6.vadd.hf.128B" => "__builtin_HEXAGON_V6_vadd_hf_128B", + "llvm.hexagon.V6.vadd.hf.f8" => "__builtin_HEXAGON_V6_vadd_hf_f8", + "llvm.hexagon.V6.vadd.hf.f8.128B" => "__builtin_HEXAGON_V6_vadd_hf_f8_128B", "llvm.hexagon.V6.vadd.hf.hf" => "__builtin_HEXAGON_V6_vadd_hf_hf", "llvm.hexagon.V6.vadd.hf.hf.128B" => "__builtin_HEXAGON_V6_vadd_hf_hf_128B", "llvm.hexagon.V6.vadd.qf16" => "__builtin_HEXAGON_V6_vadd_qf16", @@ -1549,10 +1636,14 @@ match name { "llvm.hexagon.V6.vcvt.b.hf.128B" => "__builtin_HEXAGON_V6_vcvt_b_hf_128B", "llvm.hexagon.V6.vcvt.bf.sf" => "__builtin_HEXAGON_V6_vcvt_bf_sf", "llvm.hexagon.V6.vcvt.bf.sf.128B" => "__builtin_HEXAGON_V6_vcvt_bf_sf_128B", + "llvm.hexagon.V6.vcvt.f8.hf" => "__builtin_HEXAGON_V6_vcvt_f8_hf", + "llvm.hexagon.V6.vcvt.f8.hf.128B" => "__builtin_HEXAGON_V6_vcvt_f8_hf_128B", "llvm.hexagon.V6.vcvt.h.hf" => "__builtin_HEXAGON_V6_vcvt_h_hf", "llvm.hexagon.V6.vcvt.h.hf.128B" => "__builtin_HEXAGON_V6_vcvt_h_hf_128B", "llvm.hexagon.V6.vcvt.hf.b" => "__builtin_HEXAGON_V6_vcvt_hf_b", "llvm.hexagon.V6.vcvt.hf.b.128B" => "__builtin_HEXAGON_V6_vcvt_hf_b_128B", + "llvm.hexagon.V6.vcvt.hf.f8" => "__builtin_HEXAGON_V6_vcvt_hf_f8", + "llvm.hexagon.V6.vcvt.hf.f8.128B" => "__builtin_HEXAGON_V6_vcvt_hf_f8_128B", "llvm.hexagon.V6.vcvt.hf.h" => "__builtin_HEXAGON_V6_vcvt_hf_h", "llvm.hexagon.V6.vcvt.hf.h.128B" => "__builtin_HEXAGON_V6_vcvt_hf_h_128B", "llvm.hexagon.V6.vcvt.hf.sf" => "__builtin_HEXAGON_V6_vcvt_hf_sf", @@ -1567,6 +1658,14 @@ match name { "llvm.hexagon.V6.vcvt.ub.hf.128B" => "__builtin_HEXAGON_V6_vcvt_ub_hf_128B", "llvm.hexagon.V6.vcvt.uh.hf" => "__builtin_HEXAGON_V6_vcvt_uh_hf", "llvm.hexagon.V6.vcvt.uh.hf.128B" => "__builtin_HEXAGON_V6_vcvt_uh_hf_128B", + "llvm.hexagon.V6.vcvt2.b.hf" => "__builtin_HEXAGON_V6_vcvt2_b_hf", + "llvm.hexagon.V6.vcvt2.b.hf.128B" => "__builtin_HEXAGON_V6_vcvt2_b_hf_128B", + "llvm.hexagon.V6.vcvt2.hf.b" => "__builtin_HEXAGON_V6_vcvt2_hf_b", + "llvm.hexagon.V6.vcvt2.hf.b.128B" => "__builtin_HEXAGON_V6_vcvt2_hf_b_128B", + "llvm.hexagon.V6.vcvt2.hf.ub" => "__builtin_HEXAGON_V6_vcvt2_hf_ub", + "llvm.hexagon.V6.vcvt2.hf.ub.128B" => "__builtin_HEXAGON_V6_vcvt2_hf_ub_128B", + "llvm.hexagon.V6.vcvt2.ub.hf" => "__builtin_HEXAGON_V6_vcvt2_ub_hf", + "llvm.hexagon.V6.vcvt2.ub.hf.128B" => "__builtin_HEXAGON_V6_vcvt2_ub_hf_128B", "llvm.hexagon.V6.vd0" => "__builtin_HEXAGON_V6_vd0", "llvm.hexagon.V6.vd0.128B" => "__builtin_HEXAGON_V6_vd0_128B", "llvm.hexagon.V6.vdd0" => "__builtin_HEXAGON_V6_vdd0", @@ -1649,14 +1748,20 @@ match name { "llvm.hexagon.V6.veqw.or.128B" => "__builtin_HEXAGON_V6_veqw_or_128B", "llvm.hexagon.V6.veqw.xor" => "__builtin_HEXAGON_V6_veqw_xor", "llvm.hexagon.V6.veqw.xor.128B" => "__builtin_HEXAGON_V6_veqw_xor_128B", + "llvm.hexagon.V6.vfmax.f8" => "__builtin_HEXAGON_V6_vfmax_f8", + "llvm.hexagon.V6.vfmax.f8.128B" => "__builtin_HEXAGON_V6_vfmax_f8_128B", "llvm.hexagon.V6.vfmax.hf" => "__builtin_HEXAGON_V6_vfmax_hf", "llvm.hexagon.V6.vfmax.hf.128B" => "__builtin_HEXAGON_V6_vfmax_hf_128B", "llvm.hexagon.V6.vfmax.sf" => "__builtin_HEXAGON_V6_vfmax_sf", "llvm.hexagon.V6.vfmax.sf.128B" => "__builtin_HEXAGON_V6_vfmax_sf_128B", + "llvm.hexagon.V6.vfmin.f8" => "__builtin_HEXAGON_V6_vfmin_f8", + "llvm.hexagon.V6.vfmin.f8.128B" => "__builtin_HEXAGON_V6_vfmin_f8_128B", "llvm.hexagon.V6.vfmin.hf" => "__builtin_HEXAGON_V6_vfmin_hf", "llvm.hexagon.V6.vfmin.hf.128B" => "__builtin_HEXAGON_V6_vfmin_hf_128B", "llvm.hexagon.V6.vfmin.sf" => "__builtin_HEXAGON_V6_vfmin_sf", "llvm.hexagon.V6.vfmin.sf.128B" => "__builtin_HEXAGON_V6_vfmin_sf_128B", + "llvm.hexagon.V6.vfneg.f8" => "__builtin_HEXAGON_V6_vfneg_f8", + "llvm.hexagon.V6.vfneg.f8.128B" => "__builtin_HEXAGON_V6_vfneg_f8_128B", "llvm.hexagon.V6.vfneg.hf" => "__builtin_HEXAGON_V6_vfneg_hf", "llvm.hexagon.V6.vfneg.hf.128B" => "__builtin_HEXAGON_V6_vfneg_hf_128B", "llvm.hexagon.V6.vfneg.sf" => "__builtin_HEXAGON_V6_vfneg_sf", @@ -1807,6 +1912,8 @@ match name { "llvm.hexagon.V6.vmaxuh.128B" => "__builtin_HEXAGON_V6_vmaxuh_128B", "llvm.hexagon.V6.vmaxw" => "__builtin_HEXAGON_V6_vmaxw", "llvm.hexagon.V6.vmaxw.128B" => "__builtin_HEXAGON_V6_vmaxw_128B", + "llvm.hexagon.V6.vmerge.qf" => "__builtin_HEXAGON_V6_vmerge_qf", + "llvm.hexagon.V6.vmerge.qf.128B" => "__builtin_HEXAGON_V6_vmerge_qf_128B", "llvm.hexagon.V6.vmin.bf" => "__builtin_HEXAGON_V6_vmin_bf", "llvm.hexagon.V6.vmin.bf.128B" => "__builtin_HEXAGON_V6_vmin_bf_128B", "llvm.hexagon.V6.vmin.hf" => "__builtin_HEXAGON_V6_vmin_hf", @@ -1849,6 +1956,10 @@ match name { "llvm.hexagon.V6.vmpauhuhsat.128B" => "__builtin_HEXAGON_V6_vmpauhuhsat_128B", "llvm.hexagon.V6.vmpsuhuhsat" => "__builtin_HEXAGON_V6_vmpsuhuhsat", "llvm.hexagon.V6.vmpsuhuhsat.128B" => "__builtin_HEXAGON_V6_vmpsuhuhsat_128B", + "llvm.hexagon.V6.vmpy.hf.f8" => "__builtin_HEXAGON_V6_vmpy_hf_f8", + "llvm.hexagon.V6.vmpy.hf.f8.128B" => "__builtin_HEXAGON_V6_vmpy_hf_f8_128B", + "llvm.hexagon.V6.vmpy.hf.f8.acc" => "__builtin_HEXAGON_V6_vmpy_hf_f8_acc", + "llvm.hexagon.V6.vmpy.hf.f8.acc.128B" => "__builtin_HEXAGON_V6_vmpy_hf_f8_acc_128B", "llvm.hexagon.V6.vmpy.hf.hf" => "__builtin_HEXAGON_V6_vmpy_hf_hf", "llvm.hexagon.V6.vmpy.hf.hf.128B" => "__builtin_HEXAGON_V6_vmpy_hf_hf_128B", "llvm.hexagon.V6.vmpy.hf.hf.acc" => "__builtin_HEXAGON_V6_vmpy_hf_hf_acc", @@ -1869,6 +1980,12 @@ match name { "llvm.hexagon.V6.vmpy.qf32.qf16.128B" => "__builtin_HEXAGON_V6_vmpy_qf32_qf16_128B", "llvm.hexagon.V6.vmpy.qf32.sf" => "__builtin_HEXAGON_V6_vmpy_qf32_sf", "llvm.hexagon.V6.vmpy.qf32.sf.128B" => "__builtin_HEXAGON_V6_vmpy_qf32_sf_128B", + "llvm.hexagon.V6.vmpy.rt.hf" => "__builtin_HEXAGON_V6_vmpy_rt_hf", + "llvm.hexagon.V6.vmpy.rt.hf.128B" => "__builtin_HEXAGON_V6_vmpy_rt_hf_128B", + "llvm.hexagon.V6.vmpy.rt.qf16" => "__builtin_HEXAGON_V6_vmpy_rt_qf16", + "llvm.hexagon.V6.vmpy.rt.qf16.128B" => "__builtin_HEXAGON_V6_vmpy_rt_qf16_128B", + "llvm.hexagon.V6.vmpy.rt.sf" => "__builtin_HEXAGON_V6_vmpy_rt_sf", + "llvm.hexagon.V6.vmpy.rt.sf.128B" => "__builtin_HEXAGON_V6_vmpy_rt_sf_128B", "llvm.hexagon.V6.vmpy.sf.bf" => "__builtin_HEXAGON_V6_vmpy_sf_bf", "llvm.hexagon.V6.vmpy.sf.bf.128B" => "__builtin_HEXAGON_V6_vmpy_sf_bf_128B", "llvm.hexagon.V6.vmpy.sf.bf.acc" => "__builtin_HEXAGON_V6_vmpy_sf_bf_acc", @@ -2127,6 +2244,8 @@ match name { "llvm.hexagon.V6.vshufoh.128B" => "__builtin_HEXAGON_V6_vshufoh_128B", "llvm.hexagon.V6.vsub.hf" => "__builtin_HEXAGON_V6_vsub_hf", "llvm.hexagon.V6.vsub.hf.128B" => "__builtin_HEXAGON_V6_vsub_hf_128B", + "llvm.hexagon.V6.vsub.hf.f8" => "__builtin_HEXAGON_V6_vsub_hf_f8", + "llvm.hexagon.V6.vsub.hf.f8.128B" => "__builtin_HEXAGON_V6_vsub_hf_f8_128B", "llvm.hexagon.V6.vsub.hf.hf" => "__builtin_HEXAGON_V6_vsub_hf_hf", "llvm.hexagon.V6.vsub.hf.hf.128B" => "__builtin_HEXAGON_V6_vsub_hf_hf_128B", "llvm.hexagon.V6.vsub.qf16" => "__builtin_HEXAGON_V6_vsub_qf16", @@ -4445,8 +4564,6 @@ match name { "llvm.mips.xor.v" => "__builtin_msa_xor_v", "llvm.mips.xori.b" => "__builtin_msa_xori_b", // nvvm - "llvm.nvvm.abs.bf16" => "__nvvm_abs_bf16", - "llvm.nvvm.abs.bf16x2" => "__nvvm_abs_bf16x2", "llvm.nvvm.abs.i" => "__nvvm_abs_i", "llvm.nvvm.abs.ll" => "__nvvm_abs_ll", "llvm.nvvm.activemask" => "__nvvm_activemask", @@ -4473,6 +4590,10 @@ match name { "llvm.nvvm.barrier0.and" => "__nvvm_bar0_and", "llvm.nvvm.barrier0.or" => "__nvvm_bar0_or", "llvm.nvvm.barrier0.popc" => "__nvvm_bar0_popc", + "llvm.nvvm.bf16x2.to.ue8m0x2.rp" => "__nvvm_bf16x2_to_ue8m0x2_rp", + "llvm.nvvm.bf16x2.to.ue8m0x2.rp.satfinite" => "__nvvm_bf16x2_to_ue8m0x2_rp_satfinite", + "llvm.nvvm.bf16x2.to.ue8m0x2.rz" => "__nvvm_bf16x2_to_ue8m0x2_rz", + "llvm.nvvm.bf16x2.to.ue8m0x2.rz.satfinite" => "__nvvm_bf16x2_to_ue8m0x2_rz_satfinite", "llvm.nvvm.bf2h.rn" => "__nvvm_bf2h_rn", "llvm.nvvm.bf2h.rn.ftz" => "__nvvm_bf2h_rn_ftz", "llvm.nvvm.bitcast.d2ll" => "__nvvm_bitcast_d2ll", @@ -4523,6 +4644,8 @@ match name { "llvm.nvvm.d2ull.rz" => "__nvvm_d2ull_rz", "llvm.nvvm.div.approx.f" => "__nvvm_div_approx_f", "llvm.nvvm.div.approx.ftz.f" => "__nvvm_div_approx_ftz_f", + "llvm.nvvm.div.full" => "__nvvm_div_full", + "llvm.nvvm.div.full.ftz" => "__nvvm_div_full_ftz", "llvm.nvvm.div.rm.d" => "__nvvm_div_rm_d", "llvm.nvvm.div.rm.f" => "__nvvm_div_rm_f", "llvm.nvvm.div.rm.ftz.f" => "__nvvm_div_rm_ftz_f", @@ -4535,6 +4658,10 @@ match name { "llvm.nvvm.div.rz.d" => "__nvvm_div_rz_d", "llvm.nvvm.div.rz.f" => "__nvvm_div_rz_f", "llvm.nvvm.div.rz.ftz.f" => "__nvvm_div_rz_ftz_f", + "llvm.nvvm.e2m3x2.to.f16x2.rn" => "__nvvm_e2m3x2_to_f16x2_rn", + "llvm.nvvm.e2m3x2.to.f16x2.rn.relu" => "__nvvm_e2m3x2_to_f16x2_rn_relu", + "llvm.nvvm.e3m2x2.to.f16x2.rn" => "__nvvm_e3m2x2_to_f16x2_rn", + "llvm.nvvm.e3m2x2.to.f16x2.rn.relu" => "__nvvm_e3m2x2_to_f16x2_rn_relu", "llvm.nvvm.e4m3x2.to.f16x2.rn" => "__nvvm_e4m3x2_to_f16x2_rn", "llvm.nvvm.e4m3x2.to.f16x2.rn.relu" => "__nvvm_e4m3x2_to_f16x2_rn_relu", "llvm.nvvm.e5m2x2.to.f16x2.rn" => "__nvvm_e5m2x2_to_f16x2_rn", @@ -4569,7 +4696,16 @@ match name { "llvm.nvvm.f2ll.rp.ftz" => "__nvvm_f2ll_rp_ftz", "llvm.nvvm.f2ll.rz" => "__nvvm_f2ll_rz", "llvm.nvvm.f2ll.rz.ftz" => "__nvvm_f2ll_rz_ftz", + "llvm.nvvm.f2tf32.rn" => "__nvvm_f2tf32_rn", + "llvm.nvvm.f2tf32.rn.relu" => "__nvvm_f2tf32_rn_relu", + "llvm.nvvm.f2tf32.rn.relu.satfinite" => "__nvvm_f2tf32_rn_relu_satfinite", + "llvm.nvvm.f2tf32.rn.satfinite" => "__nvvm_f2tf32_rn_satfinite", "llvm.nvvm.f2tf32.rna" => "__nvvm_f2tf32_rna", + "llvm.nvvm.f2tf32.rna.satfinite" => "__nvvm_f2tf32_rna_satfinite", + "llvm.nvvm.f2tf32.rz" => "__nvvm_f2tf32_rz", + "llvm.nvvm.f2tf32.rz.relu" => "__nvvm_f2tf32_rz_relu", + "llvm.nvvm.f2tf32.rz.relu.satfinite" => "__nvvm_f2tf32_rz_relu_satfinite", + "llvm.nvvm.f2tf32.rz.satfinite" => "__nvvm_f2tf32_rz_satfinite", "llvm.nvvm.f2ui.rm" => "__nvvm_f2ui_rm", "llvm.nvvm.f2ui.rm.ftz" => "__nvvm_f2ui_rm_ftz", "llvm.nvvm.f2ui.rn" => "__nvvm_f2ui_rn", @@ -4589,10 +4725,18 @@ match name { "llvm.nvvm.fabs.d" => "__nvvm_fabs_d", "llvm.nvvm.fabs.f" => "__nvvm_fabs_f", "llvm.nvvm.fabs.ftz.f" => "__nvvm_fabs_ftz_f", + "llvm.nvvm.ff.to.e2m3x2.rn.relu.satfinite" => "__nvvm_ff_to_e2m3x2_rn_relu_satfinite", + "llvm.nvvm.ff.to.e2m3x2.rn.satfinite" => "__nvvm_ff_to_e2m3x2_rn_satfinite", + "llvm.nvvm.ff.to.e3m2x2.rn.relu.satfinite" => "__nvvm_ff_to_e3m2x2_rn_relu_satfinite", + "llvm.nvvm.ff.to.e3m2x2.rn.satfinite" => "__nvvm_ff_to_e3m2x2_rn_satfinite", "llvm.nvvm.ff.to.e4m3x2.rn" => "__nvvm_ff_to_e4m3x2_rn", "llvm.nvvm.ff.to.e4m3x2.rn.relu" => "__nvvm_ff_to_e4m3x2_rn_relu", "llvm.nvvm.ff.to.e5m2x2.rn" => "__nvvm_ff_to_e5m2x2_rn", "llvm.nvvm.ff.to.e5m2x2.rn.relu" => "__nvvm_ff_to_e5m2x2_rn_relu", + "llvm.nvvm.ff.to.ue8m0x2.rp" => "__nvvm_ff_to_ue8m0x2_rp", + "llvm.nvvm.ff.to.ue8m0x2.rp.satfinite" => "__nvvm_ff_to_ue8m0x2_rp_satfinite", + "llvm.nvvm.ff.to.ue8m0x2.rz" => "__nvvm_ff_to_ue8m0x2_rz", + "llvm.nvvm.ff.to.ue8m0x2.rz.satfinite" => "__nvvm_ff_to_ue8m0x2_rz_satfinite", "llvm.nvvm.ff2bf16x2.rn" => "__nvvm_ff2bf16x2_rn", "llvm.nvvm.ff2bf16x2.rn.relu" => "__nvvm_ff2bf16x2_rn_relu", "llvm.nvvm.ff2bf16x2.rz" => "__nvvm_ff2bf16x2_rz", @@ -4862,6 +5006,14 @@ match name { // [DUPLICATE]: "llvm.nvvm.read.ptx.sreg.warpsize" => "__nvvm_read_ptx_sreg_", "llvm.nvvm.redux.sync.add" => "__nvvm_redux_sync_add", "llvm.nvvm.redux.sync.and" => "__nvvm_redux_sync_and", + "llvm.nvvm.redux.sync.fmax" => "__nvvm_redux_sync_fmax", + "llvm.nvvm.redux.sync.fmax.NaN" => "__nvvm_redux_sync_fmax_NaN", + "llvm.nvvm.redux.sync.fmax.abs" => "__nvvm_redux_sync_fmax_abs", + "llvm.nvvm.redux.sync.fmax.abs.NaN" => "__nvvm_redux_sync_fmax_abs_NaN", + "llvm.nvvm.redux.sync.fmin" => "__nvvm_redux_sync_fmin", + "llvm.nvvm.redux.sync.fmin.NaN" => "__nvvm_redux_sync_fmin_NaN", + "llvm.nvvm.redux.sync.fmin.abs" => "__nvvm_redux_sync_fmin_abs", + "llvm.nvvm.redux.sync.fmin.abs.NaN" => "__nvvm_redux_sync_fmin_abs_NaN", "llvm.nvvm.redux.sync.max" => "__nvvm_redux_sync_max", "llvm.nvvm.redux.sync.min" => "__nvvm_redux_sync_min", "llvm.nvvm.redux.sync.or" => "__nvvm_redux_sync_or", @@ -5149,6 +5301,7 @@ match name { "llvm.nvvm.txq.num.mipmap.levels" => "__nvvm_txq_num_mipmap_levels", "llvm.nvvm.txq.num.samples" => "__nvvm_txq_num_samples", "llvm.nvvm.txq.width" => "__nvvm_txq_width", + "llvm.nvvm.ue8m0x2.to.bf16x2" => "__nvvm_ue8m0x2_to_bf16x2", "llvm.nvvm.ui2d.rm" => "__nvvm_ui2d_rm", "llvm.nvvm.ui2d.rn" => "__nvvm_ui2d_rn", "llvm.nvvm.ui2d.rp" => "__nvvm_ui2d_rp", @@ -5783,6 +5936,9 @@ match name { "llvm.r600.read.tgid.x" => "__builtin_r600_read_tgid_x", "llvm.r600.read.tgid.y" => "__builtin_r600_read_tgid_y", "llvm.r600.read.tgid.z" => "__builtin_r600_read_tgid_z", + "llvm.r600.read.tidig.x" => "__builtin_r600_read_tidig_x", + "llvm.r600.read.tidig.y" => "__builtin_r600_read_tidig_y", + "llvm.r600.read.tidig.z" => "__builtin_r600_read_tidig_z", // riscv "llvm.riscv.aes32dsi" => "__builtin_riscv_aes32dsi", "llvm.riscv.aes32dsmi" => "__builtin_riscv_aes32dsmi", @@ -5806,6 +5962,8 @@ match name { "llvm.riscv.sha512sum1" => "__builtin_riscv_sha512sum1", "llvm.riscv.sha512sum1r" => "__builtin_riscv_sha512sum1r", // s390 + "llvm.s390.bdepg" => "__builtin_s390_bdepg", + "llvm.s390.bextg" => "__builtin_s390_bextg", "llvm.s390.efpc" => "__builtin_s390_efpc", "llvm.s390.etnd" => "__builtin_tx_nesting_depth", "llvm.s390.lcbb" => "__builtin_s390_lcbb", @@ -5828,6 +5986,8 @@ match name { "llvm.s390.vavglf" => "__builtin_s390_vavglf", "llvm.s390.vavglg" => "__builtin_s390_vavglg", "llvm.s390.vavglh" => "__builtin_s390_vavglh", + "llvm.s390.vavglq" => "__builtin_s390_vavglq", + "llvm.s390.vavgq" => "__builtin_s390_vavgq", "llvm.s390.vbperm" => "__builtin_s390_vbperm", "llvm.s390.vcfn" => "__builtin_s390_vcfn", "llvm.s390.vcksm" => "__builtin_s390_vcksm", @@ -5839,6 +5999,7 @@ match name { "llvm.s390.verimf" => "__builtin_s390_verimf", "llvm.s390.verimg" => "__builtin_s390_verimg", "llvm.s390.verimh" => "__builtin_s390_verimh", + "llvm.s390.veval" => "__builtin_s390_veval", "llvm.s390.vfaeb" => "__builtin_s390_vfaeb", "llvm.s390.vfaef" => "__builtin_s390_vfaef", "llvm.s390.vfaeh" => "__builtin_s390_vfaeh", @@ -5857,6 +6018,11 @@ match name { "llvm.s390.vfenezb" => "__builtin_s390_vfenezb", "llvm.s390.vfenezf" => "__builtin_s390_vfenezf", "llvm.s390.vfenezh" => "__builtin_s390_vfenezh", + "llvm.s390.vgemb" => "__builtin_s390_vgemb", + "llvm.s390.vgemf" => "__builtin_s390_vgemf", + "llvm.s390.vgemg" => "__builtin_s390_vgemg", + "llvm.s390.vgemh" => "__builtin_s390_vgemh", + "llvm.s390.vgemq" => "__builtin_s390_vgemq", "llvm.s390.vgfmab" => "__builtin_s390_vgfmab", "llvm.s390.vgfmaf" => "__builtin_s390_vgfmaf", "llvm.s390.vgfmag" => "__builtin_s390_vgfmag", @@ -5873,39 +6039,55 @@ match name { "llvm.s390.vlrl" => "__builtin_s390_vlrlr", "llvm.s390.vmaeb" => "__builtin_s390_vmaeb", "llvm.s390.vmaef" => "__builtin_s390_vmaef", + "llvm.s390.vmaeg" => "__builtin_s390_vmaeg", "llvm.s390.vmaeh" => "__builtin_s390_vmaeh", "llvm.s390.vmahb" => "__builtin_s390_vmahb", "llvm.s390.vmahf" => "__builtin_s390_vmahf", + "llvm.s390.vmahg" => "__builtin_s390_vmahg", "llvm.s390.vmahh" => "__builtin_s390_vmahh", + "llvm.s390.vmahq" => "__builtin_s390_vmahq", "llvm.s390.vmaleb" => "__builtin_s390_vmaleb", "llvm.s390.vmalef" => "__builtin_s390_vmalef", + "llvm.s390.vmaleg" => "__builtin_s390_vmaleg", "llvm.s390.vmaleh" => "__builtin_s390_vmaleh", "llvm.s390.vmalhb" => "__builtin_s390_vmalhb", "llvm.s390.vmalhf" => "__builtin_s390_vmalhf", + "llvm.s390.vmalhg" => "__builtin_s390_vmalhg", "llvm.s390.vmalhh" => "__builtin_s390_vmalhh", + "llvm.s390.vmalhq" => "__builtin_s390_vmalhq", "llvm.s390.vmalob" => "__builtin_s390_vmalob", "llvm.s390.vmalof" => "__builtin_s390_vmalof", + "llvm.s390.vmalog" => "__builtin_s390_vmalog", "llvm.s390.vmaloh" => "__builtin_s390_vmaloh", "llvm.s390.vmaob" => "__builtin_s390_vmaob", "llvm.s390.vmaof" => "__builtin_s390_vmaof", + "llvm.s390.vmaog" => "__builtin_s390_vmaog", "llvm.s390.vmaoh" => "__builtin_s390_vmaoh", "llvm.s390.vmeb" => "__builtin_s390_vmeb", "llvm.s390.vmef" => "__builtin_s390_vmef", + "llvm.s390.vmeg" => "__builtin_s390_vmeg", "llvm.s390.vmeh" => "__builtin_s390_vmeh", "llvm.s390.vmhb" => "__builtin_s390_vmhb", "llvm.s390.vmhf" => "__builtin_s390_vmhf", + "llvm.s390.vmhg" => "__builtin_s390_vmhg", "llvm.s390.vmhh" => "__builtin_s390_vmhh", + "llvm.s390.vmhq" => "__builtin_s390_vmhq", "llvm.s390.vmleb" => "__builtin_s390_vmleb", "llvm.s390.vmlef" => "__builtin_s390_vmlef", + "llvm.s390.vmleg" => "__builtin_s390_vmleg", "llvm.s390.vmleh" => "__builtin_s390_vmleh", "llvm.s390.vmlhb" => "__builtin_s390_vmlhb", "llvm.s390.vmlhf" => "__builtin_s390_vmlhf", + "llvm.s390.vmlhg" => "__builtin_s390_vmlhg", "llvm.s390.vmlhh" => "__builtin_s390_vmlhh", + "llvm.s390.vmlhq" => "__builtin_s390_vmlhq", "llvm.s390.vmlob" => "__builtin_s390_vmlob", "llvm.s390.vmlof" => "__builtin_s390_vmlof", + "llvm.s390.vmlog" => "__builtin_s390_vmlog", "llvm.s390.vmloh" => "__builtin_s390_vmloh", "llvm.s390.vmob" => "__builtin_s390_vmob", "llvm.s390.vmof" => "__builtin_s390_vmof", + "llvm.s390.vmog" => "__builtin_s390_vmog", "llvm.s390.vmoh" => "__builtin_s390_vmoh", "llvm.s390.vmslg" => "__builtin_s390_vmslg", "llvm.s390.vpdi" => "__builtin_s390_vpdi", @@ -5950,18 +6132,20 @@ match name { "llvm.s390.vtm" => "__builtin_s390_vtm", "llvm.s390.vuphb" => "__builtin_s390_vuphb", "llvm.s390.vuphf" => "__builtin_s390_vuphf", + "llvm.s390.vuphg" => "__builtin_s390_vuphg", "llvm.s390.vuphh" => "__builtin_s390_vuphh", "llvm.s390.vuplb" => "__builtin_s390_vuplb", "llvm.s390.vuplf" => "__builtin_s390_vuplf", + "llvm.s390.vuplg" => "__builtin_s390_vuplg", "llvm.s390.vuplhb" => "__builtin_s390_vuplhb", "llvm.s390.vuplhf" => "__builtin_s390_vuplhf", + "llvm.s390.vuplhg" => "__builtin_s390_vuplhg", "llvm.s390.vuplhh" => "__builtin_s390_vuplhh", "llvm.s390.vuplhw" => "__builtin_s390_vuplhw", "llvm.s390.vupllb" => "__builtin_s390_vupllb", "llvm.s390.vupllf" => "__builtin_s390_vupllf", + "llvm.s390.vupllg" => "__builtin_s390_vupllg", "llvm.s390.vupllh" => "__builtin_s390_vupllh", - // spv - "llvm.spv.create.handle" => "__builtin_hlsl_create_handle", // ve "llvm.ve.vl.andm.MMM" => "__builtin_ve_vl_andm_MMM", "llvm.ve.vl.andm.mmm" => "__builtin_ve_vl_andm_mmm", @@ -7328,6 +7512,27 @@ match name { "llvm.x86.avx.vtestz.ps.256" => "__builtin_ia32_vtestzps256", "llvm.x86.avx.vzeroall" => "__builtin_ia32_vzeroall", "llvm.x86.avx.vzeroupper" => "__builtin_ia32_vzeroupper", + "llvm.x86.avx10.mask.getexp.bf16.128" => "__builtin_ia32_vgetexpbf16128_mask", + "llvm.x86.avx10.mask.getexp.bf16.256" => "__builtin_ia32_vgetexpbf16256_mask", + "llvm.x86.avx10.mask.getexp.bf16.512" => "__builtin_ia32_vgetexpbf16512_mask", + "llvm.x86.avx10.mask.getmant.bf16.128" => "__builtin_ia32_vgetmantbf16128_mask", + "llvm.x86.avx10.mask.getmant.bf16.256" => "__builtin_ia32_vgetmantbf16256_mask", + "llvm.x86.avx10.mask.getmant.bf16.512" => "__builtin_ia32_vgetmantbf16512_mask", + "llvm.x86.avx10.mask.rcp.bf16.128" => "__builtin_ia32_vrcpbf16128_mask", + "llvm.x86.avx10.mask.rcp.bf16.256" => "__builtin_ia32_vrcpbf16256_mask", + "llvm.x86.avx10.mask.rcp.bf16.512" => "__builtin_ia32_vrcpbf16512_mask", + "llvm.x86.avx10.mask.reduce.bf16.128" => "__builtin_ia32_vreducebf16128_mask", + "llvm.x86.avx10.mask.reduce.bf16.256" => "__builtin_ia32_vreducebf16256_mask", + "llvm.x86.avx10.mask.reduce.bf16.512" => "__builtin_ia32_vreducebf16512_mask", + "llvm.x86.avx10.mask.rndscale.bf16.128" => "__builtin_ia32_vrndscalebf16_128_mask", + "llvm.x86.avx10.mask.rndscale.bf16.256" => "__builtin_ia32_vrndscalebf16_256_mask", + "llvm.x86.avx10.mask.rndscale.bf16.512" => "__builtin_ia32_vrndscalebf16_mask", + "llvm.x86.avx10.mask.rsqrt.bf16.128" => "__builtin_ia32_vrsqrtbf16128_mask", + "llvm.x86.avx10.mask.rsqrt.bf16.256" => "__builtin_ia32_vrsqrtbf16256_mask", + "llvm.x86.avx10.mask.rsqrt.bf16.512" => "__builtin_ia32_vrsqrtbf16512_mask", + "llvm.x86.avx10.mask.scalef.bf16.128" => "__builtin_ia32_vscalefbf16128_mask", + "llvm.x86.avx10.mask.scalef.bf16.256" => "__builtin_ia32_vscalefbf16256_mask", + "llvm.x86.avx10.mask.scalef.bf16.512" => "__builtin_ia32_vscalefbf16512_mask", "llvm.x86.avx10.mask.vcvt2ps2phx.128" => "__builtin_ia32_vcvt2ps2phx128_mask", "llvm.x86.avx10.mask.vcvt2ps2phx.256" => "__builtin_ia32_vcvt2ps2phx256_mask", "llvm.x86.avx10.mask.vcvt2ps2phx.512" => "__builtin_ia32_vcvt2ps2phx512_mask", @@ -7346,171 +7551,194 @@ match name { "llvm.x86.avx10.mask.vcvthf82ph128" => "__builtin_ia32_vcvthf8_2ph128_mask", "llvm.x86.avx10.mask.vcvthf82ph256" => "__builtin_ia32_vcvthf8_2ph256_mask", "llvm.x86.avx10.mask.vcvthf82ph512" => "__builtin_ia32_vcvthf8_2ph512_mask", - "llvm.x86.avx10.mask.vcvtneph2bf8128" => "__builtin_ia32_vcvtneph2bf8_128_mask", - "llvm.x86.avx10.mask.vcvtneph2bf8256" => "__builtin_ia32_vcvtneph2bf8_256_mask", - "llvm.x86.avx10.mask.vcvtneph2bf8512" => "__builtin_ia32_vcvtneph2bf8_512_mask", - "llvm.x86.avx10.mask.vcvtneph2bf8s128" => "__builtin_ia32_vcvtneph2bf8s_128_mask", - "llvm.x86.avx10.mask.vcvtneph2bf8s256" => "__builtin_ia32_vcvtneph2bf8s_256_mask", - "llvm.x86.avx10.mask.vcvtneph2bf8s512" => "__builtin_ia32_vcvtneph2bf8s_512_mask", - "llvm.x86.avx10.mask.vcvtneph2hf8128" => "__builtin_ia32_vcvtneph2hf8_128_mask", - "llvm.x86.avx10.mask.vcvtneph2hf8256" => "__builtin_ia32_vcvtneph2hf8_256_mask", - "llvm.x86.avx10.mask.vcvtneph2hf8512" => "__builtin_ia32_vcvtneph2hf8_512_mask", - "llvm.x86.avx10.mask.vcvtneph2hf8s128" => "__builtin_ia32_vcvtneph2hf8s_128_mask", - "llvm.x86.avx10.mask.vcvtneph2hf8s256" => "__builtin_ia32_vcvtneph2hf8s_256_mask", - "llvm.x86.avx10.mask.vcvtneph2hf8s512" => "__builtin_ia32_vcvtneph2hf8s_512_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtpd2dq256" => "__builtin_ia32_vcvtpd2dq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtpd2ph256" => "__builtin_ia32_vcvtpd2ph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtpd2ps256" => "__builtin_ia32_vcvtpd2ps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtpd2qq256" => "__builtin_ia32_vcvtpd2qq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtpd2udq256" => "__builtin_ia32_vcvtpd2udq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtpd2uqq256" => "__builtin_ia32_vcvtpd2uqq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2dq256" => "__builtin_ia32_vcvtph2dq256_round_mask", + "llvm.x86.avx10.mask.vcvtph2bf8128" => "__builtin_ia32_vcvtph2bf8_128_mask", + "llvm.x86.avx10.mask.vcvtph2bf8256" => "__builtin_ia32_vcvtph2bf8_256_mask", + "llvm.x86.avx10.mask.vcvtph2bf8512" => "__builtin_ia32_vcvtph2bf8_512_mask", + "llvm.x86.avx10.mask.vcvtph2bf8s128" => "__builtin_ia32_vcvtph2bf8s_128_mask", + "llvm.x86.avx10.mask.vcvtph2bf8s256" => "__builtin_ia32_vcvtph2bf8s_256_mask", + "llvm.x86.avx10.mask.vcvtph2bf8s512" => "__builtin_ia32_vcvtph2bf8s_512_mask", + "llvm.x86.avx10.mask.vcvtph2hf8128" => "__builtin_ia32_vcvtph2hf8_128_mask", + "llvm.x86.avx10.mask.vcvtph2hf8256" => "__builtin_ia32_vcvtph2hf8_256_mask", + "llvm.x86.avx10.mask.vcvtph2hf8512" => "__builtin_ia32_vcvtph2hf8_512_mask", + "llvm.x86.avx10.mask.vcvtph2hf8s128" => "__builtin_ia32_vcvtph2hf8s_128_mask", + "llvm.x86.avx10.mask.vcvtph2hf8s256" => "__builtin_ia32_vcvtph2hf8s_256_mask", + "llvm.x86.avx10.mask.vcvtph2hf8s512" => "__builtin_ia32_vcvtph2hf8s_512_mask", "llvm.x86.avx10.mask.vcvtph2ibs128" => "__builtin_ia32_vcvtph2ibs128_mask", "llvm.x86.avx10.mask.vcvtph2ibs256" => "__builtin_ia32_vcvtph2ibs256_mask", "llvm.x86.avx10.mask.vcvtph2ibs512" => "__builtin_ia32_vcvtph2ibs512_mask", "llvm.x86.avx10.mask.vcvtph2iubs128" => "__builtin_ia32_vcvtph2iubs128_mask", "llvm.x86.avx10.mask.vcvtph2iubs256" => "__builtin_ia32_vcvtph2iubs256_mask", "llvm.x86.avx10.mask.vcvtph2iubs512" => "__builtin_ia32_vcvtph2iubs512_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2pd256" => "__builtin_ia32_vcvtph2pd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2psx256" => "__builtin_ia32_vcvtph2psx256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2qq256" => "__builtin_ia32_vcvtph2qq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2udq256" => "__builtin_ia32_vcvtph2udq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2uqq256" => "__builtin_ia32_vcvtph2uqq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2uw256" => "__builtin_ia32_vcvtph2uw256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtph2w256" => "__builtin_ia32_vcvtph2w256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2dq256" => "__builtin_ia32_vcvtps2dq256_round_mask", "llvm.x86.avx10.mask.vcvtps2ibs128" => "__builtin_ia32_vcvtps2ibs128_mask", "llvm.x86.avx10.mask.vcvtps2ibs256" => "__builtin_ia32_vcvtps2ibs256_mask", "llvm.x86.avx10.mask.vcvtps2ibs512" => "__builtin_ia32_vcvtps2ibs512_mask", "llvm.x86.avx10.mask.vcvtps2iubs128" => "__builtin_ia32_vcvtps2iubs128_mask", "llvm.x86.avx10.mask.vcvtps2iubs256" => "__builtin_ia32_vcvtps2iubs256_mask", "llvm.x86.avx10.mask.vcvtps2iubs512" => "__builtin_ia32_vcvtps2iubs512_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2pd256" => "__builtin_ia32_vcvtps2pd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2ph256" => "__builtin_ia32_vcvtps2ph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2phx256" => "__builtin_ia32_vcvtps2phx256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2qq256" => "__builtin_ia32_vcvtps2qq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2udq256" => "__builtin_ia32_vcvtps2udq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvtps2uqq256" => "__builtin_ia32_vcvtps2uqq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2dq256" => "__builtin_ia32_vcvttpd2dq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2qq256" => "__builtin_ia32_vcvttpd2qq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2udq256" => "__builtin_ia32_vcvttpd2udq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2uqq256" => "__builtin_ia32_vcvttpd2uqq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttph2dq256" => "__builtin_ia32_vcvttph2dq256_round_mask", + "llvm.x86.avx10.mask.vcvttpd2dqs.128" => "__builtin_ia32_vcvttpd2dqs128_mask", + "llvm.x86.avx10.mask.vcvttpd2dqs.256" => "__builtin_ia32_vcvttpd2dqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2dqs.round.512" => "__builtin_ia32_vcvttpd2dqs512_round_mask", + "llvm.x86.avx10.mask.vcvttpd2qqs.128" => "__builtin_ia32_vcvttpd2qqs128_mask", + "llvm.x86.avx10.mask.vcvttpd2qqs.256" => "__builtin_ia32_vcvttpd2qqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2qqs.round.512" => "__builtin_ia32_vcvttpd2qqs512_round_mask", + "llvm.x86.avx10.mask.vcvttpd2udqs.128" => "__builtin_ia32_vcvttpd2udqs128_mask", + "llvm.x86.avx10.mask.vcvttpd2udqs.256" => "__builtin_ia32_vcvttpd2udqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2udqs.round.512" => "__builtin_ia32_vcvttpd2udqs512_round_mask", + "llvm.x86.avx10.mask.vcvttpd2uqqs.128" => "__builtin_ia32_vcvttpd2uqqs128_mask", + "llvm.x86.avx10.mask.vcvttpd2uqqs.256" => "__builtin_ia32_vcvttpd2uqqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttpd2uqqs.round.512" => "__builtin_ia32_vcvttpd2uqqs512_round_mask", "llvm.x86.avx10.mask.vcvttph2ibs128" => "__builtin_ia32_vcvttph2ibs128_mask", "llvm.x86.avx10.mask.vcvttph2ibs256" => "__builtin_ia32_vcvttph2ibs256_mask", "llvm.x86.avx10.mask.vcvttph2ibs512" => "__builtin_ia32_vcvttph2ibs512_mask", "llvm.x86.avx10.mask.vcvttph2iubs128" => "__builtin_ia32_vcvttph2iubs128_mask", "llvm.x86.avx10.mask.vcvttph2iubs256" => "__builtin_ia32_vcvttph2iubs256_mask", "llvm.x86.avx10.mask.vcvttph2iubs512" => "__builtin_ia32_vcvttph2iubs512_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttph2qq256" => "__builtin_ia32_vcvttph2qq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttph2udq256" => "__builtin_ia32_vcvttph2udq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttph2uqq256" => "__builtin_ia32_vcvttph2uqq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttph2uw256" => "__builtin_ia32_vcvttph2uw256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttph2w256" => "__builtin_ia32_vcvttph2w256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2dq256" => "__builtin_ia32_vcvttps2dq256_round_mask", + "llvm.x86.avx10.mask.vcvttps2dqs.128" => "__builtin_ia32_vcvttps2dqs128_mask", + "llvm.x86.avx10.mask.vcvttps2dqs.256" => "__builtin_ia32_vcvttps2dqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2dqs.round.512" => "__builtin_ia32_vcvttps2dqs512_round_mask", "llvm.x86.avx10.mask.vcvttps2ibs128" => "__builtin_ia32_vcvttps2ibs128_mask", "llvm.x86.avx10.mask.vcvttps2ibs256" => "__builtin_ia32_vcvttps2ibs256_mask", "llvm.x86.avx10.mask.vcvttps2ibs512" => "__builtin_ia32_vcvttps2ibs512_mask", "llvm.x86.avx10.mask.vcvttps2iubs128" => "__builtin_ia32_vcvttps2iubs128_mask", "llvm.x86.avx10.mask.vcvttps2iubs256" => "__builtin_ia32_vcvttps2iubs256_mask", "llvm.x86.avx10.mask.vcvttps2iubs512" => "__builtin_ia32_vcvttps2iubs512_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2qq256" => "__builtin_ia32_vcvttps2qq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2udq256" => "__builtin_ia32_vcvttps2udq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2uqq256" => "__builtin_ia32_vcvttps2uqq256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vfcmaddcph256" => "__builtin_ia32_vfcmaddcph256_round_mask3", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vfcmulcph256" => "__builtin_ia32_vfcmulcph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vfixupimmpd256" => "__builtin_ia32_vfixupimmpd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vfixupimmps256" => "__builtin_ia32_vfixupimmps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vfmaddcph256" => "__builtin_ia32_vfmaddcph256_round_mask3", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vfmulcph256" => "__builtin_ia32_vfmulcph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vgetexppd256" => "__builtin_ia32_vgetexppd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vgetexpph256" => "__builtin_ia32_vgetexpph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vgetexpps256" => "__builtin_ia32_vgetexpps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vgetmantpd256" => "__builtin_ia32_vgetmantpd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vgetmantph256" => "__builtin_ia32_vgetmantph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vgetmantps256" => "__builtin_ia32_vgetmantps256_round_mask", + "llvm.x86.avx10.mask.vcvttps2qqs.128" => "__builtin_ia32_vcvttps2qqs128_mask", + "llvm.x86.avx10.mask.vcvttps2qqs.256" => "__builtin_ia32_vcvttps2qqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2qqs.round.512" => "__builtin_ia32_vcvttps2qqs512_round_mask", + "llvm.x86.avx10.mask.vcvttps2udqs.128" => "__builtin_ia32_vcvttps2udqs128_mask", + "llvm.x86.avx10.mask.vcvttps2udqs.256" => "__builtin_ia32_vcvttps2udqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2udqs.round.512" => "__builtin_ia32_vcvttps2udqs512_round_mask", + "llvm.x86.avx10.mask.vcvttps2uqqs.128" => "__builtin_ia32_vcvttps2uqqs128_mask", + "llvm.x86.avx10.mask.vcvttps2uqqs.256" => "__builtin_ia32_vcvttps2uqqs256_mask", + // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vcvttps2uqqs.round.512" => "__builtin_ia32_vcvttps2uqqs512_round_mask", // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxpd.round" => "__builtin_ia32_vminmaxpd512_round_mask", "llvm.x86.avx10.mask.vminmaxpd128" => "__builtin_ia32_vminmaxpd128_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxpd256.round" => "__builtin_ia32_vminmaxpd256_round_mask", + "llvm.x86.avx10.mask.vminmaxpd256" => "__builtin_ia32_vminmaxpd256_mask", // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxph.round" => "__builtin_ia32_vminmaxph512_round_mask", "llvm.x86.avx10.mask.vminmaxph128" => "__builtin_ia32_vminmaxph128_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxph256.round" => "__builtin_ia32_vminmaxph256_round_mask", + "llvm.x86.avx10.mask.vminmaxph256" => "__builtin_ia32_vminmaxph256_mask", // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxps.round" => "__builtin_ia32_vminmaxps512_round_mask", "llvm.x86.avx10.mask.vminmaxps128" => "__builtin_ia32_vminmaxps128_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxps256.round" => "__builtin_ia32_vminmaxps256_round_mask", + "llvm.x86.avx10.mask.vminmaxps256" => "__builtin_ia32_vminmaxps256_mask", // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxsd.round" => "__builtin_ia32_vminmaxsd_round_mask", // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxsh.round" => "__builtin_ia32_vminmaxsh_round_mask", // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vminmaxss.round" => "__builtin_ia32_vminmaxss_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vrangepd256" => "__builtin_ia32_vrangepd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vrangeps256" => "__builtin_ia32_vrangeps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vreducepd256" => "__builtin_ia32_vreducepd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vreduceph256" => "__builtin_ia32_vreduceph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vreduceps256" => "__builtin_ia32_vreduceps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vrndscalepd256" => "__builtin_ia32_vrndscalepd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vrndscaleph256" => "__builtin_ia32_vrndscaleph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vrndscaleps256" => "__builtin_ia32_vrndscaleps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vscalefpd256" => "__builtin_ia32_vscalefpd256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vscalefph256" => "__builtin_ia32_vscalefph256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.mask.vscalefps256" => "__builtin_ia32_vscalefps256_round_mask", - // [INVALID CONVERSION]: "llvm.x86.avx10.maskz.vfcmaddcph256" => "__builtin_ia32_vfcmaddcph256_round_maskz", - // [INVALID CONVERSION]: "llvm.x86.avx10.maskz.vfixupimmpd256" => "__builtin_ia32_vfixupimmpd256_round_maskz", - // [INVALID CONVERSION]: "llvm.x86.avx10.maskz.vfixupimmps256" => "__builtin_ia32_vfixupimmps256_round_maskz", - // [INVALID CONVERSION]: "llvm.x86.avx10.maskz.vfmaddcph256" => "__builtin_ia32_vfmaddcph256_round_maskz", + "llvm.x86.avx10.vaddbf16128" => "__builtin_ia32_vaddbf16128", + "llvm.x86.avx10.vaddbf16256" => "__builtin_ia32_vaddbf16256", + "llvm.x86.avx10.vaddbf16512" => "__builtin_ia32_vaddbf16512", "llvm.x86.avx10.vaddpd256" => "__builtin_ia32_vaddpd256_round", "llvm.x86.avx10.vaddph256" => "__builtin_ia32_vaddph256_round", "llvm.x86.avx10.vaddps256" => "__builtin_ia32_vaddps256_round", - "llvm.x86.avx10.vcvtne2ph2bf8128" => "__builtin_ia32_vcvtne2ph2bf8_128", - "llvm.x86.avx10.vcvtne2ph2bf8256" => "__builtin_ia32_vcvtne2ph2bf8_256", - "llvm.x86.avx10.vcvtne2ph2bf8512" => "__builtin_ia32_vcvtne2ph2bf8_512", - "llvm.x86.avx10.vcvtne2ph2bf8s128" => "__builtin_ia32_vcvtne2ph2bf8s_128", - "llvm.x86.avx10.vcvtne2ph2bf8s256" => "__builtin_ia32_vcvtne2ph2bf8s_256", - "llvm.x86.avx10.vcvtne2ph2bf8s512" => "__builtin_ia32_vcvtne2ph2bf8s_512", - "llvm.x86.avx10.vcvtne2ph2hf8128" => "__builtin_ia32_vcvtne2ph2hf8_128", - "llvm.x86.avx10.vcvtne2ph2hf8256" => "__builtin_ia32_vcvtne2ph2hf8_256", - "llvm.x86.avx10.vcvtne2ph2hf8512" => "__builtin_ia32_vcvtne2ph2hf8_512", - "llvm.x86.avx10.vcvtne2ph2hf8s128" => "__builtin_ia32_vcvtne2ph2hf8s_128", - "llvm.x86.avx10.vcvtne2ph2hf8s256" => "__builtin_ia32_vcvtne2ph2hf8s_256", - "llvm.x86.avx10.vcvtne2ph2hf8s512" => "__builtin_ia32_vcvtne2ph2hf8s_512", - "llvm.x86.avx10.vcvtnebf162ibs128" => "__builtin_ia32_vcvtnebf162ibs128", - "llvm.x86.avx10.vcvtnebf162ibs256" => "__builtin_ia32_vcvtnebf162ibs256", - "llvm.x86.avx10.vcvtnebf162ibs512" => "__builtin_ia32_vcvtnebf162ibs512", - "llvm.x86.avx10.vcvtnebf162iubs128" => "__builtin_ia32_vcvtnebf162iubs128", - "llvm.x86.avx10.vcvtnebf162iubs256" => "__builtin_ia32_vcvtnebf162iubs256", - "llvm.x86.avx10.vcvtnebf162iubs512" => "__builtin_ia32_vcvtnebf162iubs512", - "llvm.x86.avx10.vcvttnebf162ibs128" => "__builtin_ia32_vcvttnebf162ibs128", - "llvm.x86.avx10.vcvttnebf162ibs256" => "__builtin_ia32_vcvttnebf162ibs256", - "llvm.x86.avx10.vcvttnebf162ibs512" => "__builtin_ia32_vcvttnebf162ibs512", - "llvm.x86.avx10.vcvttnebf162iubs128" => "__builtin_ia32_vcvttnebf162iubs128", - "llvm.x86.avx10.vcvttnebf162iubs256" => "__builtin_ia32_vcvttnebf162iubs256", - "llvm.x86.avx10.vcvttnebf162iubs512" => "__builtin_ia32_vcvttnebf162iubs512", - "llvm.x86.avx10.vdivpd256" => "__builtin_ia32_vdivpd256_round", - "llvm.x86.avx10.vdivph256" => "__builtin_ia32_vdivph256_round", - "llvm.x86.avx10.vdivps256" => "__builtin_ia32_vdivps256_round", + "llvm.x86.avx10.vcomisbf16eq" => "__builtin_ia32_vcomisbf16eq", + "llvm.x86.avx10.vcomisbf16ge" => "__builtin_ia32_vcomisbf16ge", + "llvm.x86.avx10.vcomisbf16gt" => "__builtin_ia32_vcomisbf16gt", + "llvm.x86.avx10.vcomisbf16le" => "__builtin_ia32_vcomisbf16le", + "llvm.x86.avx10.vcomisbf16lt" => "__builtin_ia32_vcomisbf16lt", + "llvm.x86.avx10.vcomisbf16neq" => "__builtin_ia32_vcomisbf16neq", + "llvm.x86.avx10.vcvt2ph2bf8128" => "__builtin_ia32_vcvt2ph2bf8_128", + "llvm.x86.avx10.vcvt2ph2bf8256" => "__builtin_ia32_vcvt2ph2bf8_256", + "llvm.x86.avx10.vcvt2ph2bf8512" => "__builtin_ia32_vcvt2ph2bf8_512", + "llvm.x86.avx10.vcvt2ph2bf8s128" => "__builtin_ia32_vcvt2ph2bf8s_128", + "llvm.x86.avx10.vcvt2ph2bf8s256" => "__builtin_ia32_vcvt2ph2bf8s_256", + "llvm.x86.avx10.vcvt2ph2bf8s512" => "__builtin_ia32_vcvt2ph2bf8s_512", + "llvm.x86.avx10.vcvt2ph2hf8128" => "__builtin_ia32_vcvt2ph2hf8_128", + "llvm.x86.avx10.vcvt2ph2hf8256" => "__builtin_ia32_vcvt2ph2hf8_256", + "llvm.x86.avx10.vcvt2ph2hf8512" => "__builtin_ia32_vcvt2ph2hf8_512", + "llvm.x86.avx10.vcvt2ph2hf8s128" => "__builtin_ia32_vcvt2ph2hf8s_128", + "llvm.x86.avx10.vcvt2ph2hf8s256" => "__builtin_ia32_vcvt2ph2hf8s_256", + "llvm.x86.avx10.vcvt2ph2hf8s512" => "__builtin_ia32_vcvt2ph2hf8s_512", + "llvm.x86.avx10.vcvtbf162ibs128" => "__builtin_ia32_vcvtbf162ibs128", + "llvm.x86.avx10.vcvtbf162ibs256" => "__builtin_ia32_vcvtbf162ibs256", + "llvm.x86.avx10.vcvtbf162ibs512" => "__builtin_ia32_vcvtbf162ibs512", + "llvm.x86.avx10.vcvtbf162iubs128" => "__builtin_ia32_vcvtbf162iubs128", + "llvm.x86.avx10.vcvtbf162iubs256" => "__builtin_ia32_vcvtbf162iubs256", + "llvm.x86.avx10.vcvtbf162iubs512" => "__builtin_ia32_vcvtbf162iubs512", + "llvm.x86.avx10.vcvttbf162ibs128" => "__builtin_ia32_vcvttbf162ibs128", + "llvm.x86.avx10.vcvttbf162ibs256" => "__builtin_ia32_vcvttbf162ibs256", + "llvm.x86.avx10.vcvttbf162ibs512" => "__builtin_ia32_vcvttbf162ibs512", + "llvm.x86.avx10.vcvttbf162iubs128" => "__builtin_ia32_vcvttbf162iubs128", + "llvm.x86.avx10.vcvttbf162iubs256" => "__builtin_ia32_vcvttbf162iubs256", + "llvm.x86.avx10.vcvttbf162iubs512" => "__builtin_ia32_vcvttbf162iubs512", + "llvm.x86.avx10.vcvttsd2sis" => "__builtin_ia32_vcvttsd2sis32", + "llvm.x86.avx10.vcvttsd2sis64" => "__builtin_ia32_vcvttsd2sis64", + "llvm.x86.avx10.vcvttsd2usis" => "__builtin_ia32_vcvttsd2usis32", + "llvm.x86.avx10.vcvttsd2usis64" => "__builtin_ia32_vcvttsd2usis64", + "llvm.x86.avx10.vcvttss2sis" => "__builtin_ia32_vcvttss2sis32", + "llvm.x86.avx10.vcvttss2sis64" => "__builtin_ia32_vcvttss2sis64", + "llvm.x86.avx10.vcvttss2usis" => "__builtin_ia32_vcvttss2usis32", + "llvm.x86.avx10.vcvttss2usis64" => "__builtin_ia32_vcvttss2usis64", + "llvm.x86.avx10.vdivbf16128" => "__builtin_ia32_vdivbf16128", + "llvm.x86.avx10.vdivbf16256" => "__builtin_ia32_vdivbf16256", + "llvm.x86.avx10.vdivbf16512" => "__builtin_ia32_vdivbf16512", "llvm.x86.avx10.vdpphps.128" => "__builtin_ia32_vdpphps128", "llvm.x86.avx10.vdpphps.256" => "__builtin_ia32_vdpphps256", "llvm.x86.avx10.vdpphps.512" => "__builtin_ia32_vdpphps512", - "llvm.x86.avx10.vfmaddsubpd256" => "__builtin_ia32_vfmaddsubpd256_round", - "llvm.x86.avx10.vfmaddsubph256" => "__builtin_ia32_vfmaddsubph256_round", - "llvm.x86.avx10.vfmaddsubps256" => "__builtin_ia32_vfmaddsubps256_round", - "llvm.x86.avx10.vmaxpd256" => "__builtin_ia32_vmaxpd256_round", - "llvm.x86.avx10.vmaxph256" => "__builtin_ia32_vmaxph256_round", - "llvm.x86.avx10.vmaxps256" => "__builtin_ia32_vmaxps256_round", - "llvm.x86.avx10.vminmaxnepbf16128" => "__builtin_ia32_vminmaxnepbf16128", - "llvm.x86.avx10.vminmaxnepbf16256" => "__builtin_ia32_vminmaxnepbf16256", - "llvm.x86.avx10.vminmaxnepbf16512" => "__builtin_ia32_vminmaxnepbf16512", + "llvm.x86.avx10.vfmadd132bf16128" => "__builtin_ia32_vfmadd132bf16128", + "llvm.x86.avx10.vfmadd132bf16256" => "__builtin_ia32_vfmadd132bf16256", + "llvm.x86.avx10.vfmadd132bf16512" => "__builtin_ia32_vfmadd132bf16512", + "llvm.x86.avx10.vfmadd213bf16128" => "__builtin_ia32_vfmadd213bf16128", + "llvm.x86.avx10.vfmadd213bf16256" => "__builtin_ia32_vfmadd213bf16256", + "llvm.x86.avx10.vfmadd231bf16128" => "__builtin_ia32_vfmadd231bf16128", + "llvm.x86.avx10.vfmadd231bf16256" => "__builtin_ia32_vfmadd231bf16256", + "llvm.x86.avx10.vfmadd231bf16512" => "__builtin_ia32_vfmadd231bf16512", + "llvm.x86.avx10.vfmsub132bf16128" => "__builtin_ia32_vfmsub132bf16128", + "llvm.x86.avx10.vfmsub132bf16256" => "__builtin_ia32_vfmsub132bf16256", + "llvm.x86.avx10.vfmsub132bf16512" => "__builtin_ia32_vfmsub132bf16512", + "llvm.x86.avx10.vfmsub213bf16128" => "__builtin_ia32_vfmsub213bf16128", + "llvm.x86.avx10.vfmsub213bf16256" => "__builtin_ia32_vfmsub213bf16256", + "llvm.x86.avx10.vfmsub213bf16512" => "__builtin_ia32_vfmsub213bf16512", + "llvm.x86.avx10.vfmsub231bf16128" => "__builtin_ia32_vfmsub231bf16128", + "llvm.x86.avx10.vfmsub231bf16256" => "__builtin_ia32_vfmsub231bf16256", + "llvm.x86.avx10.vfmsub231bf16512" => "__builtin_ia32_vfmsub231bf16512", + "llvm.x86.avx10.vfnmadd132bf16128" => "__builtin_ia32_vfnmadd132bf16128", + "llvm.x86.avx10.vfnmadd132bf16256" => "__builtin_ia32_vfnmadd132bf16256", + "llvm.x86.avx10.vfnmadd132bf16512" => "__builtin_ia32_vfnmadd132bf16512", + "llvm.x86.avx10.vfnmadd213bf16128" => "__builtin_ia32_vfnmadd213bf16128", + "llvm.x86.avx10.vfnmadd213bf16256" => "__builtin_ia32_vfnmadd213bf16256", + "llvm.x86.avx10.vfnmadd213bf16512" => "__builtin_ia32_vfnmadd213bf16512", + "llvm.x86.avx10.vfnmadd231bf16128" => "__builtin_ia32_vfnmadd231bf16128", + "llvm.x86.avx10.vfnmadd231bf16256" => "__builtin_ia32_vfnmadd231bf16256", + "llvm.x86.avx10.vfnmadd231bf16512" => "__builtin_ia32_vfnmadd231bf16512", + "llvm.x86.avx10.vfnmsub132bf16128" => "__builtin_ia32_vfnmsub132bf16128", + "llvm.x86.avx10.vfnmsub132bf16256" => "__builtin_ia32_vfnmsub132bf16256", + "llvm.x86.avx10.vfnmsub132bf16512" => "__builtin_ia32_vfnmsub132bf16512", + "llvm.x86.avx10.vfnmsub213bf16128" => "__builtin_ia32_vfnmsub213bf16128", + "llvm.x86.avx10.vfnmsub213bf16256" => "__builtin_ia32_vfnmsub213bf16256", + "llvm.x86.avx10.vfnmsub213bf16512" => "__builtin_ia32_vfnmsub213bf16512", + "llvm.x86.avx10.vfnmsub231bf16128" => "__builtin_ia32_vfnmsub231bf16128", + "llvm.x86.avx10.vfnmsub231bf16256" => "__builtin_ia32_vfnmsub231bf16256", + "llvm.x86.avx10.vfnmsub231bf16512" => "__builtin_ia32_vfnmsub231bf16512", + "llvm.x86.avx10.vmaxbf16128" => "__builtin_ia32_vmaxbf16128", + "llvm.x86.avx10.vmaxbf16256" => "__builtin_ia32_vmaxbf16256", + "llvm.x86.avx10.vmaxbf16512" => "__builtin_ia32_vmaxbf16512", + "llvm.x86.avx10.vminbf16128" => "__builtin_ia32_vminbf16128", + "llvm.x86.avx10.vminbf16256" => "__builtin_ia32_vminbf16256", + "llvm.x86.avx10.vminbf16512" => "__builtin_ia32_vminbf16512", + "llvm.x86.avx10.vminmaxbf16128" => "__builtin_ia32_vminmaxbf16128", + "llvm.x86.avx10.vminmaxbf16256" => "__builtin_ia32_vminmaxbf16256", + "llvm.x86.avx10.vminmaxbf16512" => "__builtin_ia32_vminmaxbf16512", "llvm.x86.avx10.vminmaxpd128" => "__builtin_ia32_vminmaxpd128", "llvm.x86.avx10.vminmaxpd256" => "__builtin_ia32_vminmaxpd256", "llvm.x86.avx10.vminmaxph128" => "__builtin_ia32_vminmaxph128", "llvm.x86.avx10.vminmaxph256" => "__builtin_ia32_vminmaxph256", "llvm.x86.avx10.vminmaxps128" => "__builtin_ia32_vminmaxps128", "llvm.x86.avx10.vminmaxps256" => "__builtin_ia32_vminmaxps256", - "llvm.x86.avx10.vminpd256" => "__builtin_ia32_vminpd256_round", - "llvm.x86.avx10.vminph256" => "__builtin_ia32_vminph256_round", - "llvm.x86.avx10.vminps256" => "__builtin_ia32_vminps256_round", + "llvm.x86.avx10.vmovrsb128" => "__builtin_ia32_vmovrsb128", + "llvm.x86.avx10.vmovrsb256" => "__builtin_ia32_vmovrsb256", + "llvm.x86.avx10.vmovrsb512" => "__builtin_ia32_vmovrsb512", + "llvm.x86.avx10.vmovrsd128" => "__builtin_ia32_vmovrsd128", + "llvm.x86.avx10.vmovrsd256" => "__builtin_ia32_vmovrsd256", + "llvm.x86.avx10.vmovrsd512" => "__builtin_ia32_vmovrsd512", + "llvm.x86.avx10.vmovrsq128" => "__builtin_ia32_vmovrsq128", + "llvm.x86.avx10.vmovrsq256" => "__builtin_ia32_vmovrsq256", + "llvm.x86.avx10.vmovrsq512" => "__builtin_ia32_vmovrsq512", + "llvm.x86.avx10.vmovrsw128" => "__builtin_ia32_vmovrsw128", + "llvm.x86.avx10.vmovrsw256" => "__builtin_ia32_vmovrsw256", + "llvm.x86.avx10.vmovrsw512" => "__builtin_ia32_vmovrsw512", "llvm.x86.avx10.vmpsadbw.512" => "__builtin_ia32_mpsadbw512", - "llvm.x86.avx10.vmulpd256" => "__builtin_ia32_vmulpd256_round", - "llvm.x86.avx10.vmulph256" => "__builtin_ia32_vmulph256_round", - "llvm.x86.avx10.vmulps256" => "__builtin_ia32_vmulps256_round", + "llvm.x86.avx10.vmulbf16128" => "__builtin_ia32_vmulbf16128", + "llvm.x86.avx10.vmulbf16256" => "__builtin_ia32_vmulbf16256", + "llvm.x86.avx10.vmulbf16512" => "__builtin_ia32_vmulbf16512", "llvm.x86.avx10.vpdpbssd.512" => "__builtin_ia32_vpdpbssd512", "llvm.x86.avx10.vpdpbssds.512" => "__builtin_ia32_vpdpbssds512", "llvm.x86.avx10.vpdpbsud.512" => "__builtin_ia32_vpdpbsud512", @@ -7523,12 +7751,9 @@ match name { "llvm.x86.avx10.vpdpwusds.512" => "__builtin_ia32_vpdpwusds512", "llvm.x86.avx10.vpdpwuud.512" => "__builtin_ia32_vpdpwuud512", "llvm.x86.avx10.vpdpwuuds.512" => "__builtin_ia32_vpdpwuuds512", - "llvm.x86.avx10.vsqrtpd256" => "__builtin_ia32_vsqrtpd256_round", - "llvm.x86.avx10.vsqrtph256" => "__builtin_ia32_vsqrtph256_round", - "llvm.x86.avx10.vsqrtps256" => "__builtin_ia32_vsqrtps256_round", - "llvm.x86.avx10.vsubpd256" => "__builtin_ia32_vsubpd256_round", - "llvm.x86.avx10.vsubph256" => "__builtin_ia32_vsubph256_round", - "llvm.x86.avx10.vsubps256" => "__builtin_ia32_vsubps256_round", + "llvm.x86.avx10.vsubbf16128" => "__builtin_ia32_vsubbf16128", + "llvm.x86.avx10.vsubbf16256" => "__builtin_ia32_vsubbf16256", + "llvm.x86.avx10.vsubbf16512" => "__builtin_ia32_vsubbf16512", "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gatherd_d", "llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gatherd_d256", "llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gatherd_pd", @@ -9279,10 +9504,15 @@ match name { "llvm.x86.mmx.femms" => "__builtin_ia32_femms", "llvm.x86.monitorx" => "__builtin_ia32_monitorx", "llvm.x86.movdir64b" => "__builtin_ia32_movdir64b", + "llvm.x86.movrsdi" => "__builtin_ia32_movrsdi", + "llvm.x86.movrshi" => "__builtin_ia32_movrshi", + "llvm.x86.movrsqi" => "__builtin_ia32_movrsqi", + "llvm.x86.movrssi" => "__builtin_ia32_movrssi", "llvm.x86.mwaitx" => "__builtin_ia32_mwaitx", "llvm.x86.pclmulqdq" => "__builtin_ia32_pclmulqdq128", "llvm.x86.pclmulqdq.256" => "__builtin_ia32_pclmulqdq256", "llvm.x86.pclmulqdq.512" => "__builtin_ia32_pclmulqdq512", + "llvm.x86.prefetchrs" => "__builtin_ia32_prefetchrs", "llvm.x86.ptwrite32" => "__builtin_ia32_ptwrite32", "llvm.x86.ptwrite64" => "__builtin_ia32_ptwrite64", "llvm.x86.rdfsbase.32" => "__builtin_ia32_rdfsbase32", @@ -9536,14 +9766,40 @@ match name { "llvm.x86.stui" => "__builtin_ia32_stui", "llvm.x86.subborrow.u32" => "__builtin_ia32_subborrow_u32", "llvm.x86.subborrow.u64" => "__builtin_ia32_subborrow_u64", + "llvm.x86.t2rpntlvwz0" => "__builtin_ia32_t2rpntlvwz0", + "llvm.x86.t2rpntlvwz0rs" => "__builtin_ia32_t2rpntlvwz0rs", + "llvm.x86.t2rpntlvwz0rst1" => "__builtin_ia32_t2rpntlvwz0rst1", + "llvm.x86.t2rpntlvwz0t1" => "__builtin_ia32_t2rpntlvwz0t1", + "llvm.x86.t2rpntlvwz1" => "__builtin_ia32_t2rpntlvwz1", + "llvm.x86.t2rpntlvwz1rs" => "__builtin_ia32_t2rpntlvwz1rs", + "llvm.x86.t2rpntlvwz1rst1" => "__builtin_ia32_t2rpntlvwz1rst1", + "llvm.x86.t2rpntlvwz1t1" => "__builtin_ia32_t2rpntlvwz1t1", "llvm.x86.tbm.bextri.u32" => "__builtin_ia32_bextri_u32", "llvm.x86.tbm.bextri.u64" => "__builtin_ia32_bextri_u64", "llvm.x86.tcmmimfp16ps" => "__builtin_ia32_tcmmimfp16ps", "llvm.x86.tcmmimfp16ps.internal" => "__builtin_ia32_tcmmimfp16ps_internal", "llvm.x86.tcmmrlfp16ps" => "__builtin_ia32_tcmmrlfp16ps", "llvm.x86.tcmmrlfp16ps.internal" => "__builtin_ia32_tcmmrlfp16ps_internal", + "llvm.x86.tconjtcmmimfp16ps" => "__builtin_ia32_tconjtcmmimfp16ps", + "llvm.x86.tconjtcmmimfp16ps.internal" => "__builtin_ia32_tconjtcmmimfp16ps_internal", + "llvm.x86.tconjtfp16" => "__builtin_ia32_tconjtfp16", + "llvm.x86.tconjtfp16.internal" => "__builtin_ia32_tconjtfp16_internal", + "llvm.x86.tcvtrowd2ps" => "__builtin_ia32_tcvtrowd2ps", + "llvm.x86.tcvtrowd2ps.internal" => "__builtin_ia32_tcvtrowd2ps_internal", + "llvm.x86.tcvtrowps2bf16h" => "__builtin_ia32_tcvtrowps2bf16h", + "llvm.x86.tcvtrowps2bf16h.internal" => "__builtin_ia32_tcvtrowps2bf16h_internal", + "llvm.x86.tcvtrowps2bf16l" => "__builtin_ia32_tcvtrowps2bf16l", + "llvm.x86.tcvtrowps2bf16l.internal" => "__builtin_ia32_tcvtrowps2bf16l_internal", + "llvm.x86.tcvtrowps2phh" => "__builtin_ia32_tcvtrowps2phh", + "llvm.x86.tcvtrowps2phh.internal" => "__builtin_ia32_tcvtrowps2phh_internal", + "llvm.x86.tcvtrowps2phl" => "__builtin_ia32_tcvtrowps2phl", + "llvm.x86.tcvtrowps2phl.internal" => "__builtin_ia32_tcvtrowps2phl_internal", "llvm.x86.tdpbf16ps" => "__builtin_ia32_tdpbf16ps", "llvm.x86.tdpbf16ps.internal" => "__builtin_ia32_tdpbf16ps_internal", + "llvm.x86.tdpbf8ps" => "__builtin_ia32_tdpbf8ps", + "llvm.x86.tdpbf8ps.internal" => "__builtin_ia32_tdpbf8ps_internal", + "llvm.x86.tdpbhf8ps" => "__builtin_ia32_tdpbhf8ps", + "llvm.x86.tdpbhf8ps.internal" => "__builtin_ia32_tdpbhf8ps_internal", "llvm.x86.tdpbssd" => "__builtin_ia32_tdpbssd", "llvm.x86.tdpbssd.internal" => "__builtin_ia32_tdpbssd_internal", "llvm.x86.tdpbsud" => "__builtin_ia32_tdpbsud", @@ -9554,17 +9810,41 @@ match name { "llvm.x86.tdpbuud.internal" => "__builtin_ia32_tdpbuud_internal", "llvm.x86.tdpfp16ps" => "__builtin_ia32_tdpfp16ps", "llvm.x86.tdpfp16ps.internal" => "__builtin_ia32_tdpfp16ps_internal", + "llvm.x86.tdphbf8ps" => "__builtin_ia32_tdphbf8ps", + "llvm.x86.tdphbf8ps.internal" => "__builtin_ia32_tdphbf8ps_internal", + "llvm.x86.tdphf8ps" => "__builtin_ia32_tdphf8ps", + "llvm.x86.tdphf8ps.internal" => "__builtin_ia32_tdphf8ps_internal", "llvm.x86.testui" => "__builtin_ia32_testui", "llvm.x86.tileloadd64" => "__builtin_ia32_tileloadd64", "llvm.x86.tileloadd64.internal" => "__builtin_ia32_tileloadd64_internal", + "llvm.x86.tileloaddrs64" => "__builtin_ia32_tileloaddrs64", + "llvm.x86.tileloaddrs64.internal" => "__builtin_ia32_tileloaddrs64_internal", + "llvm.x86.tileloaddrst164" => "__builtin_ia32_tileloaddrst164", + "llvm.x86.tileloaddrst164.internal" => "__builtin_ia32_tileloaddrst164_internal", "llvm.x86.tileloaddt164" => "__builtin_ia32_tileloaddt164", "llvm.x86.tileloaddt164.internal" => "__builtin_ia32_tileloaddt164_internal", + "llvm.x86.tilemovrow" => "__builtin_ia32_tilemovrow", + "llvm.x86.tilemovrow.internal" => "__builtin_ia32_tilemovrow_internal", "llvm.x86.tilerelease" => "__builtin_ia32_tilerelease", "llvm.x86.tilestored64" => "__builtin_ia32_tilestored64", "llvm.x86.tilestored64.internal" => "__builtin_ia32_tilestored64_internal", "llvm.x86.tilezero" => "__builtin_ia32_tilezero", "llvm.x86.tilezero.internal" => "__builtin_ia32_tilezero_internal", + "llvm.x86.tmmultf32ps" => "__builtin_ia32_tmmultf32ps", + "llvm.x86.tmmultf32ps.internal" => "__builtin_ia32_tmmultf32ps_internal", "llvm.x86.tpause" => "__builtin_ia32_tpause", + "llvm.x86.ttcmmimfp16ps" => "__builtin_ia32_ttcmmimfp16ps", + "llvm.x86.ttcmmimfp16ps.internal" => "__builtin_ia32_ttcmmimfp16ps_internal", + "llvm.x86.ttcmmrlfp16ps" => "__builtin_ia32_ttcmmrlfp16ps", + "llvm.x86.ttcmmrlfp16ps.internal" => "__builtin_ia32_ttcmmrlfp16ps_internal", + "llvm.x86.ttdpbf16ps" => "__builtin_ia32_ttdpbf16ps", + "llvm.x86.ttdpbf16ps.internal" => "__builtin_ia32_ttdpbf16ps_internal", + "llvm.x86.ttdpfp16ps" => "__builtin_ia32_ttdpfp16ps", + "llvm.x86.ttdpfp16ps.internal" => "__builtin_ia32_ttdpfp16ps_internal", + "llvm.x86.ttmmultf32ps" => "__builtin_ia32_ttmmultf32ps", + "llvm.x86.ttmmultf32ps.internal" => "__builtin_ia32_ttmmultf32ps_internal", + "llvm.x86.ttransposed" => "__builtin_ia32_ttransposed", + "llvm.x86.ttransposed.internal" => "__builtin_ia32_ttransposed_internal", "llvm.x86.umonitor" => "__builtin_ia32_umonitor", "llvm.x86.umwait" => "__builtin_ia32_umwait", "llvm.x86.urdmsr" => "__builtin_ia32_urdmsr", @@ -9604,8 +9884,10 @@ match name { "llvm.x86.vsm3rnds2" => "__builtin_ia32_vsm3rnds2", "llvm.x86.vsm4key4128" => "__builtin_ia32_vsm4key4128", "llvm.x86.vsm4key4256" => "__builtin_ia32_vsm4key4256", + "llvm.x86.vsm4key4512" => "__builtin_ia32_vsm4key4512", "llvm.x86.vsm4rnds4128" => "__builtin_ia32_vsm4rnds4128", "llvm.x86.vsm4rnds4256" => "__builtin_ia32_vsm4rnds4256", + "llvm.x86.vsm4rnds4512" => "__builtin_ia32_vsm4rnds4512", "llvm.x86.wbinvd" => "__builtin_ia32_wbinvd", "llvm.x86.wbnoinvd" => "__builtin_ia32_wbnoinvd", "llvm.x86.wrfsbase.32" => "__builtin_ia32_wrfsbase32", diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index 9caceca9295..ba65c8205a5 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -4,9 +4,7 @@ mod simd; #[cfg(feature = "master")] use std::iter; -#[cfg(feature = "master")] -use gccjit::FunctionType; -use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp}; +use gccjit::{ComparisonOp, Function, FunctionType, RValue, ToRValue, Type, UnaryOp}; #[cfg(feature = "master")] use rustc_abi::ExternAbi; use rustc_abi::{BackendRepr, HasDataLayout}; @@ -132,6 +130,72 @@ fn get_simple_intrinsic<'gcc, 'tcx>( Some(cx.context.get_builtin_function(gcc_name)) } +// TODO(antoyo): We can probably remove these and use the fallback intrinsic implementation. +fn get_simple_function<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + name: Symbol, +) -> Option> { + let (return_type, parameters, func_name) = match name { + sym::minimumf32 => { + let parameters = [ + cx.context.new_parameter(None, cx.float_type, "a"), + cx.context.new_parameter(None, cx.float_type, "b"), + ]; + (cx.float_type, parameters, "fminimumf") + } + sym::minimumf64 => { + let parameters = [ + cx.context.new_parameter(None, cx.double_type, "a"), + cx.context.new_parameter(None, cx.double_type, "b"), + ]; + (cx.double_type, parameters, "fminimum") + } + sym::minimumf128 => { + let f128_type = cx.type_f128(); + // GCC doesn't have the intrinsic we want so we use the compiler-builtins one + // https://docs.rs/compiler_builtins/latest/compiler_builtins/math/full_availability/fn.fminimumf128.html + let parameters = [ + cx.context.new_parameter(None, f128_type, "a"), + cx.context.new_parameter(None, f128_type, "b"), + ]; + (f128_type, parameters, "fminimumf128") + } + sym::maximumf32 => { + let parameters = [ + cx.context.new_parameter(None, cx.float_type, "a"), + cx.context.new_parameter(None, cx.float_type, "b"), + ]; + (cx.float_type, parameters, "fmaximumf") + } + sym::maximumf64 => { + let parameters = [ + cx.context.new_parameter(None, cx.double_type, "a"), + cx.context.new_parameter(None, cx.double_type, "b"), + ]; + (cx.double_type, parameters, "fmaximum") + } + sym::maximumf128 => { + let f128_type = cx.type_f128(); + // GCC doesn't have the intrinsic we want so we use the compiler-builtins one + // https://docs.rs/compiler_builtins/latest/compiler_builtins/math/full_availability/fn.fmaximumf128.html + let parameters = [ + cx.context.new_parameter(None, f128_type, "a"), + cx.context.new_parameter(None, f128_type, "b"), + ]; + (f128_type, parameters, "fmaximumf128") + } + _ => return None, + }; + Some(cx.context.new_function( + None, + FunctionType::Extern, + return_type, + ¶meters, + func_name, + false, + )) +} + impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { fn codegen_intrinsic_call( &mut self, @@ -160,6 +224,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout); let simple = get_simple_intrinsic(self, name); + let simple_func = get_simple_function(self, name); // FIXME(tempdragon): Re-enable `clippy::suspicious_else_formatting` if the following issue is solved: // https://github.com/rust-lang/rust-clippy/issues/12497 @@ -167,7 +232,15 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc #[allow(clippy::suspicious_else_formatting)] let value = match name { _ if simple.is_some() => { - let func = simple.expect("simple function"); + let func = simple.expect("simple intrinsic function"); + self.cx.context.new_call( + self.location, + func, + &args.iter().map(|arg| arg.immediate()).collect::>(), + ) + } + _ if simple_func.is_some() => { + let func = simple_func.expect("simple function"); self.cx.context.new_call( self.location, func, diff --git a/src/lib.rs b/src/lib.rs index 2c5a7871683..6e2a50d745a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,7 @@ #![allow(internal_features)] #![doc(rust_logo)] #![feature(rustdoc_internals)] -#![feature(rustc_private, decl_macro, never_type, trusted_len, let_chains)] +#![feature(rustc_private, decl_macro, never_type, trusted_len)] #![allow(broken_intra_doc_links)] #![recursion_limit = "256"] #![warn(rust_2018_idioms)] @@ -454,7 +454,7 @@ impl WriteBackendMethods for GccCodegenBackend { } /// This is the entrypoint for a hot plugged rustc_codegen_gccjit -#[no_mangle] +#[unsafe(no_mangle)] pub fn __rustc_codegen_backend() -> Box { #[cfg(feature = "master")] let info = { diff --git a/tests/failing-ui-tests.txt b/tests/failing-ui-tests.txt index 499c1a96231..0a01a661c35 100644 --- a/tests/failing-ui-tests.txt +++ b/tests/failing-ui-tests.txt @@ -10,7 +10,7 @@ tests/ui/sepcomp/sepcomp-fns-backwards.rs tests/ui/sepcomp/sepcomp-fns.rs tests/ui/sepcomp/sepcomp-statics.rs tests/ui/asm/x86_64/may_unwind.rs -tests/ui/catch-unwind-bang.rs +tests/ui/panics/catch-unwind-bang.rs tests/ui/drop/dynamic-drop-async.rs tests/ui/cfg/cfg-panic-abort.rs tests/ui/drop/repeat-drop.rs @@ -94,23 +94,14 @@ tests/ui/simd/intrinsic/generic-as.rs tests/ui/backtrace/backtrace.rs tests/ui/lifetimes/tail-expr-lock-poisoning.rs tests/ui/runtime/rt-explody-panic-payloads.rs -tests/ui/codegen/equal-pointers-unequal/as-cast/function.rs -tests/ui/codegen/equal-pointers-unequal/as-cast/basic.rs tests/ui/codegen/equal-pointers-unequal/as-cast/inline1.rs -tests/ui/codegen/equal-pointers-unequal/as-cast/print.rs tests/ui/codegen/equal-pointers-unequal/as-cast/inline2.rs tests/ui/codegen/equal-pointers-unequal/as-cast/segfault.rs -tests/ui/codegen/equal-pointers-unequal/exposed-provenance/function.rs -tests/ui/codegen/equal-pointers-unequal/exposed-provenance/basic.rs tests/ui/codegen/equal-pointers-unequal/as-cast/zero.rs tests/ui/codegen/equal-pointers-unequal/exposed-provenance/inline1.rs -tests/ui/codegen/equal-pointers-unequal/exposed-provenance/print.rs tests/ui/codegen/equal-pointers-unequal/exposed-provenance/inline2.rs tests/ui/codegen/equal-pointers-unequal/exposed-provenance/segfault.rs tests/ui/codegen/equal-pointers-unequal/exposed-provenance/zero.rs -tests/ui/codegen/equal-pointers-unequal/strict-provenance/basic.rs -tests/ui/codegen/equal-pointers-unequal/strict-provenance/function.rs -tests/ui/codegen/equal-pointers-unequal/strict-provenance/print.rs tests/ui/codegen/equal-pointers-unequal/strict-provenance/inline1.rs tests/ui/codegen/equal-pointers-unequal/strict-provenance/inline2.rs tests/ui/codegen/equal-pointers-unequal/strict-provenance/segfault.rs diff --git a/tests/lang_tests_common.rs b/tests/lang_tests_common.rs index d5a0d71c4b2..bdcf14b4b26 100644 --- a/tests/lang_tests_common.rs +++ b/tests/lang_tests_common.rs @@ -42,7 +42,9 @@ pub fn main_inner(profile: Profile) { .expect("failed to get absolute path of `gcc-path`") .display() .to_string(); - env::set_var("LD_LIBRARY_PATH", gcc_path); + unsafe { + env::set_var("LD_LIBRARY_PATH", gcc_path); + } fn rust_filter(path: &Path) -> bool { path.is_file() && path.extension().expect("extension").to_str().expect("to_str") == "rs" @@ -67,15 +69,14 @@ pub fn main_inner(profile: Profile) { .test_dir("tests/run") .test_path_filter(filter) .test_extract(|path| { - let lines = std::fs::read_to_string(path) + std::fs::read_to_string(path) .expect("read file") .lines() .skip_while(|l| !l.starts_with("//")) .take_while(|l| l.starts_with("//")) .map(|l| &l[2..]) .collect::>() - .join("\n"); - lines + .join("\n") }) .test_cmds(move |path| { // Test command 1: Compile `x.rs` into `tempdir/x`. diff --git a/tests/run/always_inline.rs b/tests/run/always_inline.rs new file mode 100644 index 00000000000..ebd741ee090 --- /dev/null +++ b/tests/run/always_inline.rs @@ -0,0 +1,53 @@ +// Compiler: +// +// Run-time: +// status: 0 + +#![feature(no_core)] +#![no_std] +#![no_core] +#![no_main] + +extern crate mini_core; +use mini_core::*; + +#[inline(always)] +fn fib(n: u8) -> u8 { + if n == 0 { + return 1; + } + if n == 1 { + return 1; + } + fib(n - 1) + fib(n - 2) +} + +#[inline(always)] +fn fib_b(n: u8) -> u8 { + if n == 0 { + return 1; + } + if n == 1 { + return 1; + } + fib_a(n - 1) + fib_a(n - 2) +} + +#[inline(always)] +fn fib_a(n: u8) -> u8 { + if n == 0 { + return 1; + } + if n == 1 { + return 1; + } + fib_b(n - 1) + fib_b(n - 2) +} + +#[no_mangle] +extern "C" fn main(argc: i32, _argv: *const *const u8) -> i32 { + if fib(2) != fib_a(2) { + intrinsics::abort(); + } + 0 +} diff --git a/tests/run/switchint_128bit.rs b/tests/run/switchint_128bit.rs new file mode 100644 index 00000000000..decae5bfcd7 --- /dev/null +++ b/tests/run/switchint_128bit.rs @@ -0,0 +1,37 @@ +// Compiler: +// +// Run-time: +// status: 0 + +#![feature(no_core)] +#![no_std] +#![no_core] +#![no_main] + +extern crate mini_core; +use intrinsics::black_box; +use mini_core::*; + +#[no_mangle] +extern "C" fn main(argc: i32, _argv: *const *const u8) -> i32 { + // 1st. Check that small 128 bit values work. + let val = black_box(64_u128); + match val { + 0 => return 1, + 1 => return 2, + 64 => (), + _ => return 3, + } + // 2nd check that *large* values work. + const BIG: u128 = 0xDEAD_C0FE_BEEF_DECAF_BADD_DECAF_BEEF_u128; + let val = black_box(BIG); + match val { + 0 => return 4, + 1 => return 5, + // Check that we will not match on the lower u64, if the upper qword is different! + 0xcafbadddecafbeef => return 6, + 0xDEAD_C0FE_BEEF_DECAF_BADD_DECAF_BEEF_u128 => (), + _ => return 7, + } + 0 +} diff --git a/tools/generate_intrinsics.py b/tools/generate_intrinsics.py index 8efed3e43af..181f1e501a4 100644 --- a/tools/generate_intrinsics.py +++ b/tools/generate_intrinsics.py @@ -12,7 +12,7 @@ def run_command(command, cwd=None): sys.exit(1) -def clone_repository(repo_name, path, repo_url, sub_paths=None): +def clone_repository(repo_name, path, repo_url, branch="master", sub_paths=None): if os.path.exists(path): while True: choice = input("There is already a `{}` folder, do you want to update it? [y/N]".format(path)) @@ -21,7 +21,7 @@ def clone_repository(repo_name, path, repo_url, sub_paths=None): return elif choice.lower() == "y": print("Updating repository...") - run_command(["git", "pull", "origin"], cwd=path) + run_command(["git", "pull", "origin", branch], cwd=path) return else: print("Didn't understand answer...") @@ -209,6 +209,7 @@ def main(): "llvm-project", llvm_path, "https://github.com/llvm/llvm-project", + branch="main", sub_paths=["llvm/include/llvm/IR", "llvm/include/llvm/CodeGen/"], ) clone_repository( diff --git a/triagebot.toml b/triagebot.toml new file mode 100644 index 00000000000..13da0a87def --- /dev/null +++ b/triagebot.toml @@ -0,0 +1,7 @@ +# Documentation at https://forge.rust-lang.org/triagebot/index.html + +# Prevents un-canonicalized issue links (to avoid wrong issues being linked in r-l/rust) +[issue-links] + +# Prevents mentions in commits to avoid users being spammed +[no-mentions] From 3cec08a47ecf18d90d65c9dbd1927082f2af36d3 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 14 May 2025 18:00:19 +0200 Subject: [PATCH 02/10] Update gcc version used in rustc_codegen_version --- libgccjit.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libgccjit.version b/libgccjit.version index d06646dacc3..f62154968d3 100644 --- a/libgccjit.version +++ b/libgccjit.version @@ -1 +1 @@ -8b194529188f9d3a98cc211caa805a5355bfa8f0 +04ce66d8c918de9273bd7101638ad8724edf5e21 From f36e8f53db9f3015fd872e833427f56af8035275 Mon Sep 17 00:00:00 2001 From: mejrs <59372212+mejrs@users.noreply.github.com> Date: Sun, 18 May 2025 18:14:43 +0200 Subject: [PATCH 03/10] Remove rustc_attr_data_structures re-export from rustc_attr_parsing --- src/attributes.rs | 4 ++-- src/callee.rs | 2 +- src/lib.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/attributes.rs b/src/attributes.rs index 69b04dd5796..3568989a262 100644 --- a/src/attributes.rs +++ b/src/attributes.rs @@ -2,8 +2,8 @@ use gccjit::FnAttribute; use gccjit::Function; #[cfg(feature = "master")] -use rustc_attr_parsing::InlineAttr; -use rustc_attr_parsing::InstructionSetAttr; +use rustc_attr_data_structures::InlineAttr; +use rustc_attr_data_structures::InstructionSetAttr; #[cfg(feature = "master")] use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::ty; diff --git a/src/callee.rs b/src/callee.rs index c133ae4fcdd..c8130b7c010 100644 --- a/src/callee.rs +++ b/src/callee.rs @@ -106,7 +106,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>) // This is a monomorphization of a generic function. if !(cx.tcx.sess.opts.share_generics() || tcx.codegen_fn_attrs(instance_def_id).inline - == rustc_attr_parsing::InlineAttr::Never) + == rustc_attr_data_structures::InlineAttr::Never) { // When not sharing generics, all instances are in the same // crate and have hidden visibility. diff --git a/src/lib.rs b/src/lib.rs index 2c5a7871683..fb8061a4abb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,7 @@ extern crate tracing; extern crate rustc_abi; extern crate rustc_apfloat; extern crate rustc_ast; -extern crate rustc_attr_parsing; +extern crate rustc_attr_data_structures; extern crate rustc_codegen_ssa; extern crate rustc_data_structures; extern crate rustc_errors; From ce3dae9c52a145ae1cb9eea5089ec2d1cca8e9bf Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Wed, 21 May 2025 19:46:24 -0400 Subject: [PATCH 04/10] Update to nightly-2025-05-21 --- rust-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain b/rust-toolchain index a8cda28688c..bafe497a2a2 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2025-05-12" +channel = "nightly-2025-05-21" components = ["rust-src", "rustc-dev", "llvm-tools-preview"] From 593d7ca571bb00d87713110f4566208727238c0e Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Wed, 21 May 2025 21:03:48 -0400 Subject: [PATCH 05/10] Implement missing f16/f128 builtins --- src/builder.rs | 10 ++++++- src/intrinsic/mod.rs | 67 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 4e2163201fd..f54a1a941ea 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -765,7 +765,15 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { #[cfg(feature = "master")] match self.cx.type_kind(a_type) { - TypeKind::Half | TypeKind::Float => { + TypeKind::Half => { + let fmodf = self.context.get_builtin_function("fmodf"); + let f32_type = self.type_f32(); + let a = self.context.new_cast(self.location, a, f32_type); + let b = self.context.new_cast(self.location, b, f32_type); + let result = self.context.new_call(self.location, fmodf, &[a, b]); + return self.context.new_cast(self.location, result, a_type); + } + TypeKind::Float => { let fmodf = self.context.get_builtin_function("fmodf"); return self.context.new_call(self.location, fmodf, &[a, b]); } diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index 4e5018ae011..8402d0c6fcf 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -45,8 +45,10 @@ fn get_simple_intrinsic<'gcc, 'tcx>( let gcc_name = match name { sym::sqrtf32 => "sqrtf", sym::sqrtf64 => "sqrt", + sym::sqrtf128 => "sqrtl", sym::powif32 => "__builtin_powif", sym::powif64 => "__builtin_powi", + sym::powif128 => "__builtin_powil", sym::sinf32 => "sinf", sym::sinf64 => "sin", sym::cosf32 => "cosf", @@ -65,6 +67,7 @@ fn get_simple_intrinsic<'gcc, 'tcx>( sym::log2f64 => "log2", sym::fmaf32 => "fmaf", sym::fmaf64 => "fma", + sym::fmaf128 => "fmal", // FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation sym::fmuladdf32 => "fmaf", // TODO: use gcc intrinsic analogous to llvm.fmuladd.f32 sym::fmuladdf64 => "fma", // TODO: use gcc intrinsic analogous to llvm.fmuladd.f64 @@ -72,22 +75,29 @@ fn get_simple_intrinsic<'gcc, 'tcx>( sym::fabsf64 => "fabs", sym::minnumf32 => "fminf", sym::minnumf64 => "fmin", + sym::minnumf128 => "fminl", sym::maxnumf32 => "fmaxf", sym::maxnumf64 => "fmax", + sym::maxnumf128 => "fmaxl", sym::copysignf32 => "copysignf", sym::copysignf64 => "copysign", sym::copysignf128 => "copysignl", sym::floorf32 => "floorf", sym::floorf64 => "floor", + sym::floorf128 => "floorl", sym::ceilf32 => "ceilf", sym::ceilf64 => "ceil", + sym::ceilf128 => "ceill", sym::truncf32 => "truncf", sym::truncf64 => "trunc", + sym::truncf128 => "truncl", // We match the LLVM backend and lower this to `rint`. sym::round_ties_even_f32 => "rintf", sym::round_ties_even_f64 => "rint", + sym::round_ties_even_f128 => "rintl", sym::roundf32 => "roundf", sym::roundf64 => "round", + sym::roundf128 => "roundl", sym::abort => "abort", _ => return None, }; @@ -160,6 +170,40 @@ fn get_simple_function<'gcc, 'tcx>( )) } +fn f16_builtin<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + name: Symbol, + args: &[OperandRef<'tcx, RValue<'gcc>>], +) -> RValue<'gcc> { + let f32_type = cx.type_f32(); + let builtin_name = match name { + sym::ceilf16 => "__builtin_ceilf", + sym::floorf16 => "__builtin_floorf", + sym::fmaf16 => "fmaf", + sym::maxnumf16 => "__builtin_fmaxf", + sym::minnumf16 => "__builtin_fminf", + sym::powf16 => "__builtin_powf", + sym::powif16 => { + let func = cx.context.get_builtin_function("__builtin_powif"); + let arg0 = cx.context.new_cast(None, args[0].immediate(), f32_type); + let args = [arg0, args[1].immediate()]; + let result = cx.context.new_call(None, func, &args); + return cx.context.new_cast(None, result, cx.type_f16()); + } + sym::roundf16 => "__builtin_roundf", + sym::round_ties_even_f16 => "__builtin_rintf", + sym::sqrtf16 => "__builtin_sqrtf", + sym::truncf16 => "__builtin_truncf", + _ => unreachable!(), + }; + + let func = cx.context.get_builtin_function(builtin_name); + let args: Vec<_> = + args.iter().map(|arg| cx.context.new_cast(None, arg.immediate(), f32_type)).collect(); + let result = cx.context.new_call(None, func, &args); + cx.context.new_cast(None, result, cx.type_f16()) +} + impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { fn codegen_intrinsic_call( &mut self, @@ -211,18 +255,17 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc &args.iter().map(|arg| arg.immediate()).collect::>(), ) } - sym::fmaf16 => { - // TODO(antoyo): use the correct builtin for f16. - let func = self.cx.context.get_builtin_function("fmaf"); - let args: Vec<_> = args - .iter() - .map(|arg| { - self.cx.context.new_cast(self.location, arg.immediate(), self.cx.type_f32()) - }) - .collect(); - let result = self.cx.context.new_call(self.location, func, &args); - self.cx.context.new_cast(self.location, result, self.cx.type_f16()) - } + sym::ceilf16 + | sym::floorf16 + | sym::fmaf16 + | sym::maxnumf16 + | sym::minnumf16 + | sym::powf16 + | sym::powif16 + | sym::roundf16 + | sym::round_ties_even_f16 + | sym::sqrtf16 + | sym::truncf16 => f16_builtin(self, name, args), sym::is_val_statically_known => { let a = args[0].immediate(); let builtin = self.context.get_builtin_function("__builtin_constant_p"); From dca28e588cb9c49bf275c073e28663bfc8393b5f Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Wed, 21 May 2025 21:04:07 -0400 Subject: [PATCH 06/10] Only specify that we have reliable f16 and f128 on targets where those types are supported --- src/lib.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6994c385fc8..66e9ce4627c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -521,13 +521,16 @@ fn target_config(sess: &Session, target_info: &LockedTargetInfo) -> TargetConfig let target_features = f(false); let unstable_target_features = f(true); + let has_reliable_f16 = target_info.supports_target_dependent_type(CType::Float16); + let has_reliable_f128 = target_info.supports_target_dependent_type(CType::Float128); + TargetConfig { target_features, unstable_target_features, // There are no known bugs with GCC support for f16 or f128 - has_reliable_f16: true, - has_reliable_f16_math: true, - has_reliable_f128: true, - has_reliable_f128_math: true, + has_reliable_f16, + has_reliable_f16_math: has_reliable_f16, + has_reliable_f128, + has_reliable_f128_math: has_reliable_f128, } } From dd6d5e2b103c6c1424026e41984dc4d106c499f3 Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Thu, 22 May 2025 19:02:12 -0400 Subject: [PATCH 07/10] Fix f128 intrinsics --- build_system/src/test.rs | 2 +- src/builder.rs | 19 +++++-- src/common.rs | 5 +- src/intrinsic/mod.rs | 108 +++++++++++++++++++++++++++++++++++---- 4 files changed, 116 insertions(+), 18 deletions(-) diff --git a/build_system/src/test.rs b/build_system/src/test.rs index df4ac85233b..df46bd1259f 100644 --- a/build_system/src/test.rs +++ b/build_system/src/test.rs @@ -680,7 +680,7 @@ fn test_libcore(env: &Env, args: &TestArg) -> Result<(), String> { println!("[TEST] libcore"); let path = get_sysroot_dir().join("sysroot_src/library/coretests"); let _ = remove_dir_all(path.join("target")); - run_cargo_command(&[&"test"], Some(&path), env, args)?; + run_cargo_command(&[&"test", &"--release"], Some(&path), env, args)?; Ok(()) } diff --git a/src/builder.rs b/src/builder.rs index f54a1a941ea..c7900ebcd6f 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -4,8 +4,8 @@ use std::convert::TryFrom; use std::ops::Deref; use gccjit::{ - BinaryOp, Block, ComparisonOp, Context, Function, LValue, Location, RValue, ToRValue, Type, - UnaryOp, + BinaryOp, Block, ComparisonOp, Context, Function, FunctionType, LValue, Location, RValue, + ToRValue, Type, UnaryOp, }; use rustc_abi as abi; use rustc_abi::{Align, HasDataLayout, Size, TargetDataLayout, WrappingRange}; @@ -782,8 +782,19 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { return self.context.new_call(self.location, fmod, &[a, b]); } TypeKind::FP128 => { - let fmodl = self.context.get_builtin_function("fmodl"); - return self.context.new_call(self.location, fmodl, &[a, b]); + let f128_type = self.type_f128(); + let fmodf128 = self.context.new_function( + None, + FunctionType::Extern, + f128_type, + &[ + self.context.new_parameter(None, f128_type, "a"), + self.context.new_parameter(None, f128_type, "b"), + ], + "fmodf128", + false, + ); + return self.context.new_call(self.location, fmodf128, &[a, b]); } _ => (), } diff --git a/src/common.rs b/src/common.rs index cfa951ddf4b..65f4788d902 100644 --- a/src/common.rs +++ b/src/common.rs @@ -237,14 +237,15 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> { // FIXME(antoyo): there's some issues with using the u128 code that follows, so hard-code // the paths for floating-point values. - if ty == self.float_type { + // TODO: Remove this code? + /*if ty == self.float_type { return self .context .new_rvalue_from_double(ty, f32::from_bits(data as u32) as f64); } if ty == self.double_type { return self.context.new_rvalue_from_double(ty, f64::from_bits(data as u64)); - } + }*/ let value = self.const_uint_big(self.type_ix(bitsize), data); let bytesize = layout.size(self).bytes(); diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index 8402d0c6fcf..acecab35d72 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -45,10 +45,8 @@ fn get_simple_intrinsic<'gcc, 'tcx>( let gcc_name = match name { sym::sqrtf32 => "sqrtf", sym::sqrtf64 => "sqrt", - sym::sqrtf128 => "sqrtl", sym::powif32 => "__builtin_powif", sym::powif64 => "__builtin_powi", - sym::powif128 => "__builtin_powil", sym::sinf32 => "sinf", sym::sinf64 => "sin", sym::cosf32 => "cosf", @@ -67,7 +65,6 @@ fn get_simple_intrinsic<'gcc, 'tcx>( sym::log2f64 => "log2", sym::fmaf32 => "fmaf", sym::fmaf64 => "fma", - sym::fmaf128 => "fmal", // FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation sym::fmuladdf32 => "fmaf", // TODO: use gcc intrinsic analogous to llvm.fmuladd.f32 sym::fmuladdf64 => "fma", // TODO: use gcc intrinsic analogous to llvm.fmuladd.f64 @@ -75,29 +72,22 @@ fn get_simple_intrinsic<'gcc, 'tcx>( sym::fabsf64 => "fabs", sym::minnumf32 => "fminf", sym::minnumf64 => "fmin", - sym::minnumf128 => "fminl", sym::maxnumf32 => "fmaxf", sym::maxnumf64 => "fmax", - sym::maxnumf128 => "fmaxl", sym::copysignf32 => "copysignf", sym::copysignf64 => "copysign", sym::copysignf128 => "copysignl", sym::floorf32 => "floorf", sym::floorf64 => "floor", - sym::floorf128 => "floorl", sym::ceilf32 => "ceilf", sym::ceilf64 => "ceil", - sym::ceilf128 => "ceill", sym::truncf32 => "truncf", sym::truncf64 => "trunc", - sym::truncf128 => "truncl", // We match the LLVM backend and lower this to `rint`. sym::round_ties_even_f32 => "rintf", sym::round_ties_even_f64 => "rint", - sym::round_ties_even_f128 => "rintl", sym::roundf32 => "roundf", sym::roundf64 => "round", - sym::roundf128 => "roundl", sym::abort => "abort", _ => return None, }; @@ -170,6 +160,61 @@ fn get_simple_function<'gcc, 'tcx>( )) } +fn get_simple_function_f128<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + name: Symbol, +) -> Option> { + if !cx.supports_f128_type { + return None; + } + + let f128_type = cx.type_f128(); + let func_name = match name { + sym::ceilf128 => "ceilf128", + sym::floorf128 => "floorf128", + sym::truncf128 => "truncf128", + sym::roundf128 => "roundf128", + sym::round_ties_even_f128 => "roundevenf128", + sym::sqrtf128 => "sqrtf128", + _ => return None, + }; + Some(cx.context.new_function( + None, + FunctionType::Extern, + f128_type, + &[cx.context.new_parameter(None, f128_type, "a")], + func_name, + false, + )) +} + +fn get_simple_function_f128_2args<'gcc, 'tcx>( + cx: &CodegenCx<'gcc, 'tcx>, + name: Symbol, +) -> Option> { + if !cx.supports_f128_type { + return None; + } + + let f128_type = cx.type_f128(); + let func_name = match name { + sym::maxnumf128 => "fmaxf128", + sym::minnumf128 => "fminf128", + _ => return None, + }; + Some(cx.context.new_function( + None, + FunctionType::Extern, + f128_type, + &[ + cx.context.new_parameter(None, f128_type, "a"), + cx.context.new_parameter(None, f128_type, "b"), + ], + func_name, + false, + )) +} + fn f16_builtin<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, name: Symbol, @@ -232,7 +277,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout); let simple = get_simple_intrinsic(self, name); - let simple_func = get_simple_function(self, name); + let simple_func = get_simple_function(self, name) + .or_else(|| get_simple_function_f128(self, name)) + .or_else(|| get_simple_function_f128_2args(self, name)); // FIXME(tempdragon): Re-enable `clippy::suspicious_else_formatting` if the following issue is solved: // https://github.com/rust-lang/rust-clippy/issues/12497 @@ -266,6 +313,45 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc | sym::round_ties_even_f16 | sym::sqrtf16 | sym::truncf16 => f16_builtin(self, name, args), + sym::fmaf128 => { + let f128_type = self.cx.type_f128(); + let func = self.cx.context.new_function( + None, + FunctionType::Extern, + f128_type, + &[ + self.cx.context.new_parameter(None, f128_type, "a"), + self.cx.context.new_parameter(None, f128_type, "b"), + self.cx.context.new_parameter(None, f128_type, "c"), + ], + "fmaf128", + false, + ); + self.cx.context.new_call( + self.location, + func, + &args.iter().map(|arg| arg.immediate()).collect::>(), + ) + } + sym::powif128 => { + let f128_type = self.cx.type_f128(); + let func = self.cx.context.new_function( + None, + FunctionType::Extern, + f128_type, + &[ + self.cx.context.new_parameter(None, f128_type, "a"), + self.cx.context.new_parameter(None, self.int_type, "b"), + ], + "__powitf2", + false, + ); + self.cx.context.new_call( + self.location, + func, + &args.iter().map(|arg| arg.immediate()).collect::>(), + ) + } sym::is_val_statically_known => { let a = args[0].immediate(); let builtin = self.context.get_builtin_function("__builtin_constant_p"); From f1d5cfea44007f2312c24442dff95951a7d51aff Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Sat, 24 May 2025 11:05:51 -0400 Subject: [PATCH 08/10] Skip the core test f16::test_total_cmp because it fails in debug mode even with cg_llvm --- build_system/src/test.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build_system/src/test.rs b/build_system/src/test.rs index df46bd1259f..5ad75384e5e 100644 --- a/build_system/src/test.rs +++ b/build_system/src/test.rs @@ -680,7 +680,15 @@ fn test_libcore(env: &Env, args: &TestArg) -> Result<(), String> { println!("[TEST] libcore"); let path = get_sysroot_dir().join("sysroot_src/library/coretests"); let _ = remove_dir_all(path.join("target")); - run_cargo_command(&[&"test", &"--release"], Some(&path), env, args)?; + // TODO(antoyo): run in release mode when we fix the failures. + // TODO(antoyo): remove the --skip f16::test_total_cmp when this issue is fixed: + // https://github.com/rust-lang/rust/issues/141503 + run_cargo_command( + &[&"test", &"--", &"--skip", &"f16::test_total_cmp"], + Some(&path), + env, + args, + )?; Ok(()) } From ac69f1eecb698f14ed2df060cca99af8ba8f658d Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Fri, 30 May 2025 13:49:26 -0400 Subject: [PATCH 09/10] Pin compiler_builtins to 0.1.160 to fix some f128 tests --- ...001-Pin-compiler_builtins-to-0.1.160.patch | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 patches/0001-Pin-compiler_builtins-to-0.1.160.patch diff --git a/patches/0001-Pin-compiler_builtins-to-0.1.160.patch b/patches/0001-Pin-compiler_builtins-to-0.1.160.patch new file mode 100644 index 00000000000..39266e081ed --- /dev/null +++ b/patches/0001-Pin-compiler_builtins-to-0.1.160.patch @@ -0,0 +1,39 @@ +From cdb3d407740e4f15c3746051f8ba89b8e74e99d3 Mon Sep 17 00:00:00 2001 +From: None +Date: Fri, 30 May 2025 13:46:22 -0400 +Subject: [PATCH] Pin compiler_builtins to 0.1.160 + +--- + library/alloc/Cargo.toml | 2 +- + library/std/Cargo.toml | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/library/alloc/Cargo.toml b/library/alloc/Cargo.toml +index 9d0d957..365c9dc 100644 +--- a/library/alloc/Cargo.toml ++++ b/library/alloc/Cargo.toml +@@ -16,7 +16,7 @@ bench = false + + [dependencies] + core = { path = "../core", public = true } +-compiler_builtins = { version = "=0.1.159", features = ['rustc-dep-of-std'] } ++compiler_builtins = { version = "=0.1.160", features = ['rustc-dep-of-std'] } + + [features] + compiler-builtins-mem = ['compiler_builtins/mem'] +diff --git a/library/std/Cargo.toml b/library/std/Cargo.toml +index 4ff4895..31371f0 100644 +--- a/library/std/Cargo.toml ++++ b/library/std/Cargo.toml +@@ -18,7 +18,7 @@ cfg-if = { version = "1.0", features = ['rustc-dep-of-std'] } + panic_unwind = { path = "../panic_unwind", optional = true } + panic_abort = { path = "../panic_abort" } + core = { path = "../core", public = true } +-compiler_builtins = { version = "=0.1.159" } ++compiler_builtins = { version = "=0.1.160" } + unwind = { path = "../unwind" } + hashbrown = { version = "0.15", default-features = false, features = [ + 'rustc-dep-of-std', +-- +2.49.0 + From ba53d9749798a32a9ce3a2db4123942ed81252a7 Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Mon, 2 Jun 2025 12:44:42 -0400 Subject: [PATCH 10/10] Fix cast from u128 to f128 --- src/int.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/int.rs b/src/int.rs index 9b5b0fde6e2..fed96e5eb8c 100644 --- a/src/int.rs +++ b/src/int.rs @@ -915,7 +915,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let name_suffix = match self.type_kind(dest_typ) { TypeKind::Float => "tisf", TypeKind::Double => "tidf", - TypeKind::FP128 => "tixf", + TypeKind::FP128 => "titf", kind => panic!("cannot cast a non-native integer to type {:?}", kind), }; let sign = if signed { "" } else { "un" };