From 64aa2c14b5c1de5beab822cb365627607dc5795e Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 19:56:18 +0530
Subject: [PATCH 01/10] Codegen **non-overloaded** LLVM intrinsics using their
 name

---
 compiler/rustc_codegen_gcc/src/type_of.rs     |   8 +-
 compiler/rustc_codegen_llvm/src/abi.rs        | 161 +++++++++++++++---
 compiler/rustc_codegen_llvm/src/builder.rs    |   6 +-
 compiler/rustc_codegen_llvm/src/context.rs    |  15 ++
 compiler/rustc_codegen_llvm/src/declare.rs    |  41 +++--
 compiler/rustc_codegen_llvm/src/intrinsic.rs  |   2 +-
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs   |   9 +
 compiler/rustc_codegen_llvm/src/llvm/mod.rs   |  20 +++
 compiler/rustc_codegen_llvm/src/type_.rs      |  12 +-
 compiler/rustc_codegen_ssa/src/mir/block.rs   |   4 +-
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs  |   2 +-
 compiler/rustc_codegen_ssa/src/size_of_val.rs |   2 +-
 .../rustc_codegen_ssa/src/traits/type_.rs     |   6 +-
 13 files changed, 242 insertions(+), 46 deletions(-)

diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs
index 5745acce6fee7..25b7a2e8c682b 100644
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@@ -1,6 +1,6 @@
 use std::fmt::Write;
 
-use gccjit::{Struct, Type};
+use gccjit::{RValue, Struct, Type};
 use rustc_abi as abi;
 use rustc_abi::Primitive::*;
 use rustc_abi::{
@@ -373,7 +373,11 @@ impl<'gcc, 'tcx> LayoutTypeCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         unimplemented!();
     }
 
-    fn fn_decl_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
+    fn fn_decl_backend_type(
+        &self,
+        fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+        _fn_ptr: RValue<'gcc>,
+    ) -> Type<'gcc> {
         // FIXME(antoyo): Should we do something with `FnAbiGcc::fn_attributes`?
         let FnAbiGcc { return_type, arguments_type, is_c_variadic, .. } = fn_abi.gcc_type(self);
         self.context.new_function_pointer_type(None, return_type, &arguments_type, is_c_variadic)
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 119cd634f9827..b22bb04906f42 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -1,5 +1,5 @@
 use std::borrow::Borrow;
-use std::cmp;
+use std::{cmp, iter};
 
 use libc::c_uint;
 use rustc_abi::{
@@ -300,8 +300,39 @@ impl<'ll, 'tcx> ArgAbiBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     }
 }
 
+pub(crate) enum FunctionSignature<'ll> {
+    /// The signature is obtained directly from LLVM, and **may not match the Rust signature**
+    Intrinsic(&'ll Type),
+    /// The name starts with `llvm.`, but can't obtain the intrinsic ID. May be invalid or upgradable
+    MaybeInvalidIntrinsic(&'ll Type),
+    /// Just the Rust signature
+    Rust(&'ll Type),
+}
+
+impl<'ll> FunctionSignature<'ll> {
+    pub(crate) fn fn_ty(&self) -> &'ll Type {
+        match self {
+            FunctionSignature::Intrinsic(fn_ty)
+            | FunctionSignature::MaybeInvalidIntrinsic(fn_ty)
+            | FunctionSignature::Rust(fn_ty) => fn_ty,
+        }
+    }
+}
+
 pub(crate) trait FnAbiLlvmExt<'ll, 'tcx> {
-    fn llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type;
+    fn llvm_return_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type;
+    fn llvm_argument_types(&self, cx: &CodegenCx<'ll, 'tcx>) -> Vec<&'ll Type>;
+    /// When `do_verify` is set, this function performs checks for the signature of LLVM intrinsics
+    /// and emits a fatal error if it doesn't match. These checks are important,but somewhat expensive
+    /// So they are only used at function definitions, not at callsites
+    fn llvm_type(
+        &self,
+        cx: &CodegenCx<'ll, 'tcx>,
+        name: &[u8],
+        do_verify: bool,
+    ) -> FunctionSignature<'ll>;
+    /// **If this function is an LLVM intrinsic** checks if the LLVM signature provided matches with this
+    fn verify_intrinsic_signature(&self, cx: &CodegenCx<'ll, 'tcx>, llvm_ty: &'ll Type) -> bool;
     fn ptr_to_llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type;
     fn llvm_cconv(&self, cx: &CodegenCx<'ll, 'tcx>) -> llvm::CallConv;
 
@@ -314,30 +345,38 @@ pub(crate) trait FnAbiLlvmExt<'ll, 'tcx> {
     );
 
     /// Apply attributes to a function call.
-    fn apply_attrs_callsite(&self, bx: &mut Builder<'_, 'll, 'tcx>, callsite: &'ll Value);
+    fn apply_attrs_callsite(
+        &self,
+        bx: &mut Builder<'_, 'll, 'tcx>,
+        callsite: &'ll Value,
+        llfn: &'ll Value,
+    );
 }
 
 impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
-    fn llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type {
+    fn llvm_return_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type {
+        match &self.ret.mode {
+            PassMode::Ignore => cx.type_void(),
+            PassMode::Direct(_) | PassMode::Pair(..) => self.ret.layout.immediate_llvm_type(cx),
+            PassMode::Cast { cast, pad_i32: _ } => cast.llvm_type(cx),
+            PassMode::Indirect { .. } => cx.type_void(),
+        }
+    }
+
+    fn llvm_argument_types(&self, cx: &CodegenCx<'ll, 'tcx>) -> Vec<&'ll Type> {
+        let indirect_return = matches!(self.ret.mode, PassMode::Indirect { .. });
+
         // Ignore "extra" args from the call site for C variadic functions.
         // Only the "fixed" args are part of the LLVM function signature.
         let args =
             if self.c_variadic { &self.args[..self.fixed_count as usize] } else { &self.args };
 
-        // This capacity calculation is approximate.
-        let mut llargument_tys = Vec::with_capacity(
-            self.args.len() + if let PassMode::Indirect { .. } = self.ret.mode { 1 } else { 0 },
-        );
+        let mut llargument_tys =
+            Vec::with_capacity(args.len() + if indirect_return { 1 } else { 0 });
 
-        let llreturn_ty = match &self.ret.mode {
-            PassMode::Ignore => cx.type_void(),
-            PassMode::Direct(_) | PassMode::Pair(..) => self.ret.layout.immediate_llvm_type(cx),
-            PassMode::Cast { cast, pad_i32: _ } => cast.llvm_type(cx),
-            PassMode::Indirect { .. } => {
-                llargument_tys.push(cx.type_ptr());
-                cx.type_void()
-            }
-        };
+        if indirect_return {
+            llargument_tys.push(cx.type_ptr());
+        }
 
         for arg in args {
             // Note that the exact number of arguments pushed here is carefully synchronized with
@@ -384,10 +423,74 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
             llargument_tys.push(llarg_ty);
         }
 
-        if self.c_variadic {
-            cx.type_variadic_func(&llargument_tys, llreturn_ty)
+        llargument_tys
+    }
+
+    fn verify_intrinsic_signature(&self, cx: &CodegenCx<'ll, 'tcx>, llvm_fn_ty: &'ll Type) -> bool {
+        let rust_return_ty = self.llvm_return_type(cx);
+        let rust_argument_tys = self.llvm_argument_types(cx);
+
+        let llvm_return_ty = cx.get_return_type(llvm_fn_ty);
+        let llvm_argument_tys = cx.func_params_types(llvm_fn_ty);
+        let llvm_is_variadic = cx.func_is_variadic(llvm_fn_ty);
+
+        if self.c_variadic != llvm_is_variadic || rust_argument_tys.len() != llvm_argument_tys.len()
+        {
+            return false;
+        }
+
+        // todo: add bypasses for types not accessible from Rust here
+        iter::once((rust_return_ty, llvm_return_ty))
+            .chain(iter::zip(rust_argument_tys, llvm_argument_tys))
+            .all(|(rust_ty, llvm_ty)| rust_ty == llvm_ty)
+    }
+
+    fn llvm_type(
+        &self,
+        cx: &CodegenCx<'ll, 'tcx>,
+        name: &[u8],
+        do_verify: bool,
+    ) -> FunctionSignature<'ll> {
+        let mut maybe_invalid = false;
+
+        if name.starts_with(b"llvm.") {
+            if let Some(intrinsic) = llvm::Intrinsic::lookup(name) {
+                if !intrinsic.is_overloaded() {
+                    // FIXME: also do this for overloaded intrinsics
+                    let llvm_fn_ty = cx.intrinsic_type(intrinsic, &[]);
+                    if do_verify {
+                        if !self.verify_intrinsic_signature(cx, llvm_fn_ty) {
+                            cx.tcx.dcx().fatal(format!(
+                                "Intrinsic signature mismatch for `{}`: expected signature `{llvm_fn_ty:?}`",
+                                str::from_utf8(name).unwrap()
+                            ));
+                        }
+                    }
+                    return FunctionSignature::Intrinsic(llvm_fn_ty);
+                }
+            } else {
+                // it's one of 2 cases,
+                // - either the base name is invalid
+                // - it has been superceded by something else, so the intrinsic was removed entirely
+                // to check for upgrades, we need the `llfn`, so we defer it for now
+
+                maybe_invalid = true;
+            }
+        }
+
+        let return_ty = self.llvm_return_type(cx);
+        let argument_tys = self.llvm_argument_types(cx);
+
+        let fn_ty = if self.c_variadic {
+            cx.type_variadic_func(&argument_tys, return_ty)
         } else {
-            cx.type_func(&llargument_tys, llreturn_ty)
+            cx.type_func(&argument_tys, return_ty)
+        };
+
+        if maybe_invalid {
+            FunctionSignature::MaybeInvalidIntrinsic(fn_ty)
+        } else {
+            FunctionSignature::Rust(fn_ty)
         }
     }
 
@@ -530,7 +633,23 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         }
     }
 
-    fn apply_attrs_callsite(&self, bx: &mut Builder<'_, 'll, 'tcx>, callsite: &'ll Value) {
+    fn apply_attrs_callsite(
+        &self,
+        bx: &mut Builder<'_, 'll, 'tcx>,
+        callsite: &'ll Value,
+        llfn: &'ll Value,
+    ) {
+        // if we are using the LLVM signature, use the LLVM attributes otherwise it might be problematic
+        let name = llvm::get_value_name(llfn);
+        if name.starts_with(b"llvm.")
+            && let Some(intrinsic) = llvm::Intrinsic::lookup(name)
+        {
+            // FIXME: also do this for overloaded intrinsics
+            if !intrinsic.is_overloaded() {
+                return;
+            }
+        }
+
         let mut func_attrs = SmallVec::<[_; 2]>::new();
         if self.ret.layout.is_uninhabited() {
             func_attrs.push(llvm::AttributeKind::NoReturn.create_attr(bx.cx.llcx));
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index ec006b591929d..b8119a58e7cf4 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -380,7 +380,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             )
         };
         if let Some(fn_abi) = fn_abi {
-            fn_abi.apply_attrs_callsite(self, invoke);
+            fn_abi.apply_attrs_callsite(self, invoke, llfn);
         }
         invoke
     }
@@ -1433,7 +1433,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             )
         };
         if let Some(fn_abi) = fn_abi {
-            fn_abi.apply_attrs_callsite(self, call);
+            fn_abi.apply_attrs_callsite(self, call, llfn);
         }
         call
     }
@@ -1775,7 +1775,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             )
         };
         if let Some(fn_abi) = fn_abi {
-            fn_abi.apply_attrs_callsite(self, callbr);
+            fn_abi.apply_attrs_callsite(self, callbr, llfn);
         }
         callbr
     }
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 8d6e1d8941b72..30c093657908e 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1264,6 +1264,21 @@ impl<'ll> CodegenCx<'ll, '_> {
         self.eh_catch_typeinfo.set(Some(eh_catch_typeinfo));
         eh_catch_typeinfo
     }
+
+    pub(crate) fn intrinsic_type(
+        &self,
+        intrinsic: llvm::Intrinsic,
+        type_params: &[&'ll Type],
+    ) -> &'ll Type {
+        unsafe {
+            llvm::LLVMIntrinsicGetType(
+                self.llcx(),
+                intrinsic.id(),
+                type_params.as_ptr(),
+                type_params.len(),
+            )
+        }
+    }
 }
 
 impl CodegenCx<'_, '_> {
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index 2419ec1f88854..0692e23b7c84a 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -22,7 +22,7 @@ use rustc_target::callconv::FnAbi;
 use smallvec::SmallVec;
 use tracing::debug;
 
-use crate::abi::FnAbiLlvmExt;
+use crate::abi::{FnAbiLlvmExt, FunctionSignature};
 use crate::common::AsCCharPtr;
 use crate::context::{CodegenCx, GenericCx, SCx, SimpleCx};
 use crate::llvm::AttributePlace::Function;
@@ -150,17 +150,34 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     ) -> &'ll Value {
         debug!("declare_rust_fn(name={:?}, fn_abi={:?})", name, fn_abi);
 
-        // Function addresses in Rust are never significant, allowing functions to
-        // be merged.
-        let llfn = declare_raw_fn(
-            self,
-            name,
-            fn_abi.llvm_cconv(self),
-            llvm::UnnamedAddr::Global,
-            llvm::Visibility::Default,
-            fn_abi.llvm_type(self),
-        );
-        fn_abi.apply_attrs_llfn(self, llfn, instance);
+        let signature = fn_abi.llvm_type(self, name.as_bytes(), true);
+        let llfn;
+
+        if let FunctionSignature::Intrinsic(fn_ty) = signature {
+            // intrinsics have a specified set of attributes, so we don't use the `FnAbi` set for them
+            llfn = declare_simple_fn(
+                self,
+                name,
+                fn_abi.llvm_cconv(self),
+                llvm::UnnamedAddr::Global,
+                llvm::Visibility::Default,
+                fn_ty,
+            );
+        } else {
+            // Function addresses in Rust are never significant, allowing functions to
+            // be merged.
+            llfn = declare_raw_fn(
+                self,
+                name,
+                fn_abi.llvm_cconv(self),
+                llvm::UnnamedAddr::Global,
+                llvm::Visibility::Default,
+                signature.fn_ty(),
+            );
+            fn_abi.apply_attrs_llfn(self, llfn, instance);
+        }
+
+        // todo: check for upgrades, and emit error if not upgradable
 
         if self.tcx.sess.is_sanitizer_cfi_enabled() {
             if let Some(instance) = instance {
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 10697b9a71f9c..615db259487d1 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1067,7 +1067,7 @@ fn gen_fn<'a, 'll, 'tcx>(
     codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
 ) -> (&'ll Type, &'ll Value) {
     let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
-    let llty = fn_abi.llvm_type(cx);
+    let llty = fn_abi.llvm_type(cx, name.as_bytes(), true).fn_ty();
     let llfn = cx.declare_fn(name, fn_abi, None);
     cx.set_frame_pointer_type(llfn);
     cx.apply_target_cpu_attr(llfn);
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index e27fbf94f341d..7e10db907a804 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1059,6 +1059,7 @@ unsafe extern "C" {
     ) -> &'a Type;
     pub(crate) fn LLVMCountParamTypes(FunctionTy: &Type) -> c_uint;
     pub(crate) fn LLVMGetParamTypes<'a>(FunctionTy: &'a Type, Dest: *mut &'a Type);
+    pub(crate) fn LLVMIsFunctionVarArg(FunctionTy: &Type) -> Bool;
 
     // Operations on struct types
     pub(crate) fn LLVMStructTypeInContext<'a>(
@@ -1194,6 +1195,14 @@ unsafe extern "C" {
 
     // Operations on functions
     pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
+    pub(crate) fn LLVMLookupIntrinsicID(Name: *const c_char, NameLen: size_t) -> c_uint;
+    pub(crate) fn LLVMIntrinsicGetType<'a>(
+        C: &'a Context,
+        ID: c_uint,
+        ParamTypes: *const &'a Type,
+        ParamCount: size_t,
+    ) -> &'a Type;
+    pub(crate) fn LLVMIntrinsicIsOverloaded(ID: c_uint) -> Bool;
 
     // Operations on parameters
     pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index ed23f91193013..f1c524d15d349 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -327,6 +327,26 @@ pub(crate) fn get_value_name(value: &Value) -> &[u8] {
     }
 }
 
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Intrinsic {
+    id: c_uint,
+}
+
+impl Intrinsic {
+    pub(crate) fn lookup(name: &[u8]) -> Option<Self> {
+        let id = unsafe { LLVMLookupIntrinsicID(name.as_c_char_ptr(), name.len()) };
+        if id == 0 { None } else { Some(Self { id }) }
+    }
+
+    pub(crate) fn id(self) -> c_uint {
+        self.id
+    }
+
+    pub(crate) fn is_overloaded(self) -> bool {
+        unsafe { LLVMIntrinsicIsOverloaded(self.id) == True }
+    }
+}
+
 /// Safe wrapper for `LLVMSetValueName2` from a byte slice
 pub(crate) fn set_value_name(value: &Value, name: &[u8]) {
     unsafe {
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 169036f515298..2e9d95d9c49f2 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -75,6 +75,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
             args
         }
     }
+
+    pub(crate) fn func_is_variadic(&self, ty: &'ll Type) -> bool {
+        unsafe { llvm::LLVMIsFunctionVarArg(ty) == True }
+    }
 }
 impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn type_bool(&self) -> &'ll Type {
@@ -284,8 +288,12 @@ impl<'ll, 'tcx> LayoutTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     fn cast_backend_type(&self, ty: &CastTarget) -> &'ll Type {
         ty.llvm_type(self)
     }
-    fn fn_decl_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> &'ll Type {
-        fn_abi.llvm_type(self)
+    fn fn_decl_backend_type(
+        &self,
+        fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+        fn_ptr: &'ll Value,
+    ) -> &'ll Type {
+        fn_abi.llvm_type(self, llvm::get_value_name(fn_ptr), false).fn_ty()
     }
     fn fn_ptr_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> &'ll Type {
         fn_abi.ptr_to_llvm_type(self)
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
index 43b87171d510d..993b918ab2e9b 100644
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -187,7 +187,7 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> {
 
         // If there is a cleanup block and the function we're calling can unwind, then
         // do an invoke, otherwise do a call.
-        let fn_ty = bx.fn_decl_backend_type(fn_abi);
+        let fn_ty = bx.fn_decl_backend_type(fn_abi, fn_ptr);
 
         let fn_attrs = if bx.tcx().def_kind(fx.instance.def_id()).has_codegen_attrs() {
             Some(bx.tcx().codegen_fn_attrs(fx.instance.def_id()))
@@ -1835,7 +1835,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         if is_call_from_compiler_builtins_to_upstream_monomorphization(bx.tcx(), instance) {
             bx.abort();
         } else {
-            let fn_ty = bx.fn_decl_backend_type(fn_abi);
+            let fn_ty = bx.fn_decl_backend_type(fn_abi, fn_ptr);
 
             let llret = bx.call(fn_ty, None, Some(fn_abi), fn_ptr, &[], funclet.as_ref(), None);
             bx.apply_attrs_to_cleanup_callsite(llret);
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 5c14fe5cd10b7..48101cf6664a6 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -779,7 +779,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                     };
                     let fn_ptr = bx.get_fn_addr(instance);
                     let fn_abi = bx.fn_abi_of_instance(instance, ty::List::empty());
-                    let fn_ty = bx.fn_decl_backend_type(fn_abi);
+                    let fn_ty = bx.fn_decl_backend_type(fn_abi, fn_ptr);
                     let fn_attrs = if bx.tcx().def_kind(instance.def_id()).has_codegen_attrs() {
                         Some(bx.tcx().codegen_fn_attrs(instance.def_id()))
                     } else {
diff --git a/compiler/rustc_codegen_ssa/src/size_of_val.rs b/compiler/rustc_codegen_ssa/src/size_of_val.rs
index 577012151e49f..778440853987d 100644
--- a/compiler/rustc_codegen_ssa/src/size_of_val.rs
+++ b/compiler/rustc_codegen_ssa/src/size_of_val.rs
@@ -68,7 +68,7 @@ pub fn size_and_align_of_dst<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
             // Generate the call. Cannot use `do_call` since we don't have a MIR terminator so we
             // can't create a `TerminationCodegenHelper`. (But we are in good company, this code is
             // duplicated plenty of times.)
-            let fn_ty = bx.fn_decl_backend_type(fn_abi);
+            let fn_ty = bx.fn_decl_backend_type(fn_abi, llfn);
 
             bx.call(
                 fn_ty,
diff --git a/compiler/rustc_codegen_ssa/src/traits/type_.rs b/compiler/rustc_codegen_ssa/src/traits/type_.rs
index c3fc21a92854a..4c3e057f6f90c 100644
--- a/compiler/rustc_codegen_ssa/src/traits/type_.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/type_.rs
@@ -96,7 +96,11 @@ pub trait LayoutTypeCodegenMethods<'tcx>: BackendTypes {
     /// such as when it's stack-allocated or when it's being loaded or stored.
     fn backend_type(&self, layout: TyAndLayout<'tcx>) -> Self::Type;
     fn cast_backend_type(&self, ty: &CastTarget) -> Self::Type;
-    fn fn_decl_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Self::Type;
+    fn fn_decl_backend_type(
+        &self,
+        fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+        fn_ptr: Self::Value,
+    ) -> Self::Type;
     fn fn_ptr_backend_type(&self, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Self::Type;
     fn reg_backend_type(&self, ty: &Reg) -> Self::Type;
     /// The backend type used for a rust type when it's in an SSA register.

From 57b555982eabc20b321a1806e075d5a5d228a23b Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 12:19:41 +0530
Subject: [PATCH 02/10] Check for AutoUpgraded intrinsics, and emit a hard
 error for unknown intrinsics

---
 compiler/rustc_codegen_llvm/src/declare.rs    | 22 ++++++++++++++++++-
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs   |  7 ++++++
 .../rustc_llvm/llvm-wrapper/RustWrapper.cpp   | 12 ++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index 0692e23b7c84a..7d29e88d73aee 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -177,7 +177,27 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             fn_abi.apply_attrs_llfn(self, llfn, instance);
         }
 
-        // todo: check for upgrades, and emit error if not upgradable
+        if let FunctionSignature::MaybeInvalidIntrinsic(..) = signature {
+            let mut new_llfn = None;
+            let can_upgrade =
+                unsafe { llvm::LLVMRustUpgradeIntrinsicFunction(llfn, &mut new_llfn, false) };
+
+            if can_upgrade {
+                // not all intrinsics are upgraded to some other intrinsics, most are upgraded to instruction sequences
+                if let Some(new_llfn) = new_llfn {
+                    self.tcx.dcx().note(format!(
+                        "Using deprecated intrinsic `{name}`, `{}` can be used instead",
+                        str::from_utf8(llvm::get_value_name(new_llfn)).unwrap()
+                    ));
+                } else if self.tcx.sess.opts.verbose {
+                    self.tcx.dcx().note(format!(
+                        "Using deprecated intrinsic `{name}`, consider using other intrinsics/instructions"
+                    ));
+                }
+            } else {
+                self.tcx.dcx().fatal(format!("Invalid LLVM intrinsic: `{name}`"))
+            }
+        }
 
         if self.tcx.sess.is_sanitizer_cfi_enabled() {
             if let Some(instance) = instance {
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 7e10db907a804..812751be03f71 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1195,6 +1195,8 @@ unsafe extern "C" {
 
     // Operations on functions
     pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
+
+    // Operations about llvm intrinsics
     pub(crate) fn LLVMLookupIntrinsicID(Name: *const c_char, NameLen: size_t) -> c_uint;
     pub(crate) fn LLVMIntrinsicGetType<'a>(
         C: &'a Context,
@@ -1203,6 +1205,11 @@ unsafe extern "C" {
         ParamCount: size_t,
     ) -> &'a Type;
     pub(crate) fn LLVMIntrinsicIsOverloaded(ID: c_uint) -> Bool;
+    pub(crate) fn LLVMRustUpgradeIntrinsicFunction<'a>(
+        Fn: &'a Value,
+        NewFn: &mut Option<&'a Value>,
+        CanUpgradeDebugIntrinsicsToRecords: bool,
+    ) -> bool;
 
     // Operations on parameters
     pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
index 90aa9188c8300..a935dc6dca8df 100644
--- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
@@ -9,6 +9,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DiagnosticHandler.h"
@@ -1901,6 +1902,17 @@ extern "C" void LLVMRustGetMangledName(LLVMValueRef V, RustStringRef Str) {
   Mangler().getNameWithPrefix(OS, GV, true);
 }
 
+extern "C" bool
+LLVMRustUpgradeIntrinsicFunction(LLVMValueRef Fn, LLVMValueRef *NewFn,
+                                 bool canUpgradeDebugIntrinsicsToRecords) {
+  Function *F = unwrap<Function>(Fn);
+  Function *NewF = nullptr;
+  bool CanUpgrade =
+      UpgradeIntrinsicFunction(F, NewF, canUpgradeDebugIntrinsicsToRecords);
+  *NewFn = wrap(NewF);
+  return CanUpgrade;
+}
+
 extern "C" int32_t LLVMRustGetElementTypeArgIndex(LLVMValueRef CallSite) {
   auto *CB = unwrap<CallBase>(CallSite);
   switch (CB->getIntrinsicID()) {

From 4e5bae569a42131f85c10ac3ee93b213086483d8 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 12:38:52 +0530
Subject: [PATCH 03/10] Add auto-destructuring for structs  - Remove redundant
 bitcasts at callsite

edit (squash with struct)
---
 compiler/rustc_codegen_llvm/src/abi.rs      |  31 ++++-
 compiler/rustc_codegen_llvm/src/builder.rs  | 147 +++++++++++++-------
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs |   3 +
 compiler/rustc_codegen_llvm/src/type_.rs    |  10 ++
 4 files changed, 141 insertions(+), 50 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index b22bb04906f42..329567c3347d2 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -7,6 +7,7 @@ use rustc_abi::{
     X86Call,
 };
 use rustc_codegen_ssa::MemFlags;
+use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
 use rustc_codegen_ssa::traits::*;
@@ -22,7 +23,7 @@ use smallvec::SmallVec;
 
 use crate::attributes::{self, llfn_attrs_from_instance};
 use crate::builder::Builder;
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, GenericCx, SCx};
 use crate::llvm::{self, Attribute, AttributePlace};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
@@ -353,6 +354,32 @@ pub(crate) trait FnAbiLlvmExt<'ll, 'tcx> {
     );
 }
 
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
+    pub(crate) fn equate_ty(&self, rust_ty: &'ll Type, llvm_ty: &'ll Type) -> bool {
+        if rust_ty == llvm_ty {
+            return true;
+        }
+
+        match self.type_kind(llvm_ty) {
+            TypeKind::Struct if self.type_kind(rust_ty) == TypeKind::Struct => {
+                let rust_element_tys = self.struct_element_types(rust_ty);
+                let llvm_element_tys = self.struct_element_types(llvm_ty);
+
+                if rust_element_tys.len() != llvm_element_tys.len() {
+                    return false;
+                }
+
+                iter::zip(rust_element_tys, llvm_element_tys).all(
+                    |(rust_element_ty, llvm_element_ty)| {
+                        self.equate_ty(rust_element_ty, llvm_element_ty)
+                    },
+                )
+            }
+            _ => false,
+        }
+    }
+}
+
 impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
     fn llvm_return_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type {
         match &self.ret.mode {
@@ -442,7 +469,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         // todo: add bypasses for types not accessible from Rust here
         iter::once((rust_return_ty, llvm_return_ty))
             .chain(iter::zip(rust_argument_tys, llvm_argument_tys))
-            .all(|(rust_ty, llvm_ty)| rust_ty == llvm_ty)
+            .all(|(rust_ty, llvm_ty)| cx.equate_ty(rust_ty, llvm_ty))
     }
 
     fn llvm_type(
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index b8119a58e7cf4..9ded3149e125b 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -67,7 +67,6 @@ impl<'a, 'll> SBuilder<'a, 'll> {
     ) -> &'ll Value {
         debug!("call {:?} with args ({:?})", llfn, args);
 
-        let args = self.check_call("call", llty, llfn, args);
         let funclet_bundle = funclet.map(|funclet| funclet.bundle());
         let mut bundles: SmallVec<[_; 2]> = SmallVec::new();
         if let Some(funclet_bundle) = funclet_bundle {
@@ -349,7 +348,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     ) -> &'ll Value {
         debug!("invoke {:?} with args ({:?})", llfn, args);
 
-        let args = self.check_call("invoke", llty, llfn, args);
+        let args = self.cast_arguments("invoke", llty, llfn, args, fn_abi.is_some());
         let funclet_bundle = funclet.map(|funclet| funclet.bundle());
         let mut bundles: SmallVec<[_; 2]> = SmallVec::new();
         if let Some(funclet_bundle) = funclet_bundle {
@@ -381,8 +380,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         };
         if let Some(fn_abi) = fn_abi {
             fn_abi.apply_attrs_callsite(self, invoke, llfn);
+            self.cast_return(fn_abi, llfn, invoke)
+        } else {
+            invoke
         }
-        invoke
     }
 
     fn unreachable(&mut self) {
@@ -1404,7 +1405,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     ) -> &'ll Value {
         debug!("call {:?} with args ({:?})", llfn, args);
 
-        let args = self.check_call("call", llty, llfn, args);
+        let args = self.cast_arguments("call", llty, llfn, args, fn_abi.is_some());
         let funclet_bundle = funclet.map(|funclet| funclet.bundle());
         let mut bundles: SmallVec<[_; 2]> = SmallVec::new();
         if let Some(funclet_bundle) = funclet_bundle {
@@ -1434,8 +1435,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         };
         if let Some(fn_abi) = fn_abi {
             fn_abi.apply_attrs_callsite(self, call, llfn);
+            self.cast_return(fn_abi, llfn, call)
+        } else {
+            call
         }
-        call
     }
 
     fn zext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
@@ -1602,47 +1605,6 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
         ret.expect("LLVM does not have support for catchret")
     }
 
-    fn check_call<'b>(
-        &mut self,
-        typ: &str,
-        fn_ty: &'ll Type,
-        llfn: &'ll Value,
-        args: &'b [&'ll Value],
-    ) -> Cow<'b, [&'ll Value]> {
-        assert!(
-            self.cx.type_kind(fn_ty) == TypeKind::Function,
-            "builder::{typ} not passed a function, but {fn_ty:?}"
-        );
-
-        let param_tys = self.cx.func_params_types(fn_ty);
-
-        let all_args_match = iter::zip(&param_tys, args.iter().map(|&v| self.cx.val_ty(v)))
-            .all(|(expected_ty, actual_ty)| *expected_ty == actual_ty);
-
-        if all_args_match {
-            return Cow::Borrowed(args);
-        }
-
-        let casted_args: Vec<_> = iter::zip(param_tys, args)
-            .enumerate()
-            .map(|(i, (expected_ty, &actual_val))| {
-                let actual_ty = self.cx.val_ty(actual_val);
-                if expected_ty != actual_ty {
-                    debug!(
-                        "type mismatch in function call of {:?}. \
-                            Expected {:?} for param {}, got {:?}; injecting bitcast",
-                        llfn, expected_ty, i, actual_ty
-                    );
-                    self.bitcast(actual_val, expected_ty)
-                } else {
-                    actual_val
-                }
-            })
-            .collect();
-
-        Cow::Owned(casted_args)
-    }
-
     pub(crate) fn va_arg(&mut self, list: &'ll Value, ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildVAArg(self.llbuilder, list, ty, UNNAMED) }
     }
@@ -1714,6 +1676,93 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         self.call(self.type_func(&[src_ty], dest_ty), None, None, f, &[val], None, None)
     }
 
+    fn autocast(
+        &mut self,
+        llfn: &'ll Value,
+        val: &'ll Value,
+        src_ty: &'ll Type,
+        dest_ty: &'ll Type,
+        is_argument: bool,
+    ) -> &'ll Value {
+        let (rust_ty, llvm_ty) = if is_argument { (src_ty, dest_ty) } else { (dest_ty, src_ty) };
+
+        if rust_ty == llvm_ty {
+            return val;
+        }
+
+        match self.type_kind(llvm_ty) {
+            TypeKind::Struct => {
+                let mut ret = self.const_poison(dest_ty);
+                for (idx, (src_element_ty, dest_element_ty)) in
+                    iter::zip(self.struct_element_types(src_ty), self.struct_element_types(dest_ty))
+                        .enumerate()
+                {
+                    let elt = self.extract_value(val, idx as u64);
+                    let casted_elt =
+                        self.autocast(llfn, elt, src_element_ty, dest_element_ty, is_argument);
+                    ret = self.insert_value(ret, casted_elt, idx as u64);
+                }
+                ret
+            }
+            _ => unreachable!(),
+        }
+    }
+
+    fn cast_arguments<'b>(
+        &mut self,
+        typ: &str,
+        fn_ty: &'ll Type,
+        llfn: &'ll Value,
+        args: &'b [&'ll Value],
+        has_fnabi: bool,
+    ) -> Cow<'b, [&'ll Value]> {
+        assert_eq!(
+            self.type_kind(fn_ty),
+            TypeKind::Function,
+            "{typ} not passed a function, but {fn_ty:?}"
+        );
+
+        let param_tys = self.func_params_types(fn_ty);
+
+        let mut casted_args = Cow::Borrowed(args);
+
+        for (idx, (dest_ty, &arg)) in iter::zip(param_tys, args).enumerate() {
+            let src_ty = self.val_ty(arg);
+            assert!(
+                self.equate_ty(src_ty, dest_ty),
+                "Cannot match `{dest_ty:?}` (expected) with `{src_ty:?}` (found) in `{llfn:?}`"
+            );
+
+            let casted_arg = self.autocast(llfn, arg, src_ty, dest_ty, true);
+            if arg != casted_arg {
+                assert!(
+                    has_fnabi,
+                    "Should inject autocasts in function call of {llfn:?}, but not able to get Rust signature"
+                );
+
+                casted_args.to_mut()[idx] = casted_arg;
+            }
+        }
+
+        casted_args
+    }
+
+    fn cast_return(
+        &mut self,
+        fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+        llfn: &'ll Value,
+        ret: &'ll Value,
+    ) -> &'ll Value {
+        let src_ty = self.val_ty(ret);
+        let dest_ty = fn_abi.llvm_return_type(self);
+        assert!(
+            self.equate_ty(dest_ty, src_ty),
+            "Cannot match `{src_ty:?}` (expected) with `{dest_ty:?}` (found) in `{llfn:?}`"
+        );
+
+        self.autocast(llfn, ret, src_ty, dest_ty, false)
+    }
+
     pub(crate) fn landing_pad(
         &mut self,
         ty: &'ll Type,
@@ -1743,7 +1792,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     ) -> &'ll Value {
         debug!("invoke {:?} with args ({:?})", llfn, args);
 
-        let args = self.check_call("callbr", llty, llfn, args);
+        let args = self.cast_arguments("callbr", llty, llfn, args, fn_abi.is_some());
         let funclet_bundle = funclet.map(|funclet| funclet.bundle());
         let mut bundles: SmallVec<[_; 2]> = SmallVec::new();
         if let Some(funclet_bundle) = funclet_bundle {
@@ -1776,8 +1825,10 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         };
         if let Some(fn_abi) = fn_abi {
             fn_abi.apply_attrs_callsite(self, callbr, llfn);
+            self.cast_return(fn_abi, llfn, callbr)
+        } else {
+            callbr
         }
-        callbr
     }
 
     // Emits CFI pointer type membership tests.
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 812751be03f71..61c667d698bbd 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1730,6 +1730,9 @@ unsafe extern "C" {
         Packed: Bool,
     );
 
+    pub(crate) fn LLVMCountStructElementTypes(StructTy: &Type) -> c_uint;
+    pub(crate) fn LLVMGetStructElementTypes<'a>(StructTy: &'a Type, Dest: *mut &'a Type);
+
     pub(crate) safe fn LLVMMetadataAsValue<'a>(C: &'a Context, MD: &'a Metadata) -> &'a Value;
 
     pub(crate) fn LLVMSetUnnamedAddress(Global: &Value, UnnamedAddr: UnnamedAddr);
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 2e9d95d9c49f2..a2596db7ec4b4 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -79,6 +79,16 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     pub(crate) fn func_is_variadic(&self, ty: &'ll Type) -> bool {
         unsafe { llvm::LLVMIsFunctionVarArg(ty) == True }
     }
+
+    pub(crate) fn struct_element_types(&self, ty: &'ll Type) -> Vec<&'ll Type> {
+        unsafe {
+            let n_args = llvm::LLVMCountStructElementTypes(ty) as usize;
+            let mut args = Vec::with_capacity(n_args);
+            llvm::LLVMGetStructElementTypes(ty, args.as_mut_ptr());
+            args.set_len(n_args);
+            args
+        }
+    }
 }
 impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn type_bool(&self) -> &'ll Type {

From c5f75ec81ec25b99bfb8eb50231a0428a0bfccd6 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 13:10:20 +0530
Subject: [PATCH 04/10] Add bypass for `bf16` and `bf16xN`

---
 compiler/rustc_codegen_llvm/src/abi.rs      | 11 +++++++++++
 compiler/rustc_codegen_llvm/src/builder.rs  |  2 +-
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs |  3 +++
 compiler/rustc_codegen_llvm/src/type_.rs    |  6 +++++-
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 329567c3347d2..0f823d09be78b 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -361,6 +361,17 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         }
 
         match self.type_kind(llvm_ty) {
+            TypeKind::BFloat => rust_ty == self.type_i16(),
+            TypeKind::Vector => {
+                let llvm_element_count = self.vector_length(llvm_ty) as u64;
+                let llvm_element_ty = self.element_type(llvm_ty);
+
+                if llvm_element_ty == self.type_bf16() {
+                    rust_ty == self.type_vector(self.type_i16(), llvm_element_count)
+                } else {
+                    false
+                }
+            }
             TypeKind::Struct if self.type_kind(rust_ty) == TypeKind::Struct => {
                 let rust_element_tys = self.struct_element_types(rust_ty);
                 let llvm_element_tys = self.struct_element_types(llvm_ty);
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 9ded3149e125b..f811513675ab5 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1704,7 +1704,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
                 }
                 ret
             }
-            _ => unreachable!(),
+            _ => self.bitcast(val, dest_ty), // for `bf16(xN)` <-> `u16(xN)`
         }
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 61c667d698bbd..3383669e5162d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1050,6 +1050,9 @@ unsafe extern "C" {
     pub(crate) fn LLVMDoubleTypeInContext(C: &Context) -> &Type;
     pub(crate) fn LLVMFP128TypeInContext(C: &Context) -> &Type;
 
+    // Operations on non-IEEE real types
+    pub(crate) fn LLVMBFloatTypeInContext(C: &Context) -> &Type;
+
     // Operations on function types
     pub(crate) fn LLVMFunctionType<'a>(
         ReturnType: &'a Type,
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index a2596db7ec4b4..1b303e7289ae5 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -168,6 +168,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
             )
         }
     }
+
+    pub(crate) fn type_bf16(&self) -> &'ll Type {
+        unsafe { llvm::LLVMBFloatTypeInContext(self.llcx()) }
+    }
 }
 
 impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
@@ -241,7 +245,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
 
     fn float_width(&self, ty: &'ll Type) -> usize {
         match self.type_kind(ty) {
-            TypeKind::Half => 16,
+            TypeKind::Half | TypeKind::BFloat => 16,
             TypeKind::Float => 32,
             TypeKind::Double => 64,
             TypeKind::X86_FP80 => 80,

From b9342575082fa64de689b7c36bcd3ef026145aa6 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 13:11:54 +0530
Subject: [PATCH 05/10] Add bypass for `i1xN`

---
 compiler/rustc_codegen_llvm/src/abi.rs     |  3 ++
 compiler/rustc_codegen_llvm/src/builder.rs | 49 +++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 0f823d09be78b..22485637e9363 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -368,6 +368,9 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
 
                 if llvm_element_ty == self.type_bf16() {
                     rust_ty == self.type_vector(self.type_i16(), llvm_element_count)
+                } else if llvm_element_ty == self.type_i1() {
+                    let int_width = cmp::max(llvm_element_count.next_power_of_two(), 8);
+                    rust_ty == self.type_ix(int_width)
                 } else {
                     false
                 }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index f811513675ab5..0f574a4499f67 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1,6 +1,6 @@
 use std::borrow::{Borrow, Cow};
 use std::ops::Deref;
-use std::{iter, ptr};
+use std::{cmp, iter, ptr};
 
 pub(crate) mod autodiff;
 
@@ -1676,6 +1676,46 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         self.call(self.type_func(&[src_ty], dest_ty), None, None, f, &[val], None, None)
     }
 
+    fn trunc_int_to_i1_vector(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        let vector_length = self.vector_length(dest_ty) as u64;
+        let int_width = cmp::max(vector_length.next_power_of_two(), 8);
+
+        let bitcasted = self.bitcast(val, self.type_vector(self.type_i1(), int_width));
+        if vector_length == int_width {
+            bitcasted
+        } else {
+            let shuffle_mask =
+                (0..vector_length).map(|i| self.const_i32(i as i32)).collect::<Vec<_>>();
+            self.shuffle_vector(bitcasted, bitcasted, self.const_vector(&shuffle_mask))
+        }
+    }
+
+    fn zext_i1_vector_to_int(
+        &mut self,
+        mut val: &'ll Value,
+        src_ty: &'ll Type,
+        dest_ty: &'ll Type,
+    ) -> &'ll Value {
+        let vector_length = self.vector_length(src_ty) as u64;
+        let int_width = cmp::max(vector_length.next_power_of_two(), 8);
+
+        if vector_length != int_width {
+            let shuffle_indices = match vector_length {
+                0 => unreachable!("zero length vectors are not allowed"),
+                1 => vec![0, 1, 1, 1, 1, 1, 1, 1],
+                2 => vec![0, 1, 2, 3, 2, 3, 2, 3],
+                3 => vec![0, 1, 2, 3, 4, 5, 3, 4],
+                4.. => (0..int_width as i32).collect(),
+            };
+            let shuffle_mask =
+                shuffle_indices.into_iter().map(|i| self.const_i32(i)).collect::<Vec<_>>();
+            val =
+                self.shuffle_vector(val, self.const_null(src_ty), self.const_vector(&shuffle_mask));
+        }
+
+        self.bitcast(val, dest_ty)
+    }
+
     fn autocast(
         &mut self,
         llfn: &'ll Value,
@@ -1691,6 +1731,13 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         }
 
         match self.type_kind(llvm_ty) {
+            TypeKind::Vector if self.element_type(llvm_ty) == self.type_i1() => {
+                if is_argument {
+                    self.trunc_int_to_i1_vector(val, dest_ty)
+                } else {
+                    self.zext_i1_vector_to_int(val, src_ty, dest_ty)
+                }
+            }
             TypeKind::Struct => {
                 let mut ret = self.const_poison(dest_ty);
                 for (idx, (src_element_ty, dest_element_ty)) in

From 157ff72ffb9511bc01a4adfc01e93e7c2574c689 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 23:22:11 +0530
Subject: [PATCH 06/10] Add autocast tests  - Correct usage of invalid
 intrinsics in tests

---
 tests/codegen/inject-autocast.rs              | 94 +++++++++++++++++++
 tests/run-make/simd-ffi/simd.rs               |  2 +-
 tests/ui/codegen/deprecated-llvm-intrinsic.rs | 28 ++++++
 .../codegen/deprecated-llvm-intrinsic.stderr  |  2 +
 .../incorrect-llvm-intrinsic-signature.rs     | 15 +++
 .../incorrect-llvm-intrinsic-signature.stderr |  4 +
 tests/ui/codegen/invalid-llvm-intrinsic.rs    | 15 +++
 .../ui/codegen/invalid-llvm-intrinsic.stderr  |  4 +
 8 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 tests/codegen/inject-autocast.rs
 create mode 100644 tests/ui/codegen/deprecated-llvm-intrinsic.rs
 create mode 100644 tests/ui/codegen/deprecated-llvm-intrinsic.stderr
 create mode 100644 tests/ui/codegen/incorrect-llvm-intrinsic-signature.rs
 create mode 100644 tests/ui/codegen/incorrect-llvm-intrinsic-signature.stderr
 create mode 100644 tests/ui/codegen/invalid-llvm-intrinsic.rs
 create mode 100644 tests/ui/codegen/invalid-llvm-intrinsic.stderr

diff --git a/tests/codegen/inject-autocast.rs b/tests/codegen/inject-autocast.rs
new file mode 100644
index 0000000000000..f51044dacf657
--- /dev/null
+++ b/tests/codegen/inject-autocast.rs
@@ -0,0 +1,94 @@
+//@ compile-flags: -C opt-level=0
+//@ only-x86_64
+
+#![feature(link_llvm_intrinsics, abi_unadjusted, repr_simd, simd_ffi, portable_simd, f16)]
+#![crate_type = "lib"]
+
+use std::simd::{f32x4, i16x8, i64x2};
+
+#[repr(C, packed)]
+pub struct Bar(u32, i64x2, i64x2, i64x2, i64x2, i64x2, i64x2);
+// CHECK: %Bar = type <{ i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }>
+
+#[repr(simd)]
+pub struct f16x8([f16; 8]);
+
+// CHECK-LABEL: @struct_with_i1_vector_autocast
+#[no_mangle]
+pub unsafe fn struct_with_i1_vector_autocast(a: i64x2, b: i64x2) -> (u8, u8) {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.avx512.vp2intersect.q.128"]
+        fn foo(a: i64x2, b: i64x2) -> (u8, u8);
+    }
+
+    // CHECK: %0 = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %a, <2 x i64> %b)
+    // CHECK-NEXT: %1 = extractvalue { <2 x i1>, <2 x i1> } %0, 0
+    // CHECK-NEXT: %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+    // CHECK-NEXT: %3 = bitcast <8 x i1> %2 to i8
+    // CHECK-NEXT: %4 = insertvalue { i8, i8 } poison, i8 %3, 0
+    // CHECK-NEXT: %5 = extractvalue { <2 x i1>, <2 x i1> } %0, 1
+    // CHECK-NEXT: %6 = shufflevector <2 x i1> %5, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+    // CHECK-NEXT: %7 = bitcast <8 x i1> %6 to i8
+    // CHECK-NEXT: %8 = insertvalue { i8, i8 } %4, i8 %7, 1
+    foo(a, b)
+}
+
+// CHECK-LABEL: @bf16_vector_autocast
+#[no_mangle]
+pub unsafe fn bf16_vector_autocast(a: f32x4) -> i16x8 {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.vcvtneps2bf16128"]
+        fn foo(a: f32x4) -> i16x8;
+    }
+
+    // CHECK: %0 = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %a)
+    // CHECK-NEXT: %_0 = bitcast <8 x bfloat> %0 to <8 x i16>
+    foo(a)
+}
+
+// CHECK-LABEL: @struct_autocast
+#[no_mangle]
+pub unsafe fn struct_autocast(key_metadata: u32, key: i64x2) -> Bar {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.encodekey128"]
+        fn foo(key_metadata: u32, key: i64x2) -> Bar;
+    }
+
+    // CHECK: %0 = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %key_metadata, <2 x i64> %key)
+    // CHECK-NEXT: %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
+    // CHECK-NEXT: %2 = insertvalue %Bar poison, i32 %1, 0
+    // CHECK-NEXT: %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
+    // CHECK-NEXT: %4 = insertvalue %Bar %2, <2 x i64> %3, 1
+    // CHECK-NEXT: %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
+    // CHECK-NEXT: %6 = insertvalue %Bar %4, <2 x i64> %5, 2
+    // CHECK-NEXT: %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
+    // CHECK-NEXT: %8 = insertvalue %Bar %6, <2 x i64> %7, 3
+    // CHECK-NEXT: %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
+    // CHECK-NEXT: %10 = insertvalue %Bar %8, <2 x i64> %9, 4
+    // CHECK-NEXT: %11 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
+    // CHECK-NEXT: %12 = insertvalue %Bar %10, <2 x i64> %11, 5
+    // CHECK-NEXT: %13 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
+    // CHECK-NEXT: %14 = insertvalue %Bar %12, <2 x i64> %13, 6
+    foo(key_metadata, key)
+}
+
+// CHECK-LABEL: @i1_vector_autocast
+#[no_mangle]
+pub unsafe fn i1_vector_autocast(a: f16x8) -> u8 {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.avx512fp16.fpclass.ph.128"]
+        fn foo(a: f16x8, b: i32) -> u8;
+    }
+
+    // CHECK: %0 = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %a, i32 1)
+    // CHECK-NEXT: %_0 = bitcast <8 x i1> %0 to i8
+    foo(a, 1)
+}
+
+// CHECK: declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)
+
+// CHECK: declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float>)
+
+// CHECK: declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
+
+// CHECK: declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32 immarg)
diff --git a/tests/run-make/simd-ffi/simd.rs b/tests/run-make/simd-ffi/simd.rs
index 9ea8eb8cf8831..2a2032f218e75 100644
--- a/tests/run-make/simd-ffi/simd.rs
+++ b/tests/run-make/simd-ffi/simd.rs
@@ -35,7 +35,7 @@ extern "C" {
     fn integer(a: i32x4, b: i32x4) -> i32x4;
     // vmaxq_s32
     #[cfg(target_arch = "aarch64")]
-    #[link_name = "llvm.aarch64.neon.maxs.v4i32"]
+    #[link_name = "llvm.aarch64.neon.smax.v4i32"]
     fn integer(a: i32x4, b: i32x4) -> i32x4;
 
     // Use a generic LLVM intrinsic to do type checking on other platforms
diff --git a/tests/ui/codegen/deprecated-llvm-intrinsic.rs b/tests/ui/codegen/deprecated-llvm-intrinsic.rs
new file mode 100644
index 0000000000000..8ae363054cc74
--- /dev/null
+++ b/tests/ui/codegen/deprecated-llvm-intrinsic.rs
@@ -0,0 +1,28 @@
+//@ add-core-stubs
+//@ build-pass
+//@ ignore-pass
+//@ compile-flags: --target aarch64-unknown-linux-gnu
+//@ needs-llvm-components: aarch64
+#![feature(no_core, lang_items, link_llvm_intrinsics, abi_unadjusted, repr_simd, simd_ffi)]
+#![no_std]
+#![no_core]
+#![allow(internal_features, non_camel_case_types, improper_ctypes)]
+#![crate_type = "lib"]
+
+extern crate minicore;
+use minicore::*;
+
+#[repr(simd)]
+pub struct i8x8([i8; 8]);
+
+extern "unadjusted" {
+    #[link_name = "llvm.aarch64.neon.rbit.v8i8"]
+    fn foo(a: i8x8) -> i8x8;
+}
+
+#[target_feature(enable = "neon")]
+pub unsafe fn bar(a: i8x8) -> i8x8 {
+    foo(a)
+}
+
+//~? NOTE: Using deprecated intrinsic `llvm.aarch64.neon.rbit.v8i8`, `llvm.bitreverse.v8i8` can be used instead
diff --git a/tests/ui/codegen/deprecated-llvm-intrinsic.stderr b/tests/ui/codegen/deprecated-llvm-intrinsic.stderr
new file mode 100644
index 0000000000000..214ac269e7d0e
--- /dev/null
+++ b/tests/ui/codegen/deprecated-llvm-intrinsic.stderr
@@ -0,0 +1,2 @@
+note: Using deprecated intrinsic `llvm.aarch64.neon.rbit.v8i8`, `llvm.bitreverse.v8i8` can be used instead
+
diff --git a/tests/ui/codegen/incorrect-llvm-intrinsic-signature.rs b/tests/ui/codegen/incorrect-llvm-intrinsic-signature.rs
new file mode 100644
index 0000000000000..84c4c0d747247
--- /dev/null
+++ b/tests/ui/codegen/incorrect-llvm-intrinsic-signature.rs
@@ -0,0 +1,15 @@
+//@ build-fail
+
+#![feature(link_llvm_intrinsics, abi_unadjusted)]
+#![allow(internal_features, non_camel_case_types, improper_ctypes)]
+
+extern "unadjusted" {
+    #[link_name = "llvm.assume"]
+    fn foo();
+}
+
+pub fn main() {
+    unsafe { foo() }
+}
+
+//~? ERROR: Intrinsic signature mismatch for `llvm.assume`: expected signature `void (i1)`
diff --git a/tests/ui/codegen/incorrect-llvm-intrinsic-signature.stderr b/tests/ui/codegen/incorrect-llvm-intrinsic-signature.stderr
new file mode 100644
index 0000000000000..f67ba8a65a40c
--- /dev/null
+++ b/tests/ui/codegen/incorrect-llvm-intrinsic-signature.stderr
@@ -0,0 +1,4 @@
+error: Intrinsic signature mismatch for `llvm.assume`: expected signature `void (i1)`
+
+error: aborting due to 1 previous error
+
diff --git a/tests/ui/codegen/invalid-llvm-intrinsic.rs b/tests/ui/codegen/invalid-llvm-intrinsic.rs
new file mode 100644
index 0000000000000..2f1ff826ed39b
--- /dev/null
+++ b/tests/ui/codegen/invalid-llvm-intrinsic.rs
@@ -0,0 +1,15 @@
+//@ build-fail
+
+#![feature(link_llvm_intrinsics, abi_unadjusted)]
+#![allow(internal_features, non_camel_case_types, improper_ctypes)]
+
+extern "unadjusted" {
+    #[link_name = "llvm.abcde"]
+    fn foo();
+}
+
+pub fn main() {
+    unsafe { foo() }
+}
+
+//~? ERROR: Invalid LLVM intrinsic: `llvm.abcde`
diff --git a/tests/ui/codegen/invalid-llvm-intrinsic.stderr b/tests/ui/codegen/invalid-llvm-intrinsic.stderr
new file mode 100644
index 0000000000000..467d6a62553cc
--- /dev/null
+++ b/tests/ui/codegen/invalid-llvm-intrinsic.stderr
@@ -0,0 +1,4 @@
+error: Invalid LLVM intrinsic: `llvm.abcde`
+
+error: aborting due to 1 previous error
+

From 888c6c4c9ca3d523e1de4d0eb6043bf5e4b26502 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 21:41:36 +0530
Subject: [PATCH 07/10] Simplify implementation of Rust intrinsics by using
 type parameters in the cache

---
 compiler/rustc_codegen_gcc/src/builder.rs     |   2 +-
 compiler/rustc_codegen_llvm/src/builder.rs    | 179 ++----
 compiler/rustc_codegen_llvm/src/context.rs    | 471 ++++----------
 compiler/rustc_codegen_llvm/src/intrinsic.rs  | 586 +++++++-----------
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs   |   7 +
 compiler/rustc_codegen_llvm/src/llvm/mod.rs   |  28 +-
 compiler/rustc_codegen_llvm/src/type_.rs      |   9 +
 .../rustc_codegen_ssa/src/traits/builder.rs   |   4 +-
 8 files changed, 460 insertions(+), 826 deletions(-)

diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
index d1fb8d8f9d6b8..1bd8921210085 100644
--- a/compiler/rustc_codegen_gcc/src/builder.rs
+++ b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -897,7 +897,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     fn checked_binop(
         &mut self,
         oop: OverflowOp,
-        typ: Ty<'_>,
+        typ: Ty<'tcx>,
         lhs: Self::Value,
         rhs: Self::Value,
     ) -> (Self::Value, Self::Value) {
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 0f574a4499f67..ad2b07067c1ab 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -14,7 +14,6 @@ use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
-use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers,
@@ -485,73 +484,31 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn checked_binop(
         &mut self,
         oop: OverflowOp,
-        ty: Ty<'_>,
+        ty: Ty<'tcx>,
         lhs: Self::Value,
         rhs: Self::Value,
     ) -> (Self::Value, Self::Value) {
-        use rustc_middle::ty::IntTy::*;
-        use rustc_middle::ty::UintTy::*;
-        use rustc_middle::ty::{Int, Uint};
-
-        let new_kind = match ty.kind() {
-            Int(t @ Isize) => Int(t.normalize(self.tcx.sess.target.pointer_width)),
-            Uint(t @ Usize) => Uint(t.normalize(self.tcx.sess.target.pointer_width)),
-            t @ (Uint(_) | Int(_)) => *t,
-            _ => panic!("tried to get overflow intrinsic for op applied to non-int type"),
+        let (size, signed) = ty.int_size_and_signed(self.tcx);
+        let width = size.bits();
+
+        if oop == OverflowOp::Sub && !signed {
+            // Emit sub and icmp instead of llvm.usub.with.overflow. LLVM considers these
+            // to be the canonical form. It will attempt to reform llvm.usub.with.overflow
+            // in the backend if profitable.
+            let sub = self.sub(lhs, rhs);
+            let cmp = self.icmp(IntPredicate::IntULT, lhs, rhs);
+            return (sub, cmp);
+        }
+
+        let oop_str = match oop {
+            OverflowOp::Add => "add",
+            OverflowOp::Sub => "sub",
+            OverflowOp::Mul => "mul",
         };
 
-        let name = match oop {
-            OverflowOp::Add => match new_kind {
-                Int(I8) => "llvm.sadd.with.overflow.i8",
-                Int(I16) => "llvm.sadd.with.overflow.i16",
-                Int(I32) => "llvm.sadd.with.overflow.i32",
-                Int(I64) => "llvm.sadd.with.overflow.i64",
-                Int(I128) => "llvm.sadd.with.overflow.i128",
-
-                Uint(U8) => "llvm.uadd.with.overflow.i8",
-                Uint(U16) => "llvm.uadd.with.overflow.i16",
-                Uint(U32) => "llvm.uadd.with.overflow.i32",
-                Uint(U64) => "llvm.uadd.with.overflow.i64",
-                Uint(U128) => "llvm.uadd.with.overflow.i128",
-
-                _ => unreachable!(),
-            },
-            OverflowOp::Sub => match new_kind {
-                Int(I8) => "llvm.ssub.with.overflow.i8",
-                Int(I16) => "llvm.ssub.with.overflow.i16",
-                Int(I32) => "llvm.ssub.with.overflow.i32",
-                Int(I64) => "llvm.ssub.with.overflow.i64",
-                Int(I128) => "llvm.ssub.with.overflow.i128",
-
-                Uint(_) => {
-                    // Emit sub and icmp instead of llvm.usub.with.overflow. LLVM considers these
-                    // to be the canonical form. It will attempt to reform llvm.usub.with.overflow
-                    // in the backend if profitable.
-                    let sub = self.sub(lhs, rhs);
-                    let cmp = self.icmp(IntPredicate::IntULT, lhs, rhs);
-                    return (sub, cmp);
-                }
+        let name = format!("llvm.{}{oop_str}.with.overflow", if signed { 's' } else { 'u' });
 
-                _ => unreachable!(),
-            },
-            OverflowOp::Mul => match new_kind {
-                Int(I8) => "llvm.smul.with.overflow.i8",
-                Int(I16) => "llvm.smul.with.overflow.i16",
-                Int(I32) => "llvm.smul.with.overflow.i32",
-                Int(I64) => "llvm.smul.with.overflow.i64",
-                Int(I128) => "llvm.smul.with.overflow.i128",
-
-                Uint(U8) => "llvm.umul.with.overflow.i8",
-                Uint(U16) => "llvm.umul.with.overflow.i16",
-                Uint(U32) => "llvm.umul.with.overflow.i32",
-                Uint(U64) => "llvm.umul.with.overflow.i64",
-                Uint(U128) => "llvm.umul.with.overflow.i128",
-
-                _ => unreachable!(),
-            },
-        };
-
-        let res = self.call_intrinsic(name, &[lhs, rhs]);
+        let res = self.call_intrinsic(&name, &[self.type_ix(width)], &[lhs, rhs]);
         (self.extract_value(res, 0), self.extract_value(res, 1))
     }
 
@@ -955,11 +912,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        self.fptoint_sat(false, val, dest_ty)
+        self.call_intrinsic("llvm.fptoui.sat", &[dest_ty, self.val_ty(val)], &[val])
     }
 
     fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        self.fptoint_sat(true, val, dest_ty)
+        self.call_intrinsic("llvm.fptosi.sat", &[dest_ty, self.val_ty(val)], &[val])
     }
 
     fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
@@ -982,15 +939,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
                 let int_width = self.cx.int_width(dest_ty);
-                let name = match (int_width, float_width) {
-                    (32, 32) => Some("llvm.wasm.trunc.unsigned.i32.f32"),
-                    (32, 64) => Some("llvm.wasm.trunc.unsigned.i32.f64"),
-                    (64, 32) => Some("llvm.wasm.trunc.unsigned.i64.f32"),
-                    (64, 64) => Some("llvm.wasm.trunc.unsigned.i64.f64"),
-                    _ => None,
-                };
-                if let Some(name) = name {
-                    return self.call_intrinsic(name, &[val]);
+                if matches!((int_width, float_width), (32 | 64, 32 | 64)) {
+                    return self.call_intrinsic(
+                        "llvm.wasm.trunc.unsigned",
+                        &[dest_ty, src_ty],
+                        &[val],
+                    );
                 }
             }
         }
@@ -1004,15 +958,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
                 let int_width = self.cx.int_width(dest_ty);
-                let name = match (int_width, float_width) {
-                    (32, 32) => Some("llvm.wasm.trunc.signed.i32.f32"),
-                    (32, 64) => Some("llvm.wasm.trunc.signed.i32.f64"),
-                    (64, 32) => Some("llvm.wasm.trunc.signed.i64.f32"),
-                    (64, 64) => Some("llvm.wasm.trunc.signed.i64.f64"),
-                    _ => None,
-                };
-                if let Some(name) = name {
-                    return self.call_intrinsic(name, &[val]);
+                if matches!((int_width, float_width), (32 | 64, 32 | 64)) {
+                    return self.call_intrinsic(
+                        "llvm.wasm.trunc.signed",
+                        &[dest_ty, src_ty],
+                        &[val],
+                    );
                 }
             }
         }
@@ -1085,22 +1036,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             return None;
         }
 
-        let name = match (ty.is_signed(), ty.primitive_size(self.tcx).bits()) {
-            (true, 8) => "llvm.scmp.i8.i8",
-            (true, 16) => "llvm.scmp.i8.i16",
-            (true, 32) => "llvm.scmp.i8.i32",
-            (true, 64) => "llvm.scmp.i8.i64",
-            (true, 128) => "llvm.scmp.i8.i128",
-
-            (false, 8) => "llvm.ucmp.i8.i8",
-            (false, 16) => "llvm.ucmp.i8.i16",
-            (false, 32) => "llvm.ucmp.i8.i32",
-            (false, 64) => "llvm.ucmp.i8.i64",
-            (false, 128) => "llvm.ucmp.i8.i128",
+        let size = ty.primitive_size(self.tcx);
+        let name = if ty.is_signed() { "llvm.scmp" } else { "llvm.ucmp" };
 
-            _ => bug!("three-way compare unsupported for type {ty:?}"),
-        };
-        Some(self.call_intrinsic(name, &[lhs, rhs]))
+        Some(self.call_intrinsic(&name, &[self.type_i8(), self.type_ix(size.bits())], &[lhs, rhs]))
     }
 
     /* Miscellaneous instructions */
@@ -1386,11 +1325,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn lifetime_start(&mut self, ptr: &'ll Value, size: Size) {
-        self.call_lifetime_intrinsic("llvm.lifetime.start.p0i8", ptr, size);
+        self.call_lifetime_intrinsic("llvm.lifetime.start", ptr, size);
     }
 
     fn lifetime_end(&mut self, ptr: &'ll Value, size: Size) {
-        self.call_lifetime_intrinsic("llvm.lifetime.end.p0i8", ptr, size);
+        self.call_lifetime_intrinsic("llvm.lifetime.end", ptr, size);
     }
 
     fn call(
@@ -1611,8 +1550,13 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
 }
 
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
-    pub(crate) fn call_intrinsic(&mut self, intrinsic: &str, args: &[&'ll Value]) -> &'ll Value {
-        let (ty, f) = self.cx.get_intrinsic(intrinsic);
+    pub(crate) fn call_intrinsic(
+        &mut self,
+        base_name: &str,
+        type_params: &[&'ll Type],
+        args: &[&'ll Value],
+    ) -> &'ll Value {
+        let (ty, f) = self.cx.get_intrinsic(base_name, type_params);
         self.call(ty, None, None, f, args, None, None)
     }
 
@@ -1626,7 +1570,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             return;
         }
 
-        self.call_intrinsic(intrinsic, &[self.cx.const_u64(size), ptr]);
+        self.call_intrinsic(intrinsic, &[self.type_ptr()], &[self.cx.const_u64(size), ptr]);
     }
 }
 impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
@@ -1651,31 +1595,6 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     }
 }
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
-    fn fptoint_sat(&mut self, signed: bool, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        let src_ty = self.cx.val_ty(val);
-        let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
-            assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
-            (
-                self.cx.element_type(src_ty),
-                self.cx.element_type(dest_ty),
-                Some(self.cx.vector_length(src_ty)),
-            )
-        } else {
-            (src_ty, dest_ty, None)
-        };
-        let float_width = self.cx.float_width(float_ty);
-        let int_width = self.cx.int_width(int_ty);
-
-        let instr = if signed { "fptosi" } else { "fptoui" };
-        let name = if let Some(vector_length) = vector_length {
-            format!("llvm.{instr}.sat.v{vector_length}i{int_width}.v{vector_length}f{float_width}")
-        } else {
-            format!("llvm.{instr}.sat.i{int_width}.f{float_width}")
-        };
-        let f = self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));
-        self.call(self.type_func(&[src_ty], dest_ty), None, None, f, &[val], None, None)
-    }
-
     fn trunc_int_to_i1_vector(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         let vector_length = self.vector_length(dest_ty) as u64;
         let int_width = cmp::max(vector_length.next_power_of_two(), 8);
@@ -1985,7 +1904,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         num_counters: &'ll Value,
         index: &'ll Value,
     ) {
-        self.call_intrinsic("llvm.instrprof.increment", &[fn_name, hash, num_counters, index]);
+        self.call_intrinsic("llvm.instrprof.increment", &[], &[fn_name, hash, num_counters, index]);
     }
 
     /// Emits a call to `llvm.instrprof.mcdc.parameters`.
@@ -2004,7 +1923,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         hash: &'ll Value,
         bitmap_bits: &'ll Value,
     ) {
-        self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[fn_name, hash, bitmap_bits]);
+        self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[], &[fn_name, hash, bitmap_bits]);
     }
 
     #[instrument(level = "debug", skip(self))]
@@ -2016,7 +1935,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         mcdc_temp: &'ll Value,
     ) {
         let args = &[fn_name, hash, bitmap_index, mcdc_temp];
-        self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", args);
+        self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", &[], args);
     }
 
     #[instrument(level = "debug", skip(self))]
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 30c093657908e..95cd78ec05643 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -137,7 +137,8 @@ pub(crate) struct FullCx<'ll, 'tcx> {
     eh_catch_typeinfo: Cell<Option<&'ll Value>>,
     pub rust_try_fn: Cell<Option<(&'ll Type, &'ll Value)>>,
 
-    intrinsics: RefCell<FxHashMap<&'static str, (&'ll Type, &'ll Value)>>,
+    intrinsics:
+        RefCell<FxHashMap<(&'static str, SmallVec<[&'ll Type; 2]>), (&'ll Type, &'ll Value)>>,
 
     /// A counter that is used for generating local symbol names
     local_gen_sym_counter: Cell<usize>,
@@ -842,17 +843,24 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
 }
 
 impl<'ll> CodegenCx<'ll, '_> {
-    pub(crate) fn get_intrinsic(&self, key: &str) -> (&'ll Type, &'ll Value) {
-        if let Some(v) = self.intrinsics.borrow().get(key).cloned() {
-            return v;
+    pub(crate) fn get_intrinsic(
+        &self,
+        base_name: &str,
+        type_params: &[&'ll Type],
+    ) -> (&'ll Type, &'ll Value) {
+        if let Some(v) =
+            self.intrinsics.borrow().get(&(base_name, SmallVec::from_slice(type_params)))
+        {
+            return *v;
         }
 
-        self.declare_intrinsic(key).unwrap_or_else(|| bug!("unknown intrinsic '{}'", key))
+        self.declare_intrinsic(base_name, type_params)
     }
 
     fn insert_intrinsic(
         &self,
-        name: &'static str,
+        base_name: &'static str,
+        type_params: &[&'ll Type],
         args: Option<&[&'ll llvm::Type]>,
         ret: &'ll llvm::Type,
     ) -> (&'ll llvm::Type, &'ll llvm::Value) {
@@ -861,372 +869,156 @@ impl<'ll> CodegenCx<'ll, '_> {
         } else {
             self.type_variadic_func(&[], ret)
         };
-        let f = self.declare_cfn(name, llvm::UnnamedAddr::No, fn_ty);
-        self.intrinsics.borrow_mut().insert(name, (fn_ty, f));
+
+        let intrinsic = llvm::Intrinsic::lookup(base_name.as_bytes())
+            .expect("Unknown LLVM intrinsic `{base_name}`");
+
+        let full_name = if intrinsic.is_overloaded() {
+            &intrinsic.overloaded_name(self.llmod, type_params)
+        } else {
+            base_name
+        };
+
+        let f = self.declare_cfn(full_name, llvm::UnnamedAddr::No, fn_ty);
+        self.intrinsics
+            .borrow_mut()
+            .insert((base_name, SmallVec::from_slice(type_params)), (fn_ty, f));
         (fn_ty, f)
     }
 
-    fn declare_intrinsic(&self, key: &str) -> Option<(&'ll Type, &'ll Value)> {
+    fn declare_intrinsic(
+        &self,
+        base_name: &str,
+        type_params: &[&'ll Type],
+    ) -> (&'ll Type, &'ll Value) {
+        macro_rules! param {
+            ($index:literal) => {
+                type_params[$index]
+            };
+            ($other:expr) => {
+                $other
+            };
+        }
         macro_rules! ifn {
-            ($name:expr, fn() -> $ret:expr) => (
-                if key == $name {
-                    return Some(self.insert_intrinsic($name, Some(&[]), $ret));
-                }
-            );
             ($name:expr, fn(...) -> $ret:expr) => (
-                if key == $name {
-                    return Some(self.insert_intrinsic($name, None, $ret));
+                if base_name == $name {
+                    return self.insert_intrinsic($name, type_params, None, param!($ret));
                 }
             );
             ($name:expr, fn($($arg:expr),*) -> $ret:expr) => (
-                if key == $name {
-                    return Some(self.insert_intrinsic($name, Some(&[$($arg),*]), $ret));
+                if base_name == $name {
+                    return self.insert_intrinsic($name, type_params, Some(&[$(param!($arg)),*]), param!($ret));
                 }
             );
         }
         macro_rules! mk_struct {
-            ($($field_ty:expr),*) => (self.type_struct( &[$($field_ty),*], false))
+            ($($field_ty:expr),*) => (self.type_struct( &[$(param!($field_ty)),*], false))
         }
 
+        let same_width_vector = |index, element_ty| {
+            self.type_vector(element_ty, self.vector_length(type_params[index]) as u64)
+        };
+
         let ptr = self.type_ptr();
         let void = self.type_void();
         let i1 = self.type_i1();
-        let t_i8 = self.type_i8();
         let t_i16 = self.type_i16();
         let t_i32 = self.type_i32();
         let t_i64 = self.type_i64();
-        let t_i128 = self.type_i128();
         let t_isize = self.type_isize();
-        let t_f16 = self.type_f16();
-        let t_f32 = self.type_f32();
-        let t_f64 = self.type_f64();
-        let t_f128 = self.type_f128();
         let t_metadata = self.type_metadata();
         let t_token = self.type_token();
 
         ifn!("llvm.wasm.get.exception", fn(t_token) -> ptr);
         ifn!("llvm.wasm.get.ehselector", fn(t_token) -> t_i32);
 
-        ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.unsigned.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.unsigned.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.wasm.trunc.signed.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.signed.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.signed.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.signed.i64.f64", fn(t_f64) -> t_i64);
-
-        ifn!("llvm.fptosi.sat.i8.f32", fn(t_f32) -> t_i8);
-        ifn!("llvm.fptosi.sat.i16.f32", fn(t_f32) -> t_i16);
-        ifn!("llvm.fptosi.sat.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.fptosi.sat.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.fptosi.sat.i128.f32", fn(t_f32) -> t_i128);
-        ifn!("llvm.fptosi.sat.i8.f64", fn(t_f64) -> t_i8);
-        ifn!("llvm.fptosi.sat.i16.f64", fn(t_f64) -> t_i16);
-        ifn!("llvm.fptosi.sat.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.fptosi.sat.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.fptosi.sat.i128.f64", fn(t_f64) -> t_i128);
-
-        ifn!("llvm.fptoui.sat.i8.f32", fn(t_f32) -> t_i8);
-        ifn!("llvm.fptoui.sat.i16.f32", fn(t_f32) -> t_i16);
-        ifn!("llvm.fptoui.sat.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.fptoui.sat.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.fptoui.sat.i128.f32", fn(t_f32) -> t_i128);
-        ifn!("llvm.fptoui.sat.i8.f64", fn(t_f64) -> t_i8);
-        ifn!("llvm.fptoui.sat.i16.f64", fn(t_f64) -> t_i16);
-        ifn!("llvm.fptoui.sat.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.fptoui.sat.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.fptoui.sat.i128.f64", fn(t_f64) -> t_i128);
+        ifn!("llvm.wasm.trunc.unsigned", fn(1) -> 0);
+        ifn!("llvm.wasm.trunc.signed", fn(1) -> 0);
+        ifn!("llvm.fptosi.sat", fn(1) -> 0);
+        ifn!("llvm.fptoui.sat", fn(1) -> 0);
 
         ifn!("llvm.trap", fn() -> void);
         ifn!("llvm.debugtrap", fn() -> void);
         ifn!("llvm.frameaddress", fn(t_i32) -> ptr);
 
-        ifn!("llvm.powi.f16.i32", fn(t_f16, t_i32) -> t_f16);
-        ifn!("llvm.powi.f32.i32", fn(t_f32, t_i32) -> t_f32);
-        ifn!("llvm.powi.f64.i32", fn(t_f64, t_i32) -> t_f64);
-        ifn!("llvm.powi.f128.i32", fn(t_f128, t_i32) -> t_f128);
-
-        ifn!("llvm.pow.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.pow.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.pow.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.pow.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.sqrt.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.sqrt.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.sqrt.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.sqrt.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.sin.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.sin.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.sin.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.sin.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.cos.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.cos.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.cos.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.cos.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.exp.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.exp.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.exp.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.exp.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.exp2.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.exp2.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.exp2.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.exp2.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.log.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.log.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.log.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.log.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.log10.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.log10.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.log10.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.log10.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.log2.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.log2.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.log2.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.log2.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.fma.f16", fn(t_f16, t_f16, t_f16) -> t_f16);
-        ifn!("llvm.fma.f32", fn(t_f32, t_f32, t_f32) -> t_f32);
-        ifn!("llvm.fma.f64", fn(t_f64, t_f64, t_f64) -> t_f64);
-        ifn!("llvm.fma.f128", fn(t_f128, t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.fmuladd.f16", fn(t_f16, t_f16, t_f16) -> t_f16);
-        ifn!("llvm.fmuladd.f32", fn(t_f32, t_f32, t_f32) -> t_f32);
-        ifn!("llvm.fmuladd.f64", fn(t_f64, t_f64, t_f64) -> t_f64);
-        ifn!("llvm.fmuladd.f128", fn(t_f128, t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.fabs.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.fabs.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.fabs.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.fabs.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.minnum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.minnum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.minnum.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.minnum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.minimum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.minimum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.minimum.f64", fn(t_f64, t_f64) -> t_f64);
-        // There are issues on x86_64 and aarch64 with the f128 variant.
-        //  - https://github.com/llvm/llvm-project/issues/139380
-        //  - https://github.com/llvm/llvm-project/issues/139381
-        // ifn!("llvm.minimum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.maxnum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.maxnum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.maxnum.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.maxnum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.maximum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.maximum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.maximum.f64", fn(t_f64, t_f64) -> t_f64);
-        // There are issues on x86_64 and aarch64 with the f128 variant.
-        //  - https://github.com/llvm/llvm-project/issues/139380
-        //  - https://github.com/llvm/llvm-project/issues/139381
-        // ifn!("llvm.maximum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.floor.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.floor.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.floor.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.floor.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.ceil.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.ceil.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.ceil.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.ceil.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.trunc.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.trunc.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.trunc.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.trunc.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.copysign.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.copysign.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.copysign.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.copysign.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.round.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.round.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.round.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.round.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.roundeven.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.roundeven.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.roundeven.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.roundeven.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.rint.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.rint.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.rint.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.rint.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.nearbyint.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.nearbyint.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.nearbyint.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.nearbyint.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.ctpop.i8", fn(t_i8) -> t_i8);
-        ifn!("llvm.ctpop.i16", fn(t_i16) -> t_i16);
-        ifn!("llvm.ctpop.i32", fn(t_i32) -> t_i32);
-        ifn!("llvm.ctpop.i64", fn(t_i64) -> t_i64);
-        ifn!("llvm.ctpop.i128", fn(t_i128) -> t_i128);
-
-        ifn!("llvm.ctlz.i8", fn(t_i8, i1) -> t_i8);
-        ifn!("llvm.ctlz.i16", fn(t_i16, i1) -> t_i16);
-        ifn!("llvm.ctlz.i32", fn(t_i32, i1) -> t_i32);
-        ifn!("llvm.ctlz.i64", fn(t_i64, i1) -> t_i64);
-        ifn!("llvm.ctlz.i128", fn(t_i128, i1) -> t_i128);
-
-        ifn!("llvm.cttz.i8", fn(t_i8, i1) -> t_i8);
-        ifn!("llvm.cttz.i16", fn(t_i16, i1) -> t_i16);
-        ifn!("llvm.cttz.i32", fn(t_i32, i1) -> t_i32);
-        ifn!("llvm.cttz.i64", fn(t_i64, i1) -> t_i64);
-        ifn!("llvm.cttz.i128", fn(t_i128, i1) -> t_i128);
-
-        ifn!("llvm.bswap.i16", fn(t_i16) -> t_i16);
-        ifn!("llvm.bswap.i32", fn(t_i32) -> t_i32);
-        ifn!("llvm.bswap.i64", fn(t_i64) -> t_i64);
-        ifn!("llvm.bswap.i128", fn(t_i128) -> t_i128);
-
-        ifn!("llvm.bitreverse.i8", fn(t_i8) -> t_i8);
-        ifn!("llvm.bitreverse.i16", fn(t_i16) -> t_i16);
-        ifn!("llvm.bitreverse.i32", fn(t_i32) -> t_i32);
-        ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64);
-        ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128);
-
-        ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
-        ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
-        ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
-        ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
-        ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
-        ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
-        ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
-        ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
-        ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.sadd.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.sadd.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.uadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.uadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.uadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.uadd.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.uadd.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.ssub.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.ssub.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.ssub.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.ssub.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.ssub.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.usub.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.usub.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.usub.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.usub.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.usub.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.smul.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.smul.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.smul.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.smul.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.smul.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.umul.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.umul.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.umul.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.umul.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.umul.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.sadd.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.sadd.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.sadd.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.sadd.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.sadd.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.uadd.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.uadd.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.uadd.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.uadd.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.uadd.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.ssub.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.ssub.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.ssub.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.ssub.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.ssub.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.usub.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.usub.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.usub.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.usub.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.usub.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.scmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.scmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
-        ifn!("llvm.scmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
-        ifn!("llvm.scmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
-        ifn!("llvm.scmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
-
-        ifn!("llvm.ucmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.ucmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
-        ifn!("llvm.ucmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
-        ifn!("llvm.ucmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
-        ifn!("llvm.ucmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
-
-        ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
-        ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
-
-        // FIXME: This is an infinitesimally small portion of the types you can
-        // pass to this intrinsic, if we can ever lazily register intrinsics we
-        // should register these when they're used, that way any type can be
-        // passed.
-        ifn!("llvm.is.constant.i1", fn(i1) -> i1);
-        ifn!("llvm.is.constant.i8", fn(t_i8) -> i1);
-        ifn!("llvm.is.constant.i16", fn(t_i16) -> i1);
-        ifn!("llvm.is.constant.i32", fn(t_i32) -> i1);
-        ifn!("llvm.is.constant.i64", fn(t_i64) -> i1);
-        ifn!("llvm.is.constant.i128", fn(t_i128) -> i1);
-        ifn!("llvm.is.constant.isize", fn(t_isize) -> i1);
-        ifn!("llvm.is.constant.f16", fn(t_f16) -> i1);
-        ifn!("llvm.is.constant.f32", fn(t_f32) -> i1);
-        ifn!("llvm.is.constant.f64", fn(t_f64) -> i1);
-        ifn!("llvm.is.constant.f128", fn(t_f128) -> i1);
-        ifn!("llvm.is.constant.ptr", fn(ptr) -> i1);
-
-        ifn!("llvm.expect.i1", fn(i1, i1) -> i1);
-        ifn!("llvm.eh.typeid.for", fn(ptr) -> t_i32);
+        ifn!("llvm.powi", fn(0, 1) -> 0);
+        ifn!("llvm.pow", fn(0, 0) -> 0);
+        ifn!("llvm.sqrt", fn(0) -> 0);
+        ifn!("llvm.sin", fn(0) -> 0);
+        ifn!("llvm.cos", fn(0) -> 0);
+        ifn!("llvm.exp", fn(0) -> 0);
+        ifn!("llvm.exp2", fn(0) -> 0);
+        ifn!("llvm.log", fn(0) -> 0);
+        ifn!("llvm.log10", fn(0) -> 0);
+        ifn!("llvm.log2", fn(0) -> 0);
+        ifn!("llvm.fma", fn(0, 0, 0) -> 0);
+        ifn!("llvm.fmuladd", fn(0, 0, 0) -> 0);
+        ifn!("llvm.fabs", fn(0) -> 0);
+        ifn!("llvm.minnum", fn(0, 0) -> 0);
+        ifn!("llvm.minimum", fn(0, 0) -> 0);
+        ifn!("llvm.maxnum", fn(0, 0) -> 0);
+        ifn!("llvm.maximum", fn(0, 0) -> 0);
+        ifn!("llvm.floor", fn(0) -> 0);
+        ifn!("llvm.ceil", fn(0) -> 0);
+        ifn!("llvm.trunc", fn(0) -> 0);
+        ifn!("llvm.copysign", fn(0, 0) -> 0);
+        ifn!("llvm.round", fn(0) -> 0);
+        ifn!("llvm.rint", fn(0) -> 0);
+        ifn!("llvm.nearbyint", fn(0) -> 0);
+
+        ifn!("llvm.ctpop", fn(0) -> 0);
+        ifn!("llvm.ctlz", fn(0, i1) -> 0);
+        ifn!("llvm.cttz", fn(0, i1) -> 0);
+        ifn!("llvm.bswap", fn(0) -> 0);
+        ifn!("llvm.bitreverse", fn(0) -> 0);
+        ifn!("llvm.fshl", fn(0, 0, 0) -> 0);
+        ifn!("llvm.fshr", fn(0, 0, 0) -> 0);
+
+        ifn!("llvm.sadd.with.overflow", fn(0, 0) -> mk_struct! {0, i1});
+        ifn!("llvm.uadd.with.overflow", fn(0, 0) -> mk_struct! {0, i1});
+        ifn!("llvm.ssub.with.overflow", fn(0, 0) -> mk_struct! {0, i1});
+        ifn!("llvm.usub.with.overflow", fn(0, 0) -> mk_struct! {0, i1});
+        ifn!("llvm.smul.with.overflow", fn(0, 0) -> mk_struct! {0, i1});
+        ifn!("llvm.umul.with.overflow", fn(0, 0) -> mk_struct! {0, i1});
+
+        ifn!("llvm.sadd.sat", fn(0, 0) -> 0);
+        ifn!("llvm.uadd.sat", fn(0, 0) -> 0);
+        ifn!("llvm.ssub.sat", fn(0, 0) -> 0);
+        ifn!("llvm.usub.sat", fn(0, 0) -> 0);
+
+        ifn!("llvm.scmp", fn(1, 1) -> 0);
+        ifn!("llvm.ucmp", fn(1, 1) -> 0);
+
+        ifn!("llvm.lifetime.start", fn(t_i64, 0) -> void);
+        ifn!("llvm.lifetime.end", fn(t_i64, 0) -> void);
+
+        ifn!("llvm.is.constant", fn(0) -> i1);
+        ifn!("llvm.expect", fn(0, 0) -> 0);
+
+        ifn!("llvm.eh.typeid.for", fn(0) -> t_i32);
         ifn!("llvm.localescape", fn(...) -> void);
         ifn!("llvm.localrecover", fn(ptr, ptr, t_i32) -> ptr);
-        ifn!("llvm.x86.seh.recoverfp", fn(ptr, ptr) -> ptr);
 
         ifn!("llvm.assume", fn(i1) -> void);
-        ifn!("llvm.prefetch", fn(ptr, t_i32, t_i32, t_i32) -> void);
-
-        // This isn't an "LLVM intrinsic", but LLVM's optimization passes
-        // recognize it like one (including turning it into `bcmp` sometimes)
-        // and we use it to implement intrinsics like `raw_eq` and `compare_bytes`
-        match self.sess().target.arch.as_ref() {
-            "avr" | "msp430" => ifn!("memcmp", fn(ptr, ptr, t_isize) -> t_i16),
-            _ => ifn!("memcmp", fn(ptr, ptr, t_isize) -> t_i32),
+        ifn!("llvm.prefetch", fn(0, t_i32, t_i32, t_i32) -> void);
+
+        if base_name == "memcmp" {
+            let ret = match self.sess().target.arch.as_ref() {
+                "avr" | "msp430" => t_i16,
+                _ => t_i32,
+            };
+
+            let fn_ty = self.type_func(&[ptr, ptr, t_isize], ret);
+            let f = self.declare_cfn("memcmp", llvm::UnnamedAddr::No, fn_ty);
+            self.intrinsics.borrow_mut().insert(("memcmp", SmallVec::new()), (fn_ty, f));
+
+            return (fn_ty, f);
         }
 
         // variadic intrinsics
-        ifn!("llvm.va_start", fn(ptr) -> void);
-        ifn!("llvm.va_end", fn(ptr) -> void);
-        ifn!("llvm.va_copy", fn(ptr, ptr) -> void);
+        ifn!("llvm.va_start", fn(0) -> void);
+        ifn!("llvm.va_end", fn(0) -> void);
+        ifn!("llvm.va_copy", fn(0, 0) -> void);
 
         if self.sess().instrument_coverage() {
             ifn!("llvm.instrprof.increment", fn(ptr, t_i64, t_i32, t_i32) -> void);
@@ -1238,14 +1030,19 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.type.checked.load", fn(ptr, t_i32, t_metadata) -> mk_struct! {ptr, i1});
 
         if self.sess().opts.debuginfo != DebugInfo::None {
-            ifn!("llvm.dbg.declare", fn(t_metadata, t_metadata) -> void);
-            ifn!("llvm.dbg.value", fn(t_metadata, t_i64, t_metadata) -> void);
+            ifn!("llvm.dbg.declare", fn(t_metadata, t_metadata, t_metadata) -> void);
+            ifn!("llvm.dbg.value", fn(t_metadata, t_metadata, t_metadata) -> void);
         }
 
-        ifn!("llvm.ptrmask", fn(ptr, t_isize) -> ptr);
+        ifn!("llvm.ptrmask", fn(0, 1) -> 0);
         ifn!("llvm.threadlocal.address", fn(ptr) -> ptr);
 
-        None
+        ifn!("llvm.masked.load", fn(1, t_i32, same_width_vector(0, i1), 0) -> 0);
+        ifn!("llvm.masked.store", fn(0, 1, t_i32, same_width_vector(0, i1)) -> void);
+        ifn!("llvm.masked.gather", fn(1, t_i32, same_width_vector(0, i1), 0) -> 0);
+        ifn!("llvm.masked.scatter", fn(0, 1, t_i32, same_width_vector(0, i1)) -> void);
+
+        bug!("Unknown intrinsic: `{base_name}`")
     }
 
     pub(crate) fn eh_catch_typeinfo(&self) -> &'ll Value {
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 615db259487d1..3e650a717646c 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -15,7 +15,7 @@ use rustc_middle::ty::{self, GenericArgsRef, Ty};
 use rustc_middle::{bug, span_bug};
 use rustc_span::{Span, Symbol, sym};
 use rustc_symbol_mangling::mangle_internal_symbol;
-use rustc_target::spec::{HasTargetSpec, PanicStrategy};
+use rustc_target::spec::PanicStrategy;
 use tracing::debug;
 
 use crate::abi::FnAbiLlvmExt;
@@ -27,137 +27,142 @@ use crate::type_of::LayoutLlvmExt;
 use crate::va_arg::emit_va_arg;
 use crate::value::Value;
 
-fn get_simple_intrinsic<'ll>(
-    cx: &CodegenCx<'ll, '_>,
+fn call_simple_intrinsic<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
     name: Symbol,
-) -> Option<(&'ll Type, &'ll Value)> {
-    let llvm_name = match name {
-        sym::sqrtf16 => "llvm.sqrt.f16",
-        sym::sqrtf32 => "llvm.sqrt.f32",
-        sym::sqrtf64 => "llvm.sqrt.f64",
-        sym::sqrtf128 => "llvm.sqrt.f128",
-
-        sym::powif16 => "llvm.powi.f16.i32",
-        sym::powif32 => "llvm.powi.f32.i32",
-        sym::powif64 => "llvm.powi.f64.i32",
-        sym::powif128 => "llvm.powi.f128.i32",
-
-        sym::sinf16 => "llvm.sin.f16",
-        sym::sinf32 => "llvm.sin.f32",
-        sym::sinf64 => "llvm.sin.f64",
-        sym::sinf128 => "llvm.sin.f128",
-
-        sym::cosf16 => "llvm.cos.f16",
-        sym::cosf32 => "llvm.cos.f32",
-        sym::cosf64 => "llvm.cos.f64",
-        sym::cosf128 => "llvm.cos.f128",
-
-        sym::powf16 => "llvm.pow.f16",
-        sym::powf32 => "llvm.pow.f32",
-        sym::powf64 => "llvm.pow.f64",
-        sym::powf128 => "llvm.pow.f128",
-
-        sym::expf16 => "llvm.exp.f16",
-        sym::expf32 => "llvm.exp.f32",
-        sym::expf64 => "llvm.exp.f64",
-        sym::expf128 => "llvm.exp.f128",
-
-        sym::exp2f16 => "llvm.exp2.f16",
-        sym::exp2f32 => "llvm.exp2.f32",
-        sym::exp2f64 => "llvm.exp2.f64",
-        sym::exp2f128 => "llvm.exp2.f128",
-
-        sym::logf16 => "llvm.log.f16",
-        sym::logf32 => "llvm.log.f32",
-        sym::logf64 => "llvm.log.f64",
-        sym::logf128 => "llvm.log.f128",
-
-        sym::log10f16 => "llvm.log10.f16",
-        sym::log10f32 => "llvm.log10.f32",
-        sym::log10f64 => "llvm.log10.f64",
-        sym::log10f128 => "llvm.log10.f128",
-
-        sym::log2f16 => "llvm.log2.f16",
-        sym::log2f32 => "llvm.log2.f32",
-        sym::log2f64 => "llvm.log2.f64",
-        sym::log2f128 => "llvm.log2.f128",
-
-        sym::fmaf16 => "llvm.fma.f16",
-        sym::fmaf32 => "llvm.fma.f32",
-        sym::fmaf64 => "llvm.fma.f64",
-        sym::fmaf128 => "llvm.fma.f128",
-
-        sym::fmuladdf16 => "llvm.fmuladd.f16",
-        sym::fmuladdf32 => "llvm.fmuladd.f32",
-        sym::fmuladdf64 => "llvm.fmuladd.f64",
-        sym::fmuladdf128 => "llvm.fmuladd.f128",
-
-        sym::fabsf16 => "llvm.fabs.f16",
-        sym::fabsf32 => "llvm.fabs.f32",
-        sym::fabsf64 => "llvm.fabs.f64",
-        sym::fabsf128 => "llvm.fabs.f128",
-
-        sym::minnumf16 => "llvm.minnum.f16",
-        sym::minnumf32 => "llvm.minnum.f32",
-        sym::minnumf64 => "llvm.minnum.f64",
-        sym::minnumf128 => "llvm.minnum.f128",
-
-        sym::minimumf16 => "llvm.minimum.f16",
-        sym::minimumf32 => "llvm.minimum.f32",
-        sym::minimumf64 => "llvm.minimum.f64",
+    args: &[OperandRef<'tcx, &'ll Value>],
+) -> Option<&'ll Value> {
+    let (base_name, type_params): (&'static str, &[&'ll Type]) = match name {
+        sym::sqrtf16 => ("llvm.sqrt", &[bx.type_f16()]),
+        sym::sqrtf32 => ("llvm.sqrt", &[bx.type_f32()]),
+        sym::sqrtf64 => ("llvm.sqrt", &[bx.type_f64()]),
+        sym::sqrtf128 => ("llvm.sqrt", &[bx.type_f128()]),
+
+        sym::powif16 => ("llvm.powi", &[bx.type_f16(), bx.type_i32()]),
+        sym::powif32 => ("llvm.powi", &[bx.type_f32(), bx.type_i32()]),
+        sym::powif64 => ("llvm.powi", &[bx.type_f64(), bx.type_i32()]),
+        sym::powif128 => ("llvm.powi", &[bx.type_f128(), bx.type_i32()]),
+
+        sym::sinf16 => ("llvm.sin", &[bx.type_f16()]),
+        sym::sinf32 => ("llvm.sin", &[bx.type_f32()]),
+        sym::sinf64 => ("llvm.sin", &[bx.type_f64()]),
+        sym::sinf128 => ("llvm.sin", &[bx.type_f128()]),
+
+        sym::cosf16 => ("llvm.cos", &[bx.type_f16()]),
+        sym::cosf32 => ("llvm.cos", &[bx.type_f32()]),
+        sym::cosf64 => ("llvm.cos", &[bx.type_f64()]),
+        sym::cosf128 => ("llvm.cos", &[bx.type_f128()]),
+
+        sym::powf16 => ("llvm.pow", &[bx.type_f16()]),
+        sym::powf32 => ("llvm.pow", &[bx.type_f32()]),
+        sym::powf64 => ("llvm.pow", &[bx.type_f64()]),
+        sym::powf128 => ("llvm.pow", &[bx.type_f128()]),
+
+        sym::expf16 => ("llvm.exp", &[bx.type_f16()]),
+        sym::expf32 => ("llvm.exp", &[bx.type_f32()]),
+        sym::expf64 => ("llvm.exp", &[bx.type_f64()]),
+        sym::expf128 => ("llvm.exp", &[bx.type_f128()]),
+
+        sym::exp2f16 => ("llvm.exp2", &[bx.type_f16()]),
+        sym::exp2f32 => ("llvm.exp2", &[bx.type_f32()]),
+        sym::exp2f64 => ("llvm.exp2", &[bx.type_f64()]),
+        sym::exp2f128 => ("llvm.exp2", &[bx.type_f128()]),
+
+        sym::logf16 => ("llvm.log", &[bx.type_f16()]),
+        sym::logf32 => ("llvm.log", &[bx.type_f32()]),
+        sym::logf64 => ("llvm.log", &[bx.type_f64()]),
+        sym::logf128 => ("llvm.log", &[bx.type_f128()]),
+
+        sym::log10f16 => ("llvm.log10", &[bx.type_f16()]),
+        sym::log10f32 => ("llvm.log10", &[bx.type_f32()]),
+        sym::log10f64 => ("llvm.log10", &[bx.type_f64()]),
+        sym::log10f128 => ("llvm.log10", &[bx.type_f128()]),
+
+        sym::log2f16 => ("llvm.log2", &[bx.type_f16()]),
+        sym::log2f32 => ("llvm.log2", &[bx.type_f32()]),
+        sym::log2f64 => ("llvm.log2", &[bx.type_f64()]),
+        sym::log2f128 => ("llvm.log2", &[bx.type_f128()]),
+
+        sym::fmaf16 => ("llvm.fma", &[bx.type_f16()]),
+        sym::fmaf32 => ("llvm.fma", &[bx.type_f32()]),
+        sym::fmaf64 => ("llvm.fma", &[bx.type_f64()]),
+        sym::fmaf128 => ("llvm.fma", &[bx.type_f128()]),
+
+        sym::fmuladdf16 => ("llvm.fmuladd", &[bx.type_f16()]),
+        sym::fmuladdf32 => ("llvm.fmuladd", &[bx.type_f32()]),
+        sym::fmuladdf64 => ("llvm.fmuladd", &[bx.type_f64()]),
+        sym::fmuladdf128 => ("llvm.fmuladd", &[bx.type_f128()]),
+
+        sym::fabsf16 => ("llvm.fabs", &[bx.type_f16()]),
+        sym::fabsf32 => ("llvm.fabs", &[bx.type_f32()]),
+        sym::fabsf64 => ("llvm.fabs", &[bx.type_f64()]),
+        sym::fabsf128 => ("llvm.fabs", &[bx.type_f128()]),
+
+        sym::minnumf16 => ("llvm.minnum", &[bx.type_f16()]),
+        sym::minnumf32 => ("llvm.minnum", &[bx.type_f32()]),
+        sym::minnumf64 => ("llvm.minnum", &[bx.type_f64()]),
+        sym::minnumf128 => ("llvm.minnum", &[bx.type_f128()]),
+
+        sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]),
+        sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]),
+        sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]),
         // There are issues on x86_64 and aarch64 with the f128 variant,
         // let's instead use the instrinsic fallback body.
-        // sym::minimumf128 => "llvm.minimum.f128",
-        sym::maxnumf16 => "llvm.maxnum.f16",
-        sym::maxnumf32 => "llvm.maxnum.f32",
-        sym::maxnumf64 => "llvm.maxnum.f64",
-        sym::maxnumf128 => "llvm.maxnum.f128",
-
-        sym::maximumf16 => "llvm.maximum.f16",
-        sym::maximumf32 => "llvm.maximum.f32",
-        sym::maximumf64 => "llvm.maximum.f64",
+        // sym::minimumf128 => ("llvm.minimum", &[cx.type_f128()]),
+        sym::maxnumf16 => ("llvm.maxnum", &[bx.type_f16()]),
+        sym::maxnumf32 => ("llvm.maxnum", &[bx.type_f32()]),
+        sym::maxnumf64 => ("llvm.maxnum", &[bx.type_f64()]),
+        sym::maxnumf128 => ("llvm.maxnum", &[bx.type_f128()]),
+
+        sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]),
+        sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]),
+        sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]),
         // There are issues on x86_64 and aarch64 with the f128 variant,
         // let's instead use the instrinsic fallback body.
-        // sym::maximumf128 => "llvm.maximum.f128",
-        sym::copysignf16 => "llvm.copysign.f16",
-        sym::copysignf32 => "llvm.copysign.f32",
-        sym::copysignf64 => "llvm.copysign.f64",
-        sym::copysignf128 => "llvm.copysign.f128",
-
-        sym::floorf16 => "llvm.floor.f16",
-        sym::floorf32 => "llvm.floor.f32",
-        sym::floorf64 => "llvm.floor.f64",
-        sym::floorf128 => "llvm.floor.f128",
-
-        sym::ceilf16 => "llvm.ceil.f16",
-        sym::ceilf32 => "llvm.ceil.f32",
-        sym::ceilf64 => "llvm.ceil.f64",
-        sym::ceilf128 => "llvm.ceil.f128",
-
-        sym::truncf16 => "llvm.trunc.f16",
-        sym::truncf32 => "llvm.trunc.f32",
-        sym::truncf64 => "llvm.trunc.f64",
-        sym::truncf128 => "llvm.trunc.f128",
+        // sym::maximumf128 => ("llvm.maximum", &[cx.type_f128()]),
+        sym::copysignf16 => ("llvm.copysign", &[bx.type_f16()]),
+        sym::copysignf32 => ("llvm.copysign", &[bx.type_f32()]),
+        sym::copysignf64 => ("llvm.copysign", &[bx.type_f64()]),
+        sym::copysignf128 => ("llvm.copysign", &[bx.type_f128()]),
+
+        sym::floorf16 => ("llvm.floor", &[bx.type_f16()]),
+        sym::floorf32 => ("llvm.floor", &[bx.type_f32()]),
+        sym::floorf64 => ("llvm.floor", &[bx.type_f64()]),
+        sym::floorf128 => ("llvm.floor", &[bx.type_f128()]),
+
+        sym::ceilf16 => ("llvm.ceil", &[bx.type_f16()]),
+        sym::ceilf32 => ("llvm.ceil", &[bx.type_f32()]),
+        sym::ceilf64 => ("llvm.ceil", &[bx.type_f64()]),
+        sym::ceilf128 => ("llvm.ceil", &[bx.type_f128()]),
+
+        sym::truncf16 => ("llvm.trunc", &[bx.type_f16()]),
+        sym::truncf32 => ("llvm.trunc", &[bx.type_f32()]),
+        sym::truncf64 => ("llvm.trunc", &[bx.type_f64()]),
+        sym::truncf128 => ("llvm.trunc", &[bx.type_f128()]),
 
         // We could use any of `rint`, `nearbyint`, or `roundeven`
         // for this -- they are all identical in semantics when
         // assuming the default FP environment.
         // `rint` is what we used for $forever.
-        sym::round_ties_even_f16 => "llvm.rint.f16",
-        sym::round_ties_even_f32 => "llvm.rint.f32",
-        sym::round_ties_even_f64 => "llvm.rint.f64",
-        sym::round_ties_even_f128 => "llvm.rint.f128",
+        sym::round_ties_even_f16 => ("llvm.rint", &[bx.type_f16()]),
+        sym::round_ties_even_f32 => ("llvm.rint", &[bx.type_f32()]),
+        sym::round_ties_even_f64 => ("llvm.rint", &[bx.type_f64()]),
+        sym::round_ties_even_f128 => ("llvm.rint", &[bx.type_f128()]),
 
-        sym::roundf16 => "llvm.round.f16",
-        sym::roundf32 => "llvm.round.f32",
-        sym::roundf64 => "llvm.round.f64",
-        sym::roundf128 => "llvm.round.f128",
+        sym::roundf16 => ("llvm.round", &[bx.type_f16()]),
+        sym::roundf32 => ("llvm.round", &[bx.type_f32()]),
+        sym::roundf64 => ("llvm.round", &[bx.type_f64()]),
+        sym::roundf128 => ("llvm.round", &[bx.type_f128()]),
 
-        sym::ptr_mask => "llvm.ptrmask",
+        sym::ptr_mask => ("llvm.ptrmask", &[bx.type_ptr(), bx.type_isize()]),
 
         _ => return None,
     };
-    Some(cx.get_intrinsic(llvm_name))
+    Some(bx.call_intrinsic(
+        base_name,
+        type_params,
+        &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
+    ))
 }
 
 impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
@@ -173,36 +178,16 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
         let name = tcx.item_name(instance.def_id());
         let fn_args = instance.args;
 
-        let simple = get_simple_intrinsic(self, name);
+        let simple = call_simple_intrinsic(self, name, args);
         let llval = match name {
-            _ if simple.is_some() => {
-                let (simple_ty, simple_fn) = simple.unwrap();
-                self.call(
-                    simple_ty,
-                    None,
-                    None,
-                    simple_fn,
-                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
-                    None,
-                    Some(instance),
-                )
-            }
+            _ if simple.is_some() => simple.unwrap(),
             sym::is_val_statically_known => {
-                let intrinsic_type = args[0].layout.immediate_llvm_type(self.cx);
-                let kind = self.type_kind(intrinsic_type);
-                let intrinsic_name = match kind {
-                    TypeKind::Pointer | TypeKind::Integer => {
-                        Some(format!("llvm.is.constant.{intrinsic_type:?}"))
-                    }
-                    // LLVM float types' intrinsic names differ from their type names.
-                    TypeKind::Half => Some(format!("llvm.is.constant.f16")),
-                    TypeKind::Float => Some(format!("llvm.is.constant.f32")),
-                    TypeKind::Double => Some(format!("llvm.is.constant.f64")),
-                    TypeKind::FP128 => Some(format!("llvm.is.constant.f128")),
-                    _ => None,
-                };
-                if let Some(intrinsic_name) = intrinsic_name {
-                    self.call_intrinsic(&intrinsic_name, &[args[0].immediate()])
+                if let OperandValue::Immediate(imm) = args[0].val {
+                    self.call_intrinsic(
+                        "llvm.is.constant",
+                        &[args[0].layout.immediate_llvm_type(self.cx)],
+                        &[imm],
+                    )
                 } else {
                     self.const_bool(false)
                 }
@@ -246,10 +231,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 );
                 return Ok(());
             }
-            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[]),
-            sym::va_copy => {
-                self.call_intrinsic("llvm.va_copy", &[args[0].immediate(), args[1].immediate()])
-            }
+            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[], &[]),
+            sym::va_copy => self.call_intrinsic(
+                "llvm.va_copy",
+                &[self.type_ptr()],
+                &[args[0].immediate(), args[1].immediate()],
+            ),
             sym::va_arg => {
                 match result.layout.backend_repr {
                     BackendRepr::Scalar(scalar) => {
@@ -324,6 +311,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 };
                 self.call_intrinsic(
                     "llvm.prefetch",
+                    &[self.type_ptr()],
                     &[
                         args[0].immediate(),
                         self.const_i32(rw),
@@ -385,11 +373,13 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 }
                 let (size, signed) = ty.int_size_and_signed(self.tcx);
                 let width = size.bits();
+                let llty = self.type_ix(width);
                 match name {
                     sym::ctlz | sym::cttz => {
                         let y = self.const_bool(false);
                         let ret = self.call_intrinsic(
-                            &format!("llvm.{name}.i{width}"),
+                            &format!("llvm.{name}"),
+                            &[llty],
                             &[args[0].immediate(), y],
                         );
 
@@ -397,62 +387,54 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     }
                     sym::ctlz_nonzero => {
                         let y = self.const_bool(true);
-                        let llvm_name = &format!("llvm.ctlz.i{width}");
-                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        let ret =
+                            self.call_intrinsic("llvm.ctlz", &[llty], &[args[0].immediate(), y]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::cttz_nonzero => {
                         let y = self.const_bool(true);
-                        let llvm_name = &format!("llvm.cttz.i{width}");
-                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        let ret =
+                            self.call_intrinsic("llvm.cttz", &[llty], &[args[0].immediate(), y]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::ctpop => {
-                        let ret = self.call_intrinsic(
-                            &format!("llvm.ctpop.i{width}"),
-                            &[args[0].immediate()],
-                        );
+                        let ret =
+                            self.call_intrinsic("llvm.ctpop", &[llty], &[args[0].immediate()]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::bswap => {
                         if width == 8 {
                             args[0].immediate() // byte swap a u8/i8 is just a no-op
                         } else {
-                            self.call_intrinsic(
-                                &format!("llvm.bswap.i{width}"),
-                                &[args[0].immediate()],
-                            )
+                            self.call_intrinsic("llvm.bswap", &[llty], &[args[0].immediate()])
                         }
                     }
-                    sym::bitreverse => self.call_intrinsic(
-                        &format!("llvm.bitreverse.i{width}"),
-                        &[args[0].immediate()],
-                    ),
+                    sym::bitreverse => {
+                        self.call_intrinsic("llvm.bitreverse", &[llty], &[args[0].immediate()])
+                    }
                     sym::rotate_left | sym::rotate_right => {
                         let is_left = name == sym::rotate_left;
                         let val = args[0].immediate();
                         let raw_shift = args[1].immediate();
                         // rotate = funnel shift with first two args the same
-                        let llvm_name =
-                            &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
+                        let llvm_name = format!("llvm.fsh{}", if is_left { 'l' } else { 'r' });
 
                         // llvm expects shift to be the same type as the values, but rust
                         // always uses `u32`.
                         let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
 
-                        self.call_intrinsic(llvm_name, &[val, val, raw_shift])
+                        self.call_intrinsic(&llvm_name, &[llty], &[val, val, raw_shift])
                     }
                     sym::saturating_add | sym::saturating_sub => {
                         let is_add = name == sym::saturating_add;
                         let lhs = args[0].immediate();
                         let rhs = args[1].immediate();
-                        let llvm_name = &format!(
-                            "llvm.{}{}.sat.i{}",
+                        let llvm_name = format!(
+                            "llvm.{}{}.sat",
                             if signed { 's' } else { 'u' },
                             if is_add { "add" } else { "sub" },
-                            width
                         );
-                        self.call_intrinsic(llvm_name, &[lhs, rhs])
+                        self.call_intrinsic(&llvm_name, &[llty], &[lhs, rhs])
                     }
                     _ => bug!(),
                 }
@@ -484,7 +466,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     self.icmp(IntPredicate::IntEQ, a_val, b_val)
                 } else {
                     let n = self.const_usize(layout.size().bytes());
-                    let cmp = self.call_intrinsic("memcmp", &[a, b, n]);
+                    let cmp = self.call_intrinsic("memcmp", &[], &[a, b, n]);
                     match self.cx.sess().target.arch.as_ref() {
                         "avr" | "msp430" => self.icmp(IntPredicate::IntEQ, cmp, self.const_i16(0)),
                         _ => self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0)),
@@ -496,6 +478,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
                 let cmp = self.call_intrinsic(
                     "memcmp",
+                    &[],
                     &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
                 );
                 // Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
@@ -619,18 +602,22 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     }
 
     fn abort(&mut self) {
-        self.call_intrinsic("llvm.trap", &[]);
+        self.call_intrinsic("llvm.trap", &[], &[]);
     }
 
     fn assume(&mut self, val: Self::Value) {
         if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
-            self.call_intrinsic("llvm.assume", &[val]);
+            self.call_intrinsic("llvm.assume", &[], &[val]);
         }
     }
 
     fn expect(&mut self, cond: Self::Value, expected: bool) -> Self::Value {
         if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
-            self.call_intrinsic("llvm.expect.i1", &[cond, self.const_bool(expected)])
+            self.call_intrinsic(
+                "llvm.expect",
+                &[self.type_i1()],
+                &[cond, self.const_bool(expected)],
+            )
         } else {
             cond
         }
@@ -644,17 +631,20 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     ) -> Self::Value {
         let typeid = self.get_metadata_value(typeid);
         let vtable_byte_offset = self.const_i32(vtable_byte_offset as i32);
-        let type_checked_load =
-            self.call_intrinsic("llvm.type.checked.load", &[llvtable, vtable_byte_offset, typeid]);
+        let type_checked_load = self.call_intrinsic(
+            "llvm.type.checked.load",
+            &[],
+            &[llvtable, vtable_byte_offset, typeid],
+        );
         self.extract_value(type_checked_load, 0)
     }
 
     fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value {
-        self.call_intrinsic("llvm.va_start", &[va_list])
+        self.call_intrinsic("llvm.va_start", &[self.type_ptr()], &[va_list])
     }
 
     fn va_end(&mut self, va_list: &'ll Value) -> &'ll Value {
-        self.call_intrinsic("llvm.va_end", &[va_list])
+        self.call_intrinsic("llvm.va_end", &[self.type_ptr()], &[va_list])
     }
 }
 
@@ -893,8 +883,8 @@ fn codegen_wasm_try<'ll, 'tcx>(
         let null = bx.const_null(bx.type_ptr());
         let funclet = bx.catch_pad(cs, &[null]);
 
-        let ptr = bx.call_intrinsic("llvm.wasm.get.exception", &[funclet.cleanuppad()]);
-        let _sel = bx.call_intrinsic("llvm.wasm.get.ehselector", &[funclet.cleanuppad()]);
+        let ptr = bx.call_intrinsic("llvm.wasm.get.exception", &[], &[funclet.cleanuppad()]);
+        let _sel = bx.call_intrinsic("llvm.wasm.get.ehselector", &[], &[funclet.cleanuppad()]);
 
         let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
         bx.call(catch_ty, None, None, catch_func, &[data, ptr], Some(&funclet), None);
@@ -1031,7 +1021,7 @@ fn codegen_emcc_try<'ll, 'tcx>(
         let selector = bx.extract_value(vals, 1);
 
         // Check if the typeid we got is the one for a Rust panic.
-        let rust_typeid = bx.call_intrinsic("llvm.eh.typeid.for", &[tydesc]);
+        let rust_typeid = bx.call_intrinsic("llvm.eh.typeid.for", &[bx.type_ptr()], &[tydesc]);
         let is_rust_panic = bx.icmp(IntPredicate::IntEQ, selector, rust_typeid);
         let is_rust_panic = bx.zext(is_rust_panic, bx.type_bool());
 
@@ -1522,56 +1512,37 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }};
         }
 
-        let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
-            let elem_ty = bx.cx.type_float_from_ty(*f);
-            match f.bit_width() {
-                16 => ("f16", elem_ty),
-                32 => ("f32", elem_ty),
-                64 => ("f64", elem_ty),
-                128 => ("f128", elem_ty),
-                _ => return_error!(InvalidMonomorphization::FloatingPointVector {
-                    span,
-                    name,
-                    f_ty: *f,
-                    in_ty,
-                }),
-            }
+        let elem_ty = if let ty::Float(f) = in_elem.kind() {
+            bx.cx.type_float_from_ty(*f)
         } else {
             return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
         };
 
         let vec_ty = bx.type_vector(elem_ty, in_len);
 
-        let (intr_name, fn_ty) = match name {
-            sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
-            sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
-            sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
+        let intr_name = match name {
+            sym::simd_ceil => "llvm.ceil",
+            sym::simd_fabs => "llvm.fabs",
+            sym::simd_fcos => "llvm.cos",
+            sym::simd_fexp2 => "llvm.exp2",
+            sym::simd_fexp => "llvm.exp",
+            sym::simd_flog10 => "llvm.log10",
+            sym::simd_flog2 => "llvm.log2",
+            sym::simd_flog => "llvm.log",
+            sym::simd_floor => "llvm.floor",
+            sym::simd_fma => "llvm.fma",
+            sym::simd_relaxed_fma => "llvm.fmuladd",
+            sym::simd_fsin => "llvm.sin",
+            sym::simd_fsqrt => "llvm.sqrt",
+            sym::simd_round => "llvm.round",
+            sym::simd_trunc => "llvm.trunc",
             _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
         };
-        let llvm_name = &format!("llvm.{intr_name}.v{in_len}{elem_ty_str}");
-        let f = bx.declare_cfn(llvm_name, llvm::UnnamedAddr::No, fn_ty);
-        let c = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+        Ok(bx.call_intrinsic(
+            intr_name,
+            &[vec_ty],
             &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
-            None,
-            None,
-        );
-        Ok(c)
+        ))
     }
 
     if std::matches!(
@@ -1595,29 +1566,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
     }
 
-    // FIXME: use:
-    //  https://github.com/llvm-mirror/llvm/blob/master/include/llvm/IR/Function.h#L182
-    //  https://github.com/llvm-mirror/llvm/blob/master/include/llvm/IR/Intrinsics.h#L81
-    fn llvm_vector_str(bx: &Builder<'_, '_, '_>, elem_ty: Ty<'_>, vec_len: u64) -> String {
-        match *elem_ty.kind() {
-            ty::Int(v) => format!(
-                "v{}i{}",
-                vec_len,
-                // Normalize to prevent crash if v: IntTy::Isize
-                v.normalize(bx.target_spec().pointer_width).bit_width().unwrap()
-            ),
-            ty::Uint(v) => format!(
-                "v{}i{}",
-                vec_len,
-                // Normalize to prevent crash if v: UIntTy::Usize
-                v.normalize(bx.target_spec().pointer_width).bit_width().unwrap()
-            ),
-            ty::Float(v) => format!("v{}f{}", vec_len, v.bit_width()),
-            ty::RawPtr(_, _) => format!("v{}p0", vec_len),
-            _ => unreachable!(),
-        }
-    }
-
     fn llvm_vector_ty<'ll>(cx: &CodegenCx<'ll, '_>, elem_ty: Ty<'_>, vec_len: u64) -> &'ll Type {
         let elem_ty = match *elem_ty.kind() {
             ty::Int(v) => cx.type_int_from_ty(v),
@@ -1698,38 +1646,22 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
         let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
 
         // Type of the vector of pointers:
         let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
-        let llvm_pointer_vec_str = llvm_vector_str(bx, element_ty1, in_len);
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, element_ty0, in_len);
 
-        let llvm_intrinsic =
-            format!("llvm.masked.gather.{llvm_elem_vec_str}.{llvm_pointer_vec_str}");
-        let fn_ty = bx.type_func(
-            &[llvm_pointer_vec_ty, alignment_ty, mask_ty, llvm_elem_vec_ty],
-            llvm_elem_vec_ty,
-        );
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.gather",
+            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
             &[args[1].immediate(), alignment, mask, args[0].immediate()],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     if name == sym::simd_masked_load {
@@ -1795,32 +1727,20 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
         let llvm_pointer = bx.type_ptr();
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, values_elem, values_len);
-
-        let llvm_intrinsic = format!("llvm.masked.load.{llvm_elem_vec_str}.p0");
-        let fn_ty = bx
-            .type_func(&[llvm_pointer, alignment_ty, mask_ty, llvm_elem_vec_ty], llvm_elem_vec_ty);
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.load",
+            &[llvm_elem_vec_ty, llvm_pointer],
             &[args[1].immediate(), alignment, mask, args[2].immediate()],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     if name == sym::simd_masked_store {
@@ -1880,33 +1800,20 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
-        let ret_t = bx.type_void();
-
         let llvm_pointer = bx.type_ptr();
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, values_elem, values_len);
-
-        let llvm_intrinsic = format!("llvm.masked.store.{llvm_elem_vec_str}.p0");
-        let fn_ty = bx.type_func(&[llvm_elem_vec_ty, llvm_pointer, alignment_ty, mask_ty], ret_t);
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.store",
+            &[llvm_elem_vec_ty, llvm_pointer],
             &[args[2].immediate(), args[1].immediate(), alignment, mask],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     if name == sym::simd_scatter {
@@ -1971,38 +1878,22 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
         let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
-
-        let ret_t = bx.type_void();
 
         // Type of the vector of pointers:
         let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
-        let llvm_pointer_vec_str = llvm_vector_str(bx, element_ty1, in_len);
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, element_ty0, in_len);
-
-        let llvm_intrinsic =
-            format!("llvm.masked.scatter.{llvm_elem_vec_str}.{llvm_pointer_vec_str}");
-        let fn_ty =
-            bx.type_func(&[llvm_elem_vec_ty, llvm_pointer_vec_ty, alignment_ty, mask_ty], ret_t);
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.scatter",
+            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
             &[args[0].immediate(), args[1].immediate(), alignment, mask],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     macro_rules! arith_red {
@@ -2431,40 +2322,31 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             },
             in_len as u64,
         );
-        let intrinsic_name = match name {
-            sym::simd_bswap => "bswap",
-            sym::simd_bitreverse => "bitreverse",
-            sym::simd_ctlz => "ctlz",
-            sym::simd_ctpop => "ctpop",
-            sym::simd_cttz => "cttz",
+        let llvm_intrinsic = match name {
+            sym::simd_bswap => "llvm.bswap",
+            sym::simd_bitreverse => "llvm.bitreverse",
+            sym::simd_ctlz => "llvm.ctlz",
+            sym::simd_ctpop => "llvm.ctpop",
+            sym::simd_cttz => "llvm.cttz",
             _ => unreachable!(),
         };
         let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
-        let llvm_intrinsic = &format!("llvm.{}.v{}i{}", intrinsic_name, in_len, int_size,);
 
         return match name {
             // byte swap is no-op for i8/u8
             sym::simd_bswap if int_size == 8 => Ok(args[0].immediate()),
             sym::simd_ctlz | sym::simd_cttz => {
                 // for the (int, i1 immediate) pair, the second arg adds `(0, true) => poison`
-                let fn_ty = bx.type_func(&[vec_ty, bx.type_i1()], vec_ty);
                 let dont_poison_on_zero = bx.const_int(bx.type_i1(), 0);
-                let f = bx.declare_cfn(llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-                Ok(bx.call(
-                    fn_ty,
-                    None,
-                    None,
-                    f,
+                Ok(bx.call_intrinsic(
+                    llvm_intrinsic,
+                    &[vec_ty],
                     &[args[0].immediate(), dont_poison_on_zero],
-                    None,
-                    None,
                 ))
             }
             sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctpop => {
                 // simple unary argument cases
-                let fn_ty = bx.type_func(&[vec_ty], vec_ty);
-                let f = bx.declare_cfn(llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-                Ok(bx.call(fn_ty, None, None, f, &[args[0].immediate()], None, None))
+                Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()]))
             }
             _ => unreachable!(),
         };
@@ -2495,10 +2377,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let lhs = args[0].immediate();
         let rhs = args[1].immediate();
         let is_add = name == sym::simd_saturating_add;
-        let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
-        let (signed, elem_width, elem_ty) = match *in_elem.kind() {
-            ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_int_from_ty(i)),
-            ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_uint_from_ty(i)),
+        let (signed, elem_ty) = match *in_elem.kind() {
+            ty::Int(i) => (true, bx.cx.type_int_from_ty(i)),
+            ty::Uint(i) => (false, bx.cx.type_uint_from_ty(i)),
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedVectorElementType {
                     span,
@@ -2508,19 +2389,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                 });
             }
         };
-        let llvm_intrinsic = &format!(
-            "llvm.{}{}.sat.v{}i{}",
+        let llvm_intrinsic = format!(
+            "llvm.{}{}.sat",
             if signed { 's' } else { 'u' },
             if is_add { "add" } else { "sub" },
-            in_len,
-            elem_width
         );
         let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
 
-        let fn_ty = bx.type_func(&[vec_ty, vec_ty], vec_ty);
-        let f = bx.declare_cfn(llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(fn_ty, None, None, f, &[lhs, rhs], None, None);
-        return Ok(v);
+        return Ok(bx.call_intrinsic(&llvm_intrinsic, &[vec_ty], &[lhs, rhs]));
     }
 
     span_bug!(span, "unknown SIMD intrinsic");
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 3383669e5162d..ced2a31ddfad1 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1213,6 +1213,13 @@ unsafe extern "C" {
         NewFn: &mut Option<&'a Value>,
         CanUpgradeDebugIntrinsicsToRecords: bool,
     ) -> bool;
+    pub(crate) fn LLVMIntrinsicCopyOverloadedName2<'a>(
+        Mod: &'a Module,
+        ID: c_uint,
+        ParamTypes: *const &'a Type,
+        ParamCount: size_t,
+        NameLength: *mut size_t,
+    ) -> *mut c_char;
 
     // Operations on parameters
     pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index f1c524d15d349..a326f58fb01d4 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -1,9 +1,9 @@
 #![allow(non_snake_case)]
 
 use std::ffi::{CStr, CString};
-use std::ptr;
 use std::str::FromStr;
 use std::string::FromUtf8Error;
+use std::{ptr, slice};
 
 use libc::c_uint;
 use rustc_abi::{Align, Size, WrappingRange};
@@ -345,6 +345,32 @@ impl Intrinsic {
     pub(crate) fn is_overloaded(self) -> bool {
         unsafe { LLVMIntrinsicIsOverloaded(self.id) == True }
     }
+
+    pub(crate) fn overloaded_name<'ll>(
+        self,
+        llmod: &'ll Module,
+        type_params: &[&'ll Type],
+    ) -> String {
+        let mut len = 0;
+        let ptr = unsafe {
+            LLVMIntrinsicCopyOverloadedName2(
+                llmod,
+                self.id,
+                type_params.as_ptr(),
+                type_params.len(),
+                &mut len,
+            )
+        };
+
+        let slice = unsafe { slice::from_raw_parts_mut(ptr.cast(), len) };
+        let copied = str::from_utf8(slice).expect("Non-UTF8 intrinsic name").to_string();
+
+        unsafe {
+            libc::free(ptr.cast());
+        }
+
+        copied
+    }
 }
 
 /// Safe wrapper for `LLVMSetValueName2` from a byte slice
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 1b303e7289ae5..655836375180e 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -1,4 +1,5 @@
 use std::borrow::Borrow;
+use std::hash::{Hash, Hasher};
 use std::{fmt, ptr};
 
 use libc::{c_char, c_uint};
@@ -25,6 +26,14 @@ impl PartialEq for Type {
     }
 }
 
+impl Eq for Type {}
+
+impl Hash for Type {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        ptr::hash(self, state);
+    }
+}
+
 impl fmt::Debug for Type {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.write_str(
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
index 7f78bc7569568..f35f551d5906a 100644
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -23,7 +23,7 @@ use crate::common::{AtomicRmwBinOp, IntPredicate, RealPredicate, Synchronization
 use crate::mir::operand::{OperandRef, OperandValue};
 use crate::mir::place::{PlaceRef, PlaceValue};
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum OverflowOp {
     Add,
     Sub,
@@ -215,7 +215,7 @@ pub trait BuilderMethods<'a, 'tcx>:
     fn checked_binop(
         &mut self,
         oop: OverflowOp,
-        ty: Ty<'_>,
+        ty: Ty<'tcx>,
         lhs: Self::Value,
         rhs: Self::Value,
     ) -> (Self::Value, Self::Value);

From c4859737e39176f0f5102e0d16502f2b26f727f2 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 21:42:24 +0530
Subject: [PATCH 08/10] Add bypass for `x86amx`

---
 compiler/rustc_codegen_llvm/src/abi.rs      | 19 +++++++++++++++++++
 compiler/rustc_codegen_llvm/src/builder.rs  |  7 +++++++
 compiler/rustc_codegen_llvm/src/context.rs  |  4 ++++
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs |  2 ++
 compiler/rustc_codegen_llvm/src/type_.rs    |  4 ++++
 5 files changed, 36 insertions(+)

diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 22485637e9363..40df2b99b13fd 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -361,6 +361,25 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         }
 
         match self.type_kind(llvm_ty) {
+            TypeKind::X86_AMX if self.type_kind(rust_ty) == TypeKind::Vector => {
+                let element_count = self.vector_length(rust_ty);
+                let element_ty = self.element_type(rust_ty);
+
+                let element_size_bits = match self.type_kind(element_ty) {
+                    TypeKind::Half => 16,
+                    TypeKind::Float => 32,
+                    TypeKind::Double => 64,
+                    TypeKind::FP128 => 128,
+                    TypeKind::Integer => self.int_width(element_ty),
+                    TypeKind::Pointer => self.int_width(self.isize_ty()),
+                    _ => bug!(
+                        "Vector element type `{element_ty:?}` not one of integer, float or pointer"
+                    ),
+                };
+                let vector_size_bits = element_size_bits * element_count as u64;
+
+                vector_size_bits == 8192
+            }
             TypeKind::BFloat => rust_ty == self.type_i16(),
             TypeKind::Vector => {
                 let llvm_element_count = self.vector_length(llvm_ty) as u64;
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index ad2b07067c1ab..ed89a3b8a84ca 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1650,6 +1650,13 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         }
 
         match self.type_kind(llvm_ty) {
+            TypeKind::X86_AMX => {
+                if is_argument {
+                    self.call_intrinsic("llvm.x86.cast.vector.to.tile", &[rust_ty], &[val])
+                } else {
+                    self.call_intrinsic("llvm.x86.cast.tile.to.vector", &[rust_ty], &[val])
+                }
+            }
             TypeKind::Vector if self.element_type(llvm_ty) == self.type_i1() => {
                 if is_argument {
                     self.trunc_int_to_i1_vector(val, dest_ty)
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 95cd78ec05643..a485cad618828 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -928,6 +928,7 @@ impl<'ll> CodegenCx<'ll, '_> {
         let t_isize = self.type_isize();
         let t_metadata = self.type_metadata();
         let t_token = self.type_token();
+        let x86amx = self.type_x86amx();
 
         ifn!("llvm.wasm.get.exception", fn(t_token) -> ptr);
         ifn!("llvm.wasm.get.ehselector", fn(t_token) -> t_i32);
@@ -1042,6 +1043,9 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.masked.gather", fn(1, t_i32, same_width_vector(0, i1), 0) -> 0);
         ifn!("llvm.masked.scatter", fn(0, 1, t_i32, same_width_vector(0, i1)) -> void);
 
+        ifn!("llvm.x86.cast.vector.to.tile", fn(0) -> x86amx);
+        ifn!("llvm.x86.cast.tile.to.vector", fn(x86amx) -> 0);
+
         bug!("Unknown intrinsic: `{base_name}`")
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index ced2a31ddfad1..ec5cf22fd0d82 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1084,6 +1084,8 @@ unsafe extern "C" {
     pub(crate) fn LLVMTokenTypeInContext(C: &Context) -> &Type;
     pub(crate) fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
 
+    pub(crate) fn LLVMX86AMXTypeInContext(C: &Context) -> &Type;
+
     // Operations on all values
     pub(crate) fn LLVMTypeOf(Val: &Value) -> &Type;
     pub(crate) fn LLVMGetValueName2(Val: &Value, Length: *mut size_t) -> *const c_char;
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 655836375180e..1a20378232065 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -66,6 +66,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         unsafe { llvm::LLVMMetadataTypeInContext(self.llcx()) }
     }
 
+    pub(crate) fn type_x86amx(&self) -> &'ll Type {
+        unsafe { llvm::LLVMX86AMXTypeInContext(self.llcx()) }
+    }
+
     ///x Creates an integer type with the given number of bits, e.g., i24
     pub(crate) fn type_ix(&self, num_bits: u64) -> &'ll Type {
         unsafe { llvm::LLVMIntTypeInContext(self.llcx(), num_bits as c_uint) }

From e1cec6bdff9a4c575ef92356dd1095c0138d18bf Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 12:20:16 +0530
Subject: [PATCH 09/10] Disable ABI checks for the `unadjusted` ABI

---
 .../rustc_monomorphize/src/mono_checks/abi_check.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_monomorphize/src/mono_checks/abi_check.rs b/compiler/rustc_monomorphize/src/mono_checks/abi_check.rs
index 8dbbb4d1713a3..111330b73efbb 100644
--- a/compiler/rustc_monomorphize/src/mono_checks/abi_check.rs
+++ b/compiler/rustc_monomorphize/src/mono_checks/abi_check.rs
@@ -1,6 +1,6 @@
 //! This module ensures that if a function's ABI requires a particular target feature,
 //! that target feature is enabled both on the callee and all callers.
-use rustc_abi::{BackendRepr, CanonAbi, RegKind, X86Call};
+use rustc_abi::{BackendRepr, CanonAbi, ExternAbi, RegKind, X86Call};
 use rustc_hir::{CRATE_HIR_ID, HirId};
 use rustc_middle::mir::{self, Location, traversal};
 use rustc_middle::ty::layout::LayoutCx;
@@ -152,6 +152,12 @@ fn do_check_wasm_abi<'tcx>(
 /// or return values for which the corresponding target feature is not enabled.
 fn check_instance_abi<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) {
     let typing_env = ty::TypingEnv::fully_monomorphized();
+    let ty = instance.ty(tcx, typing_env);
+    if ty.is_fn() && ty.fn_sig(tcx).abi() == ExternAbi::Unadjusted {
+        // We disable all checks for the unadjusted abi to allow linking to arbitrary LLVM
+        // intrinsics
+        return;
+    }
     let Ok(abi) = tcx.fn_abi_of_instance(typing_env.as_query_input((instance, ty::List::empty())))
     else {
         // An error will be reported during codegen if we cannot determine the ABI of this
@@ -184,9 +190,12 @@ fn check_call_site_abi<'tcx>(
     caller: InstanceKind<'tcx>,
     loc: impl Fn() -> (Span, HirId) + Copy,
 ) {
-    if callee.fn_sig(tcx).abi().is_rustic_abi() {
+    let extern_abi = callee.fn_sig(tcx).abi();
+    if extern_abi.is_rustic_abi() || extern_abi == ExternAbi::Unadjusted {
         // We directly handle the soundness of Rust ABIs -- so let's skip the majority of
         // call sites to avoid a perf regression.
+        // We disable all checks for the unadjusted abi to allow linking to arbitrary LLVM
+        // intrinsics
         return;
     }
     let typing_env = ty::TypingEnv::fully_monomorphized();

From 155a726865e98bb7391ebda5372bec46bdccc583 Mon Sep 17 00:00:00 2001
From: sayantn <sayantn05@gmail.com>
Date: Fri, 30 May 2025 23:46:07 +0530
Subject: [PATCH 10/10] Add AMX autocast tests

---
 tests/codegen/inject-autocast.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/codegen/inject-autocast.rs b/tests/codegen/inject-autocast.rs
index f51044dacf657..8e9ba614c9615 100644
--- a/tests/codegen/inject-autocast.rs
+++ b/tests/codegen/inject-autocast.rs
@@ -6,6 +6,9 @@
 
 use std::simd::{f32x4, i16x8, i64x2};
 
+#[repr(simd)]
+pub struct Tile([i8; 1024]);
+
 #[repr(C, packed)]
 pub struct Bar(u32, i64x2, i64x2, i64x2, i64x2, i64x2, i64x2);
 // CHECK: %Bar = type <{ i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }>
@@ -13,6 +16,22 @@ pub struct Bar(u32, i64x2, i64x2, i64x2, i64x2, i64x2, i64x2);
 #[repr(simd)]
 pub struct f16x8([f16; 8]);
 
+// CHECK-LABEL: @amx_autocast
+#[no_mangle]
+pub unsafe fn amx_autocast(m: u16, n: u16, k: u16, a: Tile, b: Tile, c: Tile) -> Tile {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.tdpbuud.internal"]
+        fn foo(m: u16, n: u16, k: u16, a: Tile, b: Tile, c: Tile) -> Tile;
+    }
+
+    // CHECK: %3 = call x86_amx @llvm.x86.cast.vector.to.tile.v1024i8(<1024 x i8> %0)
+    // CHECK-NEXT: %4 = call x86_amx @llvm.x86.cast.vector.to.tile.v1024i8(<1024 x i8> %1)
+    // CHECK-NEXT: %5 = call x86_amx @llvm.x86.cast.vector.to.tile.v1024i8(<1024 x i8> %2)
+    // CHECK-NEXT: %6 = call x86_amx @llvm.x86.tdpbuud.internal(i16 %m, i16 %n, i16 %k, x86_amx %3, x86_amx %4, x86_amx %5)
+    // CHECK-NEXT: %7 = call <1024 x i8> @llvm.x86.cast.tile.to.vector.v1024i8(x86_amx %6)
+    foo(m, n, k, a, b, c)
+}
+
 // CHECK-LABEL: @struct_with_i1_vector_autocast
 #[no_mangle]
 pub unsafe fn struct_with_i1_vector_autocast(a: i64x2, b: i64x2) -> (u8, u8) {
@@ -85,6 +104,12 @@ pub unsafe fn i1_vector_autocast(a: f16x8) -> u8 {
     foo(a, 1)
 }
 
+// CHECK: declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+
+// CHECK: declare x86_amx @llvm.x86.cast.vector.to.tile.v1024i8(<1024 x i8>)
+
+// CHECK: declare <1024 x i8> @llvm.x86.cast.tile.to.vector.v1024i8(x86_amx)
+
 // CHECK: declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)
 
 // CHECK: declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float>)