2 changes: 1 addition & 1 deletion compiler/rustc_codegen_cranelift/example/mini_core.rs
@@ -666,7 +666,7 @@ pub mod intrinsics {
#[rustc_intrinsic]
pub fn bswap<T>(x: T) -> T;
#[rustc_intrinsic]
- pub unsafe fn write_bytes<T>(dst: *mut T, val: u8, count: usize);
+ pub unsafe fn write_bytes<T, B>(dst: *mut T, val: B, count: usize);
#[rustc_intrinsic]
pub unsafe fn unreachable() -> !;
}
@@ -125,7 +125,7 @@ static NUM_REF: &'static u8 = unsafe { &*&raw const NUM };

unsafe fn zeroed<T>() -> T {
let mut uninit = MaybeUninit { uninit: () };
- intrinsics::write_bytes(&mut uninit.value.value as *mut T, 0, 1);
+ intrinsics::write_bytes(&mut uninit.value.value as *mut T, 0u8, 1);
uninit.value.value
}

6 changes: 6 additions & 0 deletions compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -685,6 +685,12 @@ fn codegen_regular_intrinsic_call<'tcx>(

sym::write_bytes | sym::volatile_set_memory => {
intrinsic_args!(fx, args => (dst, val, count); intrinsic);
+ if val.layout().size.bytes() != 1 {
+ // incorrect sizes can be encountered on dead branches
+ fx.bcx.ins().trap(TrapCode::user(1).unwrap());
+ return Ok(());
+ };

let val = val.load_scalar(fx);
let count = count.load_scalar(fx);

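To see how a size-mismatched call reaches the backends at all, consider a sketch along the lines of the spec_fill change later in this diff (illustrative code, not from the PR): for any T wider than one byte the first arm below is dead, but it is still monomorphized and handed to codegen, which is why cranelift traps and cg_ssa emits an unreachable instead of rejecting the program.

#![feature(core_intrinsics)]
use core::mem::size_of;

unsafe fn fill_with<T: Copy>(dst: *mut T, val: T, count: usize) {
    if size_of::<T>() == 1 {
        // Only reachable when T is exactly one byte; B = T then satisfies
        // the intrinsic's new size requirement.
        unsafe { core::intrinsics::write_bytes(dst, val, count) };
    } else {
        for i in 0..count {
            unsafe { dst.add(i).write(val) };
        }
    }
}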
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_gcc/example/mini_core.rs
Original file line number Diff line number Diff line change
@@ -677,7 +677,7 @@ pub mod intrinsics {
#[rustc_intrinsic]
pub fn bswap<T>(x: T) -> T;
#[rustc_intrinsic]
- pub unsafe fn write_bytes<T>(dst: *mut T, val: u8, count: usize);
+ pub unsafe fn write_bytes<T, B>(dst: *mut T, val: B, count: usize);
#[rustc_intrinsic]
pub unsafe fn unreachable() -> !;
}
@@ -127,7 +127,7 @@ impl<T: ?Sized, U: ?Sized> CoerceUnsized<Unique<U>> for Unique<T> where T: Unsize<U>

unsafe fn zeroed<T>() -> T {
let mut uninit = MaybeUninit { uninit: () };
- intrinsics::write_bytes(&mut uninit.value.value as *mut T, 0, 1);
+ intrinsics::write_bytes(&mut uninit.value.value as *mut T, 0u8, 1);
uninit.value.value
}

8 changes: 7 additions & 1 deletion compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
@@ -198,12 +198,18 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
return Ok(());
}
sym::write_bytes => {
+ // invalid types may be encountered on dead branches after a size check.
+ if args[1].layout.size.bytes() != 1 {
+ bx.unreachable_nonterminator();
+ return Ok(());
+ }
+ let val = bx.from_immediate(args[1].immediate());
memset_intrinsic(
bx,
false,
fn_args.type_at(0),
args[0].immediate(),
- args[1].immediate(),
+ val,
args[2].immediate(),
);
return Ok(());
32 changes: 29 additions & 3 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -4,6 +4,7 @@

use std::assert_matches::assert_matches;

+ use either::Either;
use rustc_abi::{FieldIdx, HasDataLayout, Size};
use rustc_apfloat::ieee::{Double, Half, Quad, Single};
use rustc_middle::mir::interpret::{CTFE_ALLOC_SALT, read_target_uint, write_target_uint};
@@ -866,18 +867,43 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
count: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
name: &'static str,
) -> InterpResult<'tcx> {
- let layout = self.layout_of(dst.layout.ty.builtin_deref(true).unwrap())?;
+ let dst_layout = self.layout_of(dst.layout.ty.builtin_deref(true).unwrap())?;
+ let src_layout = self.layout_of(byte.layout.ty)?;

+ if src_layout.size.bytes_usize() != 1 {
+ throw_ub_custom!(
+ fluent::const_eval_scalar_size_mismatch,
+ target_size = 1,
+ data_size = src_layout.size.bytes(),
+ );
+ }

let dst = self.read_pointer(dst)?;
- let byte = self.read_scalar(byte)?.to_u8()?;

let count = self.read_target_usize(count)?;

// `checked_mul` enforces a too small bound (the correct one would probably be target_isize_max),
// but no actual allocation can be big enough for the difference to be noticeable.
let len = self
- .compute_size_in_bytes(layout.size, count)
+ .compute_size_in_bytes(dst_layout.size, count)
.ok_or_else(|| err_ub_custom!(fluent::const_eval_size_overflow, name = name))?;

+ let byte = match self.read_immediate_raw(byte)? {
+ Either::Left(src_place) => {
+ // val is not an immediate, possibly uninit.
+ self.mem_copy_repeatedly(
+ src_place.ptr(),
+ dst,
+ Size::from_bytes(1),
+ len.bytes(),
+ /* nonoverlapping: */ false,
+ )?;
+ return interp_ok(());
+ }
+ Either::Right(imm) => imm,
+ };

+ let byte = byte.to_scalar().to_u8()?;
let bytes = std::iter::repeat(byte).take(len.bytes_usize());
self.write_bytes_ptr(dst, bytes)
}
5 changes: 4 additions & 1 deletion compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -341,7 +341,10 @@ pub(crate) fn check_intrinsic_type(
let byte_ptr = Ty::new_imm_ptr(tcx, tcx.types.u8);
(0, 0, vec![byte_ptr, byte_ptr, tcx.types.usize], tcx.types.i32)
}
- sym::write_bytes | sym::volatile_set_memory => (
+ sym::write_bytes => {
+ (2, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(1), tcx.types.usize], tcx.types.unit)
+ }
+ sym::volatile_set_memory => (
1,
0,
vec![Ty::new_mut_ptr(tcx, param(0)), tcx.types.u8, tcx.types.usize],
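In these tuples the first field is the number of generic type parameters, which is why write_bytes is now checked with 2, followed by the argument types and the return type. Spelled out as surface signatures, with hypothetical `_shape` names, the two arms now check:

pub unsafe fn write_bytes_shape<T, B>(_dst: *mut T, _val: B, _count: usize) {}
pub unsafe fn volatile_set_memory_shape<T>(_dst: *mut T, _val: u8, _count: usize) {}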
4 changes: 3 additions & 1 deletion library/core/src/intrinsics/mod.rs
@@ -2882,6 +2882,8 @@ pub const unsafe fn copy_nonoverlapping<T>(src: *const T, dst: *mut T, count: usize);
pub const unsafe fn copy<T>(src: *const T, dst: *mut T, count: usize);

/// This is an accidentally-stable alias to [`ptr::write_bytes`]; use that instead.
+ ///
+ /// `val` must be 1 byte wide; it is allowed to be uninit.
// Note (intentionally not in the doc comment): `ptr::write_bytes` adds some extra
// debug assertions; if you are writing compiler tests or code inside the standard library
// that wants to avoid those debug assertions, directly call this intrinsic instead.
@@ -2890,7 +2892,7 @@ pub const unsafe fn copy<T>(src: *const T, dst: *mut T, count: usize);
#[rustc_const_stable(feature = "const_intrinsic_copy", since = "1.83.0")]
#[rustc_nounwind]
#[rustc_intrinsic]
- pub const unsafe fn write_bytes<T>(dst: *mut T, val: u8, count: usize);
+ pub const unsafe fn write_bytes<T, B>(dst: *mut T, val: B, count: usize);
Member: This does have a small chance of breaking stable code, as the intrinsic is (accidentally) exposed on stable. Such code would have seen deprecation warnings since Rust 1.86.

Member: Whether we can allow this for arbitrary 1-sized types depends on whether LLVM intends to allow memset on arbitrary bytes. @nikic, in a future where LLVM has a byte type or something else that has a size of 8 bits but can hold non-integral things such as provenance or poison/undef, do you expect memset will work on such values? Its argument type might have to change for that...

Contributor: I would not expect the memset argument type to change, but there is a separate memset.pattern intrinsic which accepts an arbitrary argument type and could be used for that purpose. It's not ready for general usage yet, though. And there was some discussion about generalizing memset to effectively become memset.pattern in the future.

Member: Okay, so by landing this we'd be making the bet that

  • memset currently works fine with arbitrary bytes in an i8 (the same bet we're already making when compiling MaybeUninit<u8> to LLVM's i8)
  • if that ever becomes a problem, memset.pattern will be a viable alternative

Member: Why does it matter what LLVM does in the future? We can just adapt our toolchain, right?

Member: LLVM currently has no explicitly documented way to memset with a byte that is uninit or contains provenance. So there's a risk that this might just not be something LLVM can even express in the future.

Member: If LLVM were to require that the byte for memset be initialized, couldn't we just change the lowering of this intrinsic to not call memset and do something else, like a loop?
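For reference, a loop-based fallback with the semantics the intrinsic promises could look roughly like this (a hypothetical helper, not an actual lowering; it relies on B being exactly one byte wide, which the intrinsic now requires):

use core::mem::size_of;

unsafe fn write_bytes_loop<T, B: Copy>(dst: *mut T, val: B, count: usize) {
    // Reinterpret the destination as bytes and write `val` to each of the
    // count * size_of::<T>() byte slots that memset would have filled.
    let p = dst.cast::<B>();
    for i in 0..count * size_of::<T>() {
        unsafe { p.add(i).write(val) };
    }
}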

Member: Sure, we could provide our own implementation. That doesn't sound great for performance.

We can also revert the PR / go with the libs-only approach then, as long as we keep the intrinsic private and don't publicly expose its new capabilities.

Member: Yes, it wouldn't be great for performance, but that's a problem we could also fix, and in any case I don't think it's useful to think about what internal APIs we should write on the basis of "maybe in the future LLVM decides to be bad at optimizing this".

If this discussion is being driven by the fact that this intrinsic was accidentally exposed, I wonder what libs-api would have to say about that.

Member: This subthread is just about the LLVM concerns, since we're reaching slightly deeper into an area that is poorly defined there, which we should only do deliberately IMO. Now that we have deliberated, I'm okay with proceeding.

The discussion for the signature change should happen here.


/// Returns the minimum (IEEE 754-2008 minNum) of two `f16` values.
///
36 changes: 35 additions & 1 deletion library/core/src/slice/specialize.rs
@@ -15,9 +15,43 @@ impl<T: Clone> SpecFill<T> for [T] {
}

impl<T: Copy> SpecFill<T> for [T] {
- fn spec_fill(&mut self, value: T) {
+ default fn spec_fill(&mut self, value: T) {
+ if size_of::<T>() == 1 {
+ // SAFETY: The pointer is derived from a reference, so it's writable.
+ // And we checked that T is 1 byte wide.
+ unsafe {
+ // use the intrinsic since it allows any T as long as it's 1 byte wide
+ crate::intrinsics::write_bytes(self.as_mut_ptr(), value, self.len());
+ }
+ return;
+ }
for item in self.iter_mut() {
*item = value;
}
}
}

+ macro spec_fill_int {
+ ($($type:ty)*) => {$(
+ impl SpecFill<$type> for [$type] {
+ #[inline]
+ fn spec_fill(&mut self, value: $type) {
+ if crate::intrinsics::is_val_statically_known(value) {
Member: is_val_statically_known yields a non-deterministic result under Miri. Thus the specialisation would only apply half the time.

Suggested change:
- if crate::intrinsics::is_val_statically_known(value) {
+ if cfg!(miri) || crate::intrinsics::is_val_statically_known(value) {

+ let bytes = value.to_ne_bytes();
+ if value == <$type>::from_ne_bytes([bytes[0]; size_of::<$type>()]) {
+ // SAFETY: The pointer is derived from a reference, so it's writable.
+ unsafe {
+ crate::intrinsics::write_bytes(self.as_mut_ptr(), bytes[0], self.len());
+ }
+ return;
+ }
+ }
+ for item in self.iter_mut() {
+ *item = value;
+ }
+ }
+ }
+ )*}
+ }
+
+ spec_fill_int! { u16 i16 u32 i32 u64 i64 u128 i128 usize isize }
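As a quick worked example of that repeated-byte check (ordinary user code, not part of the diff):

fn main() {
    // 0xFFFF: both native-endian bytes are 0xFF, so rebuilding the value from
    // its first byte succeeds and the fill can become one memset of 0xFF.
    let v = 0xFFFFu16;
    let b = v.to_ne_bytes();
    assert_eq!(v, u16::from_ne_bytes([b[0]; 2]));

    // 0xABCD: the two bytes differ, so the check fails and spec_fill falls
    // back to the element-wise loop.
    let w = 0xABCDu16;
    let wb = w.to_ne_bytes();
    assert_ne!(w, u16::from_ne_bytes([wb[0]; 2]));
}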
Member, commenting on lines +34 to +57: Couldn't this approach (for i8/u8) suffice to fix the Miri performance issue, without any changes to the intrinsic?

Member Author: Sure, that'd work for the particular reported case, but not for newtypes. slice::fill is generic, so it's nice if it can handle anything that has a scalar ABI, but we can't detect "scalar but always-initialized" in the library, so it has to be more general.

Member (@RalfJung, Oct 5, 2025): That sounds like the motivation you have in mind goes beyond what is spelled out in the PR description.

(You link to #87891 but not the original PR that landed the change in the first place. So without digging through the history of this code it's not clear what you want this to do and why, apart from the Miri perf issue that you linked. Would be nice to make all that context easily accessible from the PR description.)

Member: Found the original PR at #83245, but that also doesn't say anything, so I guess it's mostly "because we can".

🤷 The extended intrinsic isn't pretty, but it's not terrible either. So if t-libs says this is worth a special case in the compiler, that's fine.

Member Author (@the8472, Oct 5, 2025): Previously it only covered u8, i8, and bool. That leaves out other cases where people may want to initialize large chunks of AtomicU8, MaybeUninit<u8>, or custom newtypes. It's some amount of "because we can" and some "avoid performance cliffs where approach A gets optimized and B doesn't".
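As a hypothetical illustration of the newtype case, any 1-byte Copy wrapper now takes the memset path through the generic specialization, without needing its own impl:

#[derive(Clone, Copy)]
struct Flag(u8);

fn clear(flags: &mut [Flag]) {
    // size_of::<Flag>() == 1, so the Copy specialization forwards the whole
    // fill to a single write_bytes (memset) instead of looping over elements.
    flags.fill(Flag(0));
}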

17 changes: 15 additions & 2 deletions library/coretests/tests/intrinsics.rs
@@ -1,5 +1,7 @@
use core::any::TypeId;
+ use core::hint::black_box;
use core::intrinsics::assume;
+ use core::mem::MaybeUninit;

#[test]
fn test_typeid_sized_types() {
@@ -43,7 +45,7 @@ const fn test_write_bytes_in_const_contexts() {
const TEST: [u32; 3] = {
let mut arr = [1u32, 2, 3];
unsafe {
- write_bytes(arr.as_mut_ptr(), 0, 2);
+ write_bytes(arr.as_mut_ptr(), 0u8, 2);
}
arr
};
@@ -55,14 +57,25 @@ const fn test_write_bytes_in_const_contexts() {
const TEST2: [u32; 3] = {
let mut arr = [1u32, 2, 3];
unsafe {
- write_bytes(arr.as_mut_ptr(), 1, 2);
+ write_bytes(arr.as_mut_ptr(), 1u8, 2);
}
arr
};

assert!(TEST2[0] == 16843009);
assert!(TEST2[1] == 16843009);
assert!(TEST2[2] == 3);

+ const TEST3: [MaybeUninit<u32>; 2] = {
+ let mut arr: [MaybeUninit<u32>; 2] = [MaybeUninit::uninit(), MaybeUninit::new(1)];
+ unsafe {
+ write_bytes(arr.as_mut_ptr(), MaybeUninit::<u8>::uninit(), 2);
+ }
+ arr
+ };
+
+ // can't do much with uninitialized values, just make sure it compiles
+ black_box(TEST3);
}

#[test]
28 changes: 28 additions & 0 deletions tests/codegen-llvm/lib-optimizations/slice_fill.rs
@@ -0,0 +1,28 @@
//@ compile-flags: -Copt-level=3
#![crate_type = "lib"]

use std::mem::MaybeUninit;

// CHECK-LABEL: @slice_fill_pass_undef
#[no_mangle]
pub fn slice_fill_pass_undef(s: &mut [MaybeUninit<u8>], v: MaybeUninit<u8>) {
// CHECK: tail call void @llvm.memset.{{.*}}(ptr nonnull align 1 %s.0, i8 %v, {{.*}} %s.1, i1 false)
// CHECK: ret
s.fill(v);
}

// CHECK-LABEL: @slice_fill_uninit
#[no_mangle]
pub fn slice_fill_uninit(s: &mut [MaybeUninit<u8>]) {
// CHECK-NOT: call
// CHECK: ret void
s.fill(MaybeUninit::uninit());
}

// CHECK-LABEL: @slice_wide_memset
#[no_mangle]
pub fn slice_wide_memset(s: &mut [u16]) {
// CHECK: tail call void @llvm.memset.{{.*}}(ptr nonnull align 2 %s.0, i8 -1
// CHECK: ret
s.fill(0xFFFF);
}