Skip to content
This repository was archived by the owner on Jun 10, 2024. It is now read-only.

Commit 583c836

Browse files
committed
fix: invalid alignment of tuple pointers
The issue here presented as tuples printing off the end of their elements array by some large arbitrary amount. This was happening because the memory we allocated for a tuple included space for the pointer metadata (the size) we need to restore fat pointers for tuples in Rust, so we would offset the base pointer of the allocation by `mem::size_of::<usize>()` bytes and return _that_ as the pointer to the tuple, and in cases where we were decoding the tuple, we'd subtract the offset to read the metadata and then use the metadata to restore a fat pointer. The problem here, is that by offsetting the pointer we allocated, we were changing the alignment of the pointer, and the new alignment was not 16 bytes, but 8, and therefore when encoding as an OpaqueTerm, the low 4 bits of the pointer were being masked off, effectively encoding a pointer to the base of the allocation, rather than to the start of the tuple elements. Then, when any mutation of a tuple involving the first element of the tuple occurred, it would overwrite the capacity metadata with an OpaqueTerm value, then any subsequent traversal of the tuple elements would think it was walking a tuple of like 9 billion elements. The solution here is simple, we have no good reason to offset the pointer we're encoding as a term, so instead we change the definition of Tuple to make it clear that the base of the struct includes the capacity metadata, and update all code that cares about the layout of the Tuple struct to be aware of this new layout. The pointers are properly aligned, and we don't have to worry about accidentally overwriting the metadata any longer. NOTE: This problem isn't present with Closure or BinaryData, which are also unsized types, because they already made their metadata field explicit in the struct definition. This fixes the issue with the global_dynamic_call test.
1 parent 5ed075e commit 583c836

File tree

9 files changed

+100
-85
lines changed

9 files changed

+100
-85
lines changed

compiler/mlir/c_src/lib/CIR/ConvertCIRToLLVMPass.cpp

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,11 @@ class CIRTypeConverter : public LLVMTypeConverter {
201201
// This layout matches what our runtime produces/expects, and we rely on this
202202
// to optimize operations on them
203203
Type getTupleType(unsigned arity) {
204+
MLIRContext *context = &getContext();
205+
auto isizeTy = getIsizeType();
204206
auto termTy = getTermType();
205-
return LLVM::LLVMArrayType::get(termTy, arity);
207+
auto elemsTy = LLVM::LLVMArrayType::get(termTy, arity);
208+
return LLVM::LLVMStructType::getLiteral(context, {isizeTy, elemsTy});
206209
}
207210

208211
// This layout is intentionally incomplete, as we don't control the layout of
@@ -293,8 +296,8 @@ class CIRTypeConverter : public LLVMTypeConverter {
293296
if (gcBoxTy.isInitialized())
294297
return gcBoxTy;
295298

296-
auto metadataTy = LLVM::LLVMStructType::getIdentified(
297-
context, "firefly_alloc::Metadata");
299+
auto metadataTy =
300+
LLVM::LLVMStructType::getIdentified(context, "firefly_alloc::Metadata");
298301
auto typeIdTy = getIsizeType();
299302
auto ptrMetadataTy = getIsizeType();
300303
assert(succeeded(metadataTy.setBody({typeIdTy, ptrMetadataTy},
@@ -762,9 +765,9 @@ class CIRConversionPattern : public ConversionPattern {
762765
"__firefly_bigint_from_digits", calleeType);
763766
}
764767

765-
Operation *call = builder.create<LLVM::CallOp>(loc, TypeRange({termTy}),
766-
"__firefly_bigint_from_digits",
767-
ValueRange({fatPtr}));
768+
Operation *call = builder.create<LLVM::CallOp>(
769+
loc, TypeRange({termTy}), "__firefly_bigint_from_digits",
770+
ValueRange({fatPtr}));
768771
return call->getResult(0);
769772
}
770773

@@ -1744,8 +1747,8 @@ struct TypeOfOpLowering : public ConvertCIROpToLLVMPattern<cir::TypeOfOp> {
17441747
if (!callee) {
17451748
auto calleeType =
17461749
LLVM::LLVMFunctionType::get(i32Ty, ArrayRef<Type>{termTy});
1747-
insertFunctionDeclaration(rewriter, loc, module, "__firefly_builtin_typeof",
1748-
calleeType);
1750+
insertFunctionDeclaration(rewriter, loc, module,
1751+
"__firefly_builtin_typeof", calleeType);
17491752
}
17501753

17511754
rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, TypeRange({i32Ty}),
@@ -1913,11 +1916,12 @@ struct IsTaggedTupleOpLowering
19131916
Value tuplePtr = decodePtr(builder, l, tupleTy, input);
19141917
SmallVector<Value> indices;
19151918
// This first index refers to the base of the tuple struct
1916-
// This second index refers to the first element of the tuple data
1919+
// This second index refers to the base of the elements array
1920+
// This third index refers to the first element of the array
19171921
Value zero = createIsizeConstant(builder, l, 0);
19181922
Value first = createIndexAttrConstant(builder, l, i32Ty, 0);
19191923
Value elemPtr = builder.create<LLVM::GEPOp>(
1920-
l, termPtrTy, tuplePtr, ValueRange({zero, first}));
1924+
l, termPtrTy, tuplePtr, ValueRange({zero, first, first}));
19211925
Value elem = builder.create<LLVM::LoadOp>(l, elemPtr);
19221926
Value expectedAtom = createAtom(builder, l, atom.getName(), module);
19231927
Value isEq = builder.create<LLVM::ICmpOp>(l, LLVM::ICmpPredicate::eq,
@@ -2185,8 +2189,8 @@ struct MallocOpLowering : public ConvertCIROpToLLVMPattern<cir::MallocOp> {
21852189
if (!callee) {
21862190
auto calleeType =
21872191
LLVM::LLVMFunctionType::get(i8PtrTy, ArrayRef<Type>{i32Ty, isizeTy});
2188-
insertFunctionDeclaration(rewriter, loc, module, "__firefly_builtin_malloc",
2189-
calleeType);
2192+
insertFunctionDeclaration(rewriter, loc, module,
2193+
"__firefly_builtin_malloc", calleeType);
21902194
}
21912195

21922196
Type outType;
@@ -2521,10 +2525,11 @@ struct GetElementOpLowering
25212525
Value ptr = decodePtr(rewriter, loc, tupleTy, tuple);
25222526
// Then, calculate the pointer to the <index>th element
25232527
Value base = createI32Constant(rewriter, loc, 0);
2528+
Value one = createI32Constant(rewriter, loc, 1);
25242529
Value index =
25252530
createI32Constant(rewriter, loc, adaptor.index().getLimitedValue());
2526-
Value elemPtr = rewriter.create<LLVM::GEPOp>(loc, termPtrTy, ptr,
2527-
ValueRange({base, index}));
2531+
Value elemPtr = rewriter.create<LLVM::GEPOp>(
2532+
loc, termPtrTy, ptr, ValueRange({base, one, index}));
25282533
// Then return the result of loading the value from the calculated pointer
25292534
rewriter.replaceOpWithNewOp<LLVM::LoadOp>(op, elemPtr);
25302535
return success();
@@ -2567,8 +2572,8 @@ struct SetElementOpLowering
25672572
auto tuplePtrTy = LLVM::LLVMPointerType::get(tupleTy);
25682573
auto calleeType = LLVM::LLVMFunctionType::get(
25692574
termTy, ArrayRef<Type>{tuplePtrTy, isizeTy, termTy});
2570-
insertFunctionDeclaration(rewriter, loc, module, "__firefly_set_element",
2571-
calleeType);
2575+
insertFunctionDeclaration(rewriter, loc, module,
2576+
"__firefly_set_element", calleeType);
25722577
}
25732578
rewriter.replaceOpWithNewOp<LLVM::CallOp>(
25742579
op, TypeRange({termTy}), "__firefly_set_element",
@@ -2579,10 +2584,11 @@ struct SetElementOpLowering
25792584
// If we reach here, we are allowed to mutate the original tuple in-place;
25802585
// Then, calculate the pointer to the <index>th element
25812586
Value base = createI32Constant(rewriter, loc, 0);
2587+
Value one = createI32Constant(rewriter, loc, 1);
25822588
Value index32 =
25832589
createI32Constant(rewriter, loc, adaptor.index().getLimitedValue());
2584-
Value elemPtr = rewriter.create<LLVM::GEPOp>(loc, tuplePtrTy, ptr,
2585-
ValueRange({base, index32}));
2590+
Value elemPtr = rewriter.create<LLVM::GEPOp>(
2591+
loc, tuplePtrTy, ptr, ValueRange({base, one, index32}));
25862592
// Then store the input value at the calculated pointer
25872593
rewriter.create<LLVM::StoreOp>(loc, value, elemPtr);
25882594
rewriter.replaceOp(op, ValueRange({tuple}));
@@ -2967,8 +2973,8 @@ struct BinaryMatchStartOpLowering
29672973
if (!callee) {
29682974
auto calleeType =
29692975
LLVM::LLVMFunctionType::get(resultTy, ArrayRef<Type>{termTy});
2970-
insertFunctionDeclaration(rewriter, loc, module, "__firefly_bs_match_start",
2971-
calleeType);
2976+
insertFunctionDeclaration(rewriter, loc, module,
2977+
"__firefly_bs_match_start", calleeType);
29722978
}
29732979
auto callOp = rewriter.create<LLVM::CallOp>(loc, TypeRange({resultTy}),
29742980
"__firefly_bs_match_start",

library/alloc/src/gc/boxed/gcbox.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ where
1616
T: ?Sized + 'static,
1717
PtrMetadata: From<<T as Pointee>::Metadata> + TryInto<<T as Pointee>::Metadata>,
1818
{
19-
ptr: NonNull<u8>,
19+
ptr: NonNull<()>,
2020
_marker: PhantomData<T>,
2121
}
2222

@@ -56,7 +56,7 @@ where
5656
let meta = Metadata::new::<T>(&value);
5757
let value_layout = Layout::for_value(&value);
5858
let (layout, value_offset) = Layout::new::<Metadata>().extend(value_layout).unwrap();
59-
let ptr: NonNull<u8> = alloc.allocate(layout)?.cast();
59+
let ptr: NonNull<()> = alloc.allocate(layout)?.cast();
6060
unsafe {
6161
let ptr = NonNull::new_unchecked(ptr.as_ptr().add(value_offset));
6262
let boxed = Self {
@@ -72,7 +72,7 @@ where
7272
pub fn new_uninit_in<A: Allocator>(alloc: A) -> Result<GcBox<MaybeUninit<T>>, AllocError> {
7373
let value_layout = Layout::new::<T>();
7474
let (layout, value_offset) = Layout::new::<Metadata>().extend(value_layout).unwrap();
75-
let ptr: NonNull<u8> = alloc.allocate(layout)?.cast();
75+
let ptr: NonNull<()> = alloc.allocate(layout)?.cast();
7676
unsafe {
7777
let ptr = NonNull::new_unchecked(ptr.as_ptr().add(value_offset));
7878
let meta = Metadata::new::<T>(ptr.as_ptr() as *mut T);
@@ -103,7 +103,7 @@ where
103103
let meta = Metadata::new::<T>(unsized_);
104104
let value_layout = Layout::for_value(&value);
105105
let (layout, value_offset) = Layout::new::<Metadata>().extend(value_layout).unwrap();
106-
let ptr: NonNull<u8> = alloc.allocate(layout)?.cast();
106+
let ptr: NonNull<()> = alloc.allocate(layout)?.cast();
107107
unsafe {
108108
let ptr = NonNull::new_unchecked(ptr.as_ptr().add(value_offset));
109109
let boxed = Self {
@@ -136,7 +136,7 @@ where
136136
};
137137
// Construct a new box
138138
let boxed = GcBox {
139-
ptr: unsafe { NonNull::new_unchecked(ptr as *mut u8) },
139+
ptr: unsafe { NonNull::new_unchecked(ptr as *mut ()) },
140140
_marker: PhantomData,
141141
};
142142
// Write the new metadata to the header for our new box
@@ -157,7 +157,7 @@ where
157157
/// use a combination of this function, a jump table of type ids, and subsequent unchecked casts.
158158
/// This allows for efficient pointer casts while ensuring we don't improperly cast a pointer to
159159
/// the wrong type.
160-
pub unsafe fn type_id(raw: *mut u8) -> TypeId {
160+
pub unsafe fn type_id(raw: *mut ()) -> TypeId {
161161
debug_assert!(!raw.is_null());
162162
let header = &*header(raw);
163163
header.ty
@@ -171,7 +171,7 @@ where
171171
///
172172
/// NOTE: This function is a bit safer than `from_raw` in that it won't panic if the pointee
173173
/// is not of the correct type, instead it returns `Err`.
174-
pub unsafe fn try_from_raw(raw: *mut u8) -> Result<Self, ()> {
174+
pub unsafe fn try_from_raw(raw: *mut ()) -> Result<Self, ()> {
175175
debug_assert!(!raw.is_null());
176176
let meta = &*header(raw);
177177
if meta.is::<T>() {
@@ -215,7 +215,7 @@ where
215215
/// but ensure that the pointee is the correct type.
216216
///
217217
/// NOTE: Seriously, don't use this.
218-
pub unsafe fn from_raw_unchecked(raw: *mut u8) -> Self {
218+
pub unsafe fn from_raw_unchecked(raw: *mut ()) -> Self {
219219
debug_assert!(!raw.is_null());
220220
Self {
221221
ptr: NonNull::new_unchecked(raw),
@@ -229,7 +229,7 @@ where
229229
}
230230

231231
#[inline]
232-
pub fn as_ptr(boxed: &Self) -> *mut u8 {
232+
pub fn as_ptr(boxed: &Self) -> *mut () {
233233
boxed.ptr.as_ptr()
234234
}
235235

@@ -242,7 +242,7 @@ where
242242
ptr::drop_in_place(value);
243243
}
244244
let ptr = NonNull::new_unchecked(boxed.ptr.as_ptr().sub(value_offset));
245-
alloc.deallocate(ptr, layout);
245+
alloc.deallocate(ptr.cast(), layout);
246246
}
247247

248248
#[inline]
@@ -261,7 +261,7 @@ impl<T: ?Sized + 'static + Pointee<Metadata = usize>> GcBox<T> {
261261
{
262262
let raw = Self::into_raw(self) as *mut U;
263263
GcBox {
264-
ptr: unsafe { NonNull::new_unchecked(raw as *mut u8) },
264+
ptr: unsafe { NonNull::new_unchecked(raw as *mut ()) },
265265
_marker: PhantomData,
266266
}
267267
}
@@ -279,7 +279,7 @@ impl GcBox<dyn Any> {
279279
let Some(concrete) = any.downcast_ref::<T>() else { return None; };
280280
let meta = Metadata::new::<T>(concrete);
281281
let boxed = GcBox {
282-
ptr: unsafe { NonNull::new_unchecked(concrete as *const _ as *mut u8) },
282+
ptr: unsafe { NonNull::new_unchecked(concrete as *const _ as *mut ()) },
283283
_marker: PhantomData,
284284
};
285285
let header = header(boxed.ptr.as_ptr());
@@ -302,7 +302,7 @@ impl GcBox<dyn Any + Send> {
302302
let Some(concrete) = any.downcast_ref::<T>() else { return None; };
303303
let meta = Metadata::new::<T>(concrete);
304304
let boxed = GcBox {
305-
ptr: unsafe { NonNull::new_unchecked(concrete as *const _ as *mut u8) },
305+
ptr: unsafe { NonNull::new_unchecked(concrete as *const _ as *mut ()) },
306306
_marker: PhantomData,
307307
};
308308
let header = header(boxed.ptr.as_ptr());
@@ -325,7 +325,7 @@ impl GcBox<dyn Any + Send + Sync> {
325325
let Some(concrete) = any.downcast_ref::<T>() else { return None; };
326326
let meta = Metadata::new::<T>(concrete);
327327
let boxed = GcBox {
328-
ptr: unsafe { NonNull::new_unchecked(concrete as *const _ as *mut u8) },
328+
ptr: unsafe { NonNull::new_unchecked(concrete as *const _ as *mut ()) },
329329
_marker: PhantomData,
330330
};
331331
let header = header(boxed.ptr.as_ptr());
@@ -349,7 +349,7 @@ where
349349
let meta = Metadata::new::<T>(empty);
350350
let value_layout = unsafe { Layout::for_value_raw(empty) };
351351
let (layout, value_offset) = Layout::new::<Metadata>().extend(value_layout).unwrap();
352-
let ptr: NonNull<u8> = alloc.allocate(layout)?.cast();
352+
let ptr: NonNull<()> = alloc.allocate(layout)?.cast();
353353
unsafe {
354354
let ptr = NonNull::new_unchecked(ptr.as_ptr().add(value_offset));
355355
let boxed = Self {
@@ -697,8 +697,8 @@ where
697697
}
698698
}
699699

700-
fn header(ptr: *mut u8) -> *mut Metadata {
701-
unsafe { ptr.sub(mem::size_of::<Metadata>()).cast() }
700+
fn header(ptr: *mut ()) -> *mut Metadata {
701+
unsafe { ptr.byte_sub(mem::size_of::<Metadata>()).cast() }
702702
}
703703

704704
/// This is a marker type used to indicate that the data pointed to by a GcBox has been

library/alloc/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#![feature(slice_ptr_len)]
77
// Used for access to pointer metadata
88
#![feature(ptr_metadata)]
9+
#![feature(pointer_byte_offsets)]
910
// Used for access to size/alignment information for raw pointers
1011
#![feature(layout_for_ptr)]
1112
// Used for the Unsize marker trait

0 commit comments

Comments
 (0)