diff --git a/third_party/move/move-vm/runtime/Cargo.toml b/third_party/move/move-vm/runtime/Cargo.toml
index dec7e3b86e9c6..cc96891f9905e 100644
--- a/third_party/move/move-vm/runtime/Cargo.toml
+++ b/third_party/move/move-vm/runtime/Cargo.toml
@@ -46,16 +46,19 @@ smallbitvec = { workspace = true }
 test-case = { workspace = true }
 
 [features]
-default = []
+default = [
+    "inline-callstack",
+]
+testing = []
 fuzzing = ["move-vm-types/fuzzing"]
 failpoints = ["fail/failpoints"]
 # Enable tracing and debugging also for release builds. By default, it is only enabled for debug builds.
 debugging = []
-testing = []
 stacktrace = []
 # Enable this feature for tests that check bytecode verification.
 disable_verifier_cache = []
 force-inline = []
+inline-callstack = []
 
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] }
diff --git a/third_party/move/move-vm/runtime/src/access_control.rs b/third_party/move/move-vm/runtime/src/access_control.rs
index 4b147df69254d..4c8b330d76391 100644
--- a/third_party/move/move-vm/runtime/src/access_control.rs
+++ b/third_party/move/move-vm/runtime/src/access_control.rs
@@ -20,6 +20,8 @@ pub struct AccessControlState {
 
 impl AccessControlState {
     /// Enters a function, applying its access specifier to the state.
+    // note(inline): do not inline; these are called once per function and inlining them increases
+    // `execute_main` quite a bit. We want to avoid those compile times.
     #[cfg_attr(feature = "force-inline", inline(always))]
     pub(crate) fn enter_function(
         &mut self,
@@ -47,6 +49,8 @@ impl AccessControlState {
     }
 
     /// Exit function, restoring access state before entering.
+    // note(inline): do not inline; these are called once per function and inlining them increases
+    // `execute_main` quite a bit. We want to avoid those compile times.
     #[cfg_attr(feature = "force-inline", inline(always))]
     pub(crate) fn exit_function(&mut self, fun: &LoadedFunction) -> PartialVMResult<()> {
         if !matches!(fun.access_specifier(), AccessSpecifier::Any) {
diff --git a/third_party/move/move-vm/runtime/src/frame_type_cache.rs b/third_party/move/move-vm/runtime/src/frame_type_cache.rs
index a8a84a6813c97..19202cac076c6 100644
--- a/third_party/move/move-vm/runtime/src/frame_type_cache.rs
+++ b/third_party/move/move-vm/runtime/src/frame_type_cache.rs
@@ -85,7 +85,11 @@ pub(crate) struct FrameTypeCache {
 }
 
 impl FrameTypeCache {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline):
+    // needs to always be inlined; the closure will be optimized out in this case. When it gets inlined,
+    // LLVM also inlines `BTreeMap::entry()` with it to optimize, so the final instruction count is pretty big;
+    // do not inline the dependent functions.
+    #[inline(always)]
     fn get_or<K: Copy + Ord, V, F>(
         map: &mut BTreeMap<K, V>,
         idx: K,
@@ -103,7 +107,7 @@ impl FrameTypeCache {
         }
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
     pub(crate) fn get_field_type_and_struct_type(
         &mut self,
         idx: FieldInstantiationIndex,
@@ -120,6 +124,7 @@ impl FrameTypeCache {
         Ok(((field_ty, *field_ty_count), (struct_ty, *struct_ty_count)))
     }
 
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
     pub(crate) fn get_variant_field_type_and_struct_type(
         &mut self,
         idx: VariantFieldInstantiationIndex,
@@ -141,7 +146,7 @@ impl FrameTypeCache {
         Ok(((field_ty, *field_ty_count), (struct_ty, *struct_ty_count)))
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
     pub(crate) fn get_struct_type(
         &mut self,
         idx: StructDefInstantiationIndex,
@@ -155,7 +160,7 @@ impl FrameTypeCache {
         Ok((ty, *ty_count))
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
    pub(crate) fn get_struct_variant_type(
         &mut self,
         idx: StructVariantInstantiationIndex,
@@ -174,7 +179,7 @@ impl FrameTypeCache {
         Ok((ty, *ty_count))
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
     pub(crate) fn get_struct_fields_types(
         &mut self,
         idx: StructDefInstantiationIndex,
@@ -196,7 +201,7 @@ impl FrameTypeCache {
         )?)
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
     pub(crate) fn get_struct_variant_fields_types(
         &mut self,
         idx: StructVariantInstantiationIndex,
@@ -218,7 +223,7 @@ impl FrameTypeCache {
         )?)
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, increases size a lot, might even decrease the performance
     pub(crate) fn get_signature_index_type(
         &mut self,
         idx: SignatureIndex,
diff --git a/third_party/move/move-vm/runtime/src/interpreter.rs b/third_party/move/move-vm/runtime/src/interpreter.rs
index dae3242cbad88..54bf2e5942530 100644
--- a/third_party/move/move-vm/runtime/src/interpreter.rs
+++ b/third_party/move/move-vm/runtime/src/interpreter.rs
@@ -900,7 +900,8 @@ where
     ///
     /// Native functions do not push a frame at the moment and as such errors from a native
     /// function are incorrectly attributed to the caller.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): single usage
+    #[inline(always)]
     fn make_call_frame<
         RTTCheck: RuntimeTypeCheck,
         RTRCheck: RuntimeRefCheck,
@@ -1789,7 +1790,7 @@ impl Stack {
 
     /// Push a `Value` on the stack if the max stack size has not been reached. Abort execution
     /// otherwise.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): increases function size by 25%, DOES NOT improve performance; do not inline.
     fn push(&mut self, value: Value) -> PartialVMResult<()> {
         if self.value.len() < OPERAND_STACK_SIZE_LIMIT {
             self.value.push(value);
@@ -1800,7 +1801,7 @@ impl Stack {
     }
 
     /// Pop a `Value` off the stack or abort execution if the stack is empty.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[inline]
     fn pop(&mut self) -> PartialVMResult<Value> {
         self.value
             .pop()
@@ -1809,8 +1810,8 @@ impl Stack {
 
     /// Pop a `Value` of a given type off the stack. Abort if the value is not of the given
     /// type or if the stack is empty.
-    // Note(inline): expensive to inline, adds a few seconds of compile time
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline this; it bloats the interpreter loop by 20% and does not add
+    // enough perf to justify it. Instead we inline `value_as()` and all VM casts.
     fn pop_as<T>(&mut self) -> PartialVMResult<T>
     where
         Value: VMValueCast<T>,
@@ -1819,7 +1820,6 @@ impl Stack {
     }
 
     /// Pop n values off the stack.
-    #[cfg_attr(feature = "force-inline", inline(always))]
     fn popn(&mut self, n: u16) -> PartialVMResult<Vec<Value>> {
         let remaining_stack_size = self
             .value
@@ -1830,7 +1830,6 @@ impl Stack {
         Ok(args)
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
     fn last_n(&self, n: usize) -> PartialVMResult<impl ExactSizeIterator<Item = &Value> + Clone> {
         if self.value.len() < n {
             return Err(
@@ -1845,7 +1844,7 @@ impl Stack {
 
     /// Push a type on the stack if the max stack size has not been reached. Abort execution
     /// otherwise.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): bloats runtime_type_checks
     pub(crate) fn push_ty(&mut self, ty: Type) -> PartialVMResult<()> {
         if self.types.len() < OPERAND_STACK_SIZE_LIMIT {
             self.types.push(ty);
@@ -1856,7 +1855,7 @@ impl Stack {
     }
 
     /// Pop a type off the stack or abort execution if the stack is empty.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): bloats runtime_type_checks
     pub(crate) fn pop_ty(&mut self) -> PartialVMResult<Type> {
         self.types.pop().ok_or_else(|| {
             PartialVMError::new(StatusCode::EMPTY_VALUE_STACK)
@@ -1864,7 +1863,7 @@ impl Stack {
         })
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): bloats runtime_type_checks
     pub(crate) fn top_ty(&mut self) -> PartialVMResult<&Type> {
         self.types.last().ok_or_else(|| {
             PartialVMError::new(StatusCode::EMPTY_VALUE_STACK)
@@ -1873,7 +1872,7 @@ impl Stack {
     }
 
     /// Pop n types off the stack.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): bloats runtime_type_checks
     pub(crate) fn popn_tys(&mut self, n: u16) -> PartialVMResult<Vec<Type>> {
         let remaining_stack_size = self.types.len().checked_sub(n as usize).ok_or_else(|| {
             PartialVMError::new(StatusCode::EMPTY_VALUE_STACK)
@@ -1930,7 +1929,7 @@ impl CallStack {
     }
 
     /// Push a `Frame` on the call stack.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-callstack", inline(always))]
     fn push(&mut self, frame: Frame) -> Result<(), Frame> {
         if self.0.len() < CALL_STACK_SIZE_LIMIT {
             self.0.push(frame);
@@ -1941,7 +1940,7 @@ impl CallStack {
     }
 
     /// Pop a `Frame` off the call stack.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-callstack", inline(always))]
     fn pop(&mut self) -> Option<Frame> {
         self.0.pop()
     }
diff --git a/third_party/move/move-vm/runtime/src/reentrancy_checker.rs b/third_party/move/move-vm/runtime/src/reentrancy_checker.rs
index b6e2d266bd8d8..9434699e32dd2 100644
--- a/third_party/move/move-vm/runtime/src/reentrancy_checker.rs
+++ b/third_party/move/move-vm/runtime/src/reentrancy_checker.rs
@@ -60,7 +60,8 @@ impl CallType {
 }
 
 impl ReentrancyChecker {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): as `call_type` is sometimes a fixed value, this inline is very valuable
+    #[inline(always)]
     pub fn enter_function(
         &mut self,
         caller_module: Option<&ModuleId>,
@@ -108,7 +109,7 @@ impl ReentrancyChecker {
         Ok(())
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): bloats code, not a hot path
     pub fn exit_function(
         &mut self,
         caller_module: &ModuleId,
diff --git a/third_party/move/move-vm/runtime/src/runtime_type_checks.rs b/third_party/move/move-vm/runtime/src/runtime_type_checks.rs
index b379c5b0ac605..9c48ff717eee5 100644
--- a/third_party/move/move-vm/runtime/src/runtime_type_checks.rs
+++ b/third_party/move/move-vm/runtime/src/runtime_type_checks.rs
@@ -108,6 +108,7 @@ pub(crate) trait RuntimeTypeCheck {
     ) -> PartialVMResult<()>;
 }
 
+// note(inline): improves perf a little bit, but increases `post_execution_type_stack_transition` by 20%
 #[cfg_attr(feature = "force-inline", inline(always))]
 fn verify_pack<'a>(
     operand_stack: &mut Stack,
@@ -274,7 +275,8 @@ impl RuntimeTypeCheck for NoRuntimeTypeCheck {
 impl RuntimeTypeCheck for FullRuntimeTypeCheck {
     /// Note that most of the checks should happen after instruction execution, because gas charging will happen during
     /// instruction execution and we want to avoid running code without charging proper gas as much as possible.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): should not be inlined; the function call overhead
+    // is not big enough to justify the increase in function size
     fn pre_execution_type_stack_transition(
         frame: &Frame,
         operand_stack: &mut Stack,
@@ -419,7 +421,8 @@ impl RuntimeTypeCheck for FullRuntimeTypeCheck {
     /// This function and `pre_execution_type_stack_transition` should
     /// constitute the full type stack transition for the paranoid
     /// mode.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): should not be inlined; the function call overhead
+    // is not big enough to justify the increase in function size
     fn post_execution_type_stack_transition(
         frame: &Frame,
         operand_stack: &mut Stack,
diff --git a/third_party/move/move-vm/types/Cargo.toml b/third_party/move/move-vm/types/Cargo.toml
index f20ddd1674c66..0267be686c57c 100644
--- a/third_party/move/move-vm/types/Cargo.toml
+++ b/third_party/move/move-vm/types/Cargo.toml
@@ -37,7 +37,12 @@ proptest = { workspace = true }
 rand = { workspace = true }
 
 [features]
-default = []
+default = [
+    "inline-vm-casts",
+    "inline-locals",
+]
 testing = []
 fuzzing = ["proptest", "move-binary-format/fuzzing"]
 force-inline = []
+inline-vm-casts = []
+inline-locals = []
diff --git a/third_party/move/move-vm/types/src/values/values_impl.rs b/third_party/move/move-vm/types/src/values/values_impl.rs
index d34ea58f2e7fc..2ab497c7c94cb 100644
--- a/third_party/move/move-vm/types/src/values/values_impl.rs
+++ b/third_party/move/move-vm/types/src/values/values_impl.rs
@@ -473,7 +473,7 @@ impl Value {
 impl Value {
     // Note(inline): recursive function, but `#[cfg_attr(feature = "force-inline", inline(always))]` seems to improve perf slightly
     // and doesn't add much compile time.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[inline(always)]
     fn copy_value(&self, depth: u64, max_depth: Option<u64>) -> PartialVMResult<Value> {
         use Value::*;
 
@@ -991,7 +991,7 @@ impl ContainerRef {
 }
 
 impl IndexedRef {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): do not inline, too big
     fn equals(&self, other: &Self, depth: u64, max_depth: Option<u64>) -> PartialVMResult<bool> {
         use Container::*;
 
@@ -1326,7 +1326,6 @@ impl IndexedRef {
 **************************************************************************************/
 
 impl ContainerRef {
-    #[cfg_attr(feature = "force-inline", inline(always))]
     fn read_ref(self, depth: u64, max_depth: Option<u64>) -> PartialVMResult<Value> {
         Ok(Value::Container(
             self.container().copy_value(depth, max_depth)?,
@@ -2112,7 +2111,7 @@ impl Locals {
         )))
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     pub fn copy_loc(&self, idx: usize) -> PartialVMResult<Value> {
         self.copy_loc_impl(idx, Some(DEFAULT_MAX_VM_VALUE_NESTED_DEPTH))
     }
@@ -2123,7 +2122,7 @@ impl Locals {
         self.copy_loc_impl(idx, Some(max_depth))
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     fn copy_loc_impl(&self, idx: usize, max_depth: Option<u64>) -> PartialVMResult<Value> {
         let v = self.0.borrow();
         match v.get(idx) {
@@ -2140,7 +2139,7 @@ impl Locals {
         }
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     fn swap_loc(&mut self, idx: usize, x: Value) -> PartialVMResult<Value> {
         let mut v = self.0.borrow_mut();
         match v.get_mut(idx) {
@@ -2153,7 +2152,7 @@ impl Locals {
         }
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     pub fn move_loc(&mut self, idx: usize) -> PartialVMResult<Value> {
         match self.swap_loc(idx, Value::Invalid)? {
             Value::Invalid => Err(PartialVMError::new(
@@ -2164,7 +2163,7 @@ impl Locals {
         }
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     pub fn store_loc(&mut self, idx: usize, x: Value) -> PartialVMResult<()> {
         self.swap_loc(idx, x)?;
         Ok(())
@@ -2172,7 +2171,7 @@ impl Locals {
 
     /// Drop all Move values onto a different Vec to avoid leaking memory.
     /// References are excluded since they may point to invalid data.
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     pub fn drop_all_values(&mut self) -> impl Iterator<Item = Value> + use<> {
         let mut locals = self.0.borrow_mut();
         let mut res = vec![];
@@ -2190,7 +2189,7 @@ impl Locals {
         res.into_iter()
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     pub fn is_invalid(&self, idx: usize) -> PartialVMResult<bool> {
         let v = self.0.borrow();
         match v.get(idx) {
@@ -2443,7 +2442,7 @@ pub trait VMValueCast<T> {
 macro_rules! impl_vm_value_cast {
     ($ty:ty, $tc:ident) => {
         impl VMValueCast<$ty> for Value {
-            #[cfg_attr(feature = "force-inline", inline(always))]
+            #[cfg_attr(feature = "inline-vm-casts", inline)]
             fn cast(self) -> PartialVMResult<$ty> {
                 match self {
                     Value::$tc(x) => Ok(x),
@@ -2473,7 +2472,7 @@ impl_vm_value_cast!(ContainerRef, ContainerRef);
 impl_vm_value_cast!(IndexedRef, IndexedRef);
 
 impl VMValueCast<DelayedFieldID> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<DelayedFieldID> {
         match self {
             Value::DelayedFieldID { id } => Ok(id),
@@ -2488,7 +2487,7 @@ impl VMValueCast<DelayedFieldID> for Value {
 }
 
 impl VMValueCast<Reference> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Reference> {
         match self {
             Value::ContainerRef(r) => Ok(Reference(ReferenceImpl::ContainerRef(r))),
@@ -2500,7 +2499,7 @@ impl VMValueCast<Reference> for Value {
 }
 
 impl VMValueCast<Container> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Container> {
         match self {
             Value::Container(c) => Ok(c),
@@ -2511,7 +2510,7 @@ impl VMValueCast<Container> for Value {
 }
 
 impl VMValueCast<Struct> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Struct> {
         match self {
             Value::Container(Container::Struct(r)) => Ok(Struct {
@@ -2524,14 +2523,14 @@ impl VMValueCast<Struct> for Value {
 }
 
 impl VMValueCast<StructRef> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<StructRef> {
         Ok(StructRef(VMValueCast::cast(self)?))
     }
 }
 
 impl VMValueCast<Vec<u8>> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Vec<u8>> {
         match self {
             Value::Container(Container::VecU8(r)) => take_unique_ownership(r),
@@ -2542,7 +2541,7 @@ impl VMValueCast<Vec<u8>> for Value {
 }
 
 impl VMValueCast<Vec<u64>> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Vec<u64>> {
         match self {
             Value::Container(Container::VecU64(r)) => take_unique_ownership(r),
@@ -2553,7 +2552,7 @@ impl VMValueCast<Vec<u64>> for Value {
 }
 
 impl VMValueCast<Vec<Value>> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Vec<Value>> {
         match self {
             Value::Container(Container::Vec(c)) => {
@@ -2587,7 +2586,7 @@ impl VMValueCast<Vec<Value>> for Value {
 }
 
 impl VMValueCast<SignerRef> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<SignerRef> {
         match self {
             Value::ContainerRef(r) => Ok(SignerRef(r)),
@@ -2598,7 +2597,7 @@ impl VMValueCast<SignerRef> for Value {
 }
 
 impl VMValueCast<VectorRef> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<VectorRef> {
         match self {
             Value::ContainerRef(r) => Ok(VectorRef(r)),
@@ -2609,7 +2608,7 @@ impl VMValueCast<VectorRef> for Value {
 }
 
 impl VMValueCast<Vector> for Value {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     fn cast(self) -> PartialVMResult<Vector> {
         match self {
             Value::Container(c) => Ok(Vector(c)),
@@ -2620,6 +2619,7 @@ impl VMValueCast<Vector> for Value {
 }
 
 impl Value {
+    #[cfg_attr(feature = "inline-vm-casts", inline)]
     pub fn value_as<T>(self) -> PartialVMResult<T>
     where
         Self: VMValueCast<T>,
@@ -3433,7 +3433,7 @@ fn check_elem_layout(ty: &Type, v: &Container) -> PartialVMResult<()> {
 }
 
 impl VectorRef {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): too big and too cold to inline
     pub fn length_as_usize(&self) -> PartialVMResult<usize> {
         let c: &Container = self.0.container();
 
@@ -3458,12 +3458,12 @@ impl VectorRef {
         Ok(len)
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[inline]
     pub fn len(&self) -> PartialVMResult<Value> {
         Ok(Value::u64(self.length_as_usize()? as u64))
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): too big and too cold to inline
     pub fn push_back(&self, e: Value) -> PartialVMResult<()> {
         let c = self.0.container();
 
@@ -3509,7 +3509,7 @@ impl VectorRef {
         }
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): too big and too cold to inline
     pub fn pop(&self) -> PartialVMResult<Value> {
         let c = self.0.container();
 
@@ -3588,7 +3588,6 @@ impl VectorRef {
         Ok(res)
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
     pub fn swap(&self, idx1: usize, idx2: usize) -> PartialVMResult<()> {
         let c = self.0.container();
 
@@ -3706,7 +3705,7 @@ impl VectorRef {
 }
 
 impl Vector {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    // note(inline): LLVM won't inline it even with #[inline(always)], and shouldn't; we don't want to bloat execute_code_impl
     pub fn pack(type_param: &Type, elements: Vec<Value>) -> PartialVMResult<Value> {
         let container = match type_param {
             Type::U8 => Value::vector_u8(
@@ -3813,11 +3812,6 @@ impl Vector {
         Ok(container)
     }
 
-    pub fn empty(type_param: &Type) -> PartialVMResult<Value> {
-        Self::pack(type_param, vec![])
-    }
-
-    #[cfg_attr(feature = "force-inline", inline(always))]
     pub fn unpack_unchecked(self) -> PartialVMResult<Vec<Value>> {
         let elements: Vec<_> = match self.0 {
             Container::VecU8(r) => take_unique_ownership(r)?
@@ -3886,7 +3880,6 @@ impl Vector {
         Ok(elements)
     }
 
-    #[cfg_attr(feature = "force-inline", inline(always))]
     pub fn unpack(self, expected_num: u64) -> PartialVMResult<Vec<Value>> {
         let elements = self.unpack_unchecked()?;
         if expected_num as usize == elements.len() {
@@ -3897,11 +3890,6 @@ impl Vector {
         }
     }
 
-    pub fn destroy_empty(self) -> PartialVMResult<()> {
-        self.unpack(0)?;
-        Ok(())
-    }
-
     pub fn to_vec_u8(self) -> PartialVMResult<Vec<u8>> {
         check_elem_layout(&Type::U8, &self.0)?;
         if let Container::VecU8(r) = self.0 {
@@ -5189,7 +5177,7 @@ impl Value {
 // Locals may contain reference values that point to the same container through Rc, hence forming
 // a cycle. Therefore values need to be manually taken out of the Locals in order to not leak memory.
 impl Drop for Locals {
-    #[cfg_attr(feature = "force-inline", inline(always))]
+    #[cfg_attr(feature = "inline-locals", inline(always))]
     fn drop(&mut self) {
         _ = self.drop_all_values();
     }
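
Background on the mechanism, for reviewers: every toggle in this patch is the same `cfg_attr` pattern, where a Cargo feature decides whether rustc sees an inline hint at all. Below is a minimal sketch of that pattern; the crate layout and the `hot-inline` feature name are hypothetical stand-ins for the `inline-callstack`, `inline-vm-casts`, and `inline-locals` features introduced above, and the function is an illustrative toy, not code from this diff.

// Cargo.toml of the hypothetical crate:
//
//     [features]
//     default = ["hot-inline"]   # hint enabled in normal builds
//     hot-inline = []            # opt out via --no-default-features
//
// With the feature on, the attribute below expands to `#[inline(always)]`;
// with it off, the function carries no inline hint, which keeps large
// callers (such as an interpreter loop) smaller and faster to compile.
#[cfg_attr(feature = "hot-inline", inline(always))]
pub fn pop_checked(stack: &mut Vec<u64>) -> Result<u64, &'static str> {
    // Identical logic either way; only the codegen hint changes.
    stack.pop().ok_or("empty value stack")
}

Splitting the hints across several narrowly scoped features, rather than keeping the single `force-inline` switch, lets each inlining decision be benchmarked and reverted independently, which is what the per-site `note(inline)` comments in this patch document.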