diff --git a/Cargo.lock b/Cargo.lock index b7303214d..58c45b04b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -850,6 +850,7 @@ dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "cranelift-entity 0.41.0", "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "minisign 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", "num-derive 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/lucet-analyze/src/main.rs b/lucet-analyze/src/main.rs index 2c20655e8..e1bd17d3b 100644 --- a/lucet-analyze/src/main.rs +++ b/lucet-analyze/src/main.rs @@ -2,6 +2,7 @@ use lucet_module::{ FunctionSpec, Module, ModuleData, SerializedModule, TableElement, TrapManifest, TrapSite, + VersionInfo, }; use byteorder::{LittleEndian, ReadBytesExt}; @@ -102,7 +103,10 @@ impl<'a> ArtifactSummary<'a> { .unwrap(); let mut rdr = Cursor::new(buffer); + let version = VersionInfo::read_from(&mut rdr).unwrap(); + SerializedModule { + version, module_data_ptr: rdr.read_u64::<LittleEndian>().unwrap(), module_data_len: rdr.read_u64::<LittleEndian>().unwrap(), tables_ptr: rdr.read_u64::<LittleEndian>().unwrap(), @@ -211,6 +215,7 @@ fn load_module<'b, 'a: 'b>( ) }; Module { + version: serialized_module.version.clone(), module_data, tables, function_manifest, diff --git a/lucet-module/Cargo.toml b/lucet-module/Cargo.toml index 16078ab0b..41fc62748 100644 --- a/lucet-module/Cargo.toml +++ b/lucet-module/Cargo.toml @@ -20,3 +20,4 @@ num-traits = "0.2" minisign = "0.5.11" object = "0.12" byteorder = "1.3" +memoffset = "0.5.1" diff --git a/lucet-module/src/lib.rs b/lucet-module/src/lib.rs index 8be0f1797..b1259024e 100644 --- a/lucet-module/src/lib.rs +++ b/lucet-module/src/lib.rs @@ -17,6 +17,7 @@ mod signature; mod tables; mod traps; mod types; +mod version_info; pub use crate::error::Error; pub use
crate::functions::{ @@ -32,6 +33,7 @@ pub use crate::signature::{ModuleSignature, PublicKey}; pub use crate::tables::TableElement; pub use crate::traps::{TrapCode, TrapManifest, TrapSite}; pub use crate::types::{Signature, ValueType}; +pub use crate::version_info::VersionInfo; /// Owned variants of the module data types, useful for serialization and testing. pub mod owned { diff --git a/lucet-module/src/module.rs b/lucet-module/src/module.rs index 793a94382..238e2bcb0 100644 --- a/lucet-module/src/module.rs +++ b/lucet-module/src/module.rs @@ -1,12 +1,14 @@ use crate::functions::FunctionSpec; use crate::module_data::ModuleData; use crate::tables::TableElement; +use crate::version_info::VersionInfo; pub const LUCET_MODULE_SYM: &str = "lucet_module"; /// Module is the exposed structure that contains all the data backing a Lucet-compiled object. #[derive(Debug)] pub struct Module<'a> { + pub version: VersionInfo, pub module_data: ModuleData<'a>, pub tables: &'a [&'a [TableElement]], pub function_manifest: &'a [FunctionSpec], @@ -18,6 +20,7 @@ pub struct Module<'a> { #[repr(C)] #[derive(Debug)] pub struct SerializedModule { + pub version: VersionInfo, pub module_data_ptr: u64, pub module_data_len: u64, pub tables_ptr: u64, diff --git a/lucet-module/src/signature.rs b/lucet-module/src/signature.rs index 5e6e52b4f..134bd7256 100644 --- a/lucet-module/src/signature.rs +++ b/lucet-module/src/signature.rs @@ -1,8 +1,9 @@ use crate::error::Error::{self, IOError, ModuleSignatureError}; -use crate::module::LUCET_MODULE_SYM; +use crate::module::{SerializedModule, LUCET_MODULE_SYM}; use crate::module_data::MODULE_DATA_SYM; use crate::ModuleData; use byteorder::{ByteOrder, LittleEndian}; +use memoffset::offset_of; pub use minisign::{PublicKey, SecretKey}; use minisign::{SignatureBones, SignatureBox}; use object::*; @@ -94,8 +95,10 @@ impl RawModuleAndData { format!("`{}` symbol not present", MODULE_DATA_SYM), ))?; - let module_data_len = - 
LittleEndian::read_u64(&obj_bin[(native_data_symbol_data.offset + 8)..]) as usize; + let module_data_len = LittleEndian::read_u64( + &obj_bin[(native_data_symbol_data.offset + + offset_of!(SerializedModule, module_data_len))..], + ) as usize; Ok(RawModuleAndData { obj_bin, diff --git a/lucet-module/src/version_info.rs b/lucet-module/src/version_info.rs new file mode 100644 index 000000000..2914a9d69 --- /dev/null +++ b/lucet-module/src/version_info.rs @@ -0,0 +1,115 @@
+use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
+use std::cmp::min;
+use std::fmt;
+use std::io;
+
+/// Value stored in `reserved` by every `VersionInfo` that `VersionInfo::current` builds. The
+/// comment inside `VersionInfo::current` explains why the high bit is the one that is set.
+const RESERVED_MARKER: u16 = 0x8000;
+
+/// VersionInfo is information about a Lucet module to allow the Lucet runtime to determine if or
+/// how the module can be loaded, if so requested. The information here describes implementation
+/// details in runtime support for `lucetc`-produced modules, and nothing higher level.
+#[repr(C)]
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct VersionInfo {
+    major: u16,
+    minor: u16,
+    patch: u16,
+    reserved: u16,
+    /// `version_hash` is either all nulls or the first eight ascii characters of the git commit
+    /// hash of wherever this Version is coming from. In the case of a compiled lucet module, this
+    /// hash will come from the git commit that the lucetc producing it came from. In a runtime
+    /// context, it will be the git commit of lucet-runtime built into the embedder.
+    ///
+    /// The version hash will typically be populated only in release builds, but may be blank even
+    /// in that case: if building from a packaged crate, or in a build environment that does not
+    /// have "git" installed, `lucetc` and `lucet-runtime` will fall back to an empty hash.
+    version_hash: [u8; 8],
+}
+
+/// Formats as `major.minor.patch`; when the version hash is not all zero bytes, `-` and the hash
+/// are appended, with `INVALID` standing in for a hash that is not valid UTF-8.
+impl fmt::Display for VersionInfo {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        write!(fmt, "{}.{}.{}", self.major, self.minor, self.patch)?;
+        if u64::from_ne_bytes(self.version_hash) != 0 {
+            write!(
+                fmt,
+                "-{}",
+                std::str::from_utf8(&self.version_hash).unwrap_or("INVALID")
+            )?;
+        }
+        Ok(())
+    }
+}
+
+impl VersionInfo {
+    /// Writes this version to `w` as four little-endian `u16`s (`major`, `minor`, `patch`,
+    /// `reserved`) followed by the eight bytes of `version_hash`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first I/O error reported by `w`.
+    pub fn write_to<W: WriteBytesExt>(&self, w: &mut W) -> io::Result<()> {
+        w.write_u16::<LittleEndian>(self.major)?;
+        w.write_u16::<LittleEndian>(self.minor)?;
+        w.write_u16::<LittleEndian>(self.patch)?;
+        w.write_u16::<LittleEndian>(self.reserved)?;
+        // `write_all` keeps writing after a short write, so a writer that accepts the hash in
+        // several pieces is not reported as a failure.
+        w.write_all(&self.version_hash)
+    }
+
+    /// Reads a version from `r` in the layout produced by `write_to`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first I/O error reported by `r`, including running out of input.
+    pub fn read_from<R: ReadBytesExt>(r: &mut R) -> io::Result<Self> {
+        let major = r.read_u16::<LittleEndian>()?;
+        let minor = r.read_u16::<LittleEndian>()?;
+        let patch = r.read_u16::<LittleEndian>()?;
+        let reserved = r.read_u16::<LittleEndian>()?;
+        let mut version_hash = [0u8; 8];
+        r.read_exact(&mut version_hash)?;
+
+        Ok(VersionInfo {
+            major,
+            minor,
+            patch,
+            reserved,
+            version_hash,
+        })
+    }
+
+    /// Returns `true` when `reserved` holds the marker that `current` writes.
+    pub fn valid(&self) -> bool {
+        self.reserved == RESERVED_MARKER
+    }
+
+    /// Returns the version of this crate, taken from Cargo's `CARGO_PKG_VERSION_*` variables at
+    /// compile time, with up to the first eight bytes of `current_hash` as the version hash. A
+    /// shorter hash is padded with zero bytes.
+    pub fn current(current_hash: &'static [u8]) -> Self {
+        let mut version_hash = [0u8; 8];
+        let len = min(version_hash.len(), current_hash.len());
+        version_hash[..len].copy_from_slice(&current_hash[..len]);
+
+        // The reasoning for this is as follows:
+        // `SerializedModule`, in versions before version information was introduced, began with a
+        // pointer - `module_data_ptr`. This pointer would be relocated to somewhere in user space
+        // for the embedder of `lucet-runtime`. On x86_64, hopefully, that's userland code in some
+        // OS, meaning the pointer will be a pointer to user memory, and will be below
+        // 0x8000_0000_0000_0000. By setting `reserved` to `0x8000`, we set what would be the
+        // highest bit in `module_data_ptr` in an old `lucet-runtime` and guarantee a segmentation
+        // fault when loading these newer modules with version information.
+        VersionInfo {
+            major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(),
+            minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(),
+            patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(),
+            reserved: RESERVED_MARKER,
+            version_hash,
+        }
+    }
+}
diff --git a/lucet-runtime/lucet-runtime-internals/build.rs b/lucet-runtime/lucet-runtime-internals/build.rs index 41323408c..9c5e5418a 100644 --- a/lucet-runtime/lucet-runtime-internals/build.rs +++ b/lucet-runtime/lucet-runtime-internals/build.rs @@ -1,3 +1,7 @@ +use std::env; +use std::fs::File; +use std::path::Path; + use cc; fn main() { @@ -14,4 +18,28 @@ fn main() { cc::Build::new() .file("src/context/tests/c_child.c") .compile("context_tests_c_child"); + + let commit_file_path = Path::new(&env::var("OUT_DIR").unwrap()).join("commit_hash"); + // in debug builds we only need the file to exist, but in release builds this will be used and + // requires mutability. + #[allow(unused_variables, unused_mut)] + let mut f = File::create(&commit_file_path).unwrap(); + + // This is about the closest not-additional-feature-flag way to detect release builds. + // In debug builds, leave the `commit_hash` file empty to allow looser version checking and + // avoid impacting development workflows too much.
+ #[cfg(not(debug_assertions))] + { + use std::io::Write; + use std::process::Command; + + let last_commit_hash = Command::new("git") + .args(&["log", "-n", "1", "--pretty=format:%H"]) + .output() + .ok(); + + if let Some(last_commit_hash) = last_commit_hash { + f.write_all(&last_commit_hash.stdout).unwrap(); + } + } } diff --git a/lucet-runtime/lucet-runtime-internals/src/module/dl.rs b/lucet-runtime/lucet-runtime-internals/src/module/dl.rs index 350a08640..98fb5bba6 100644 --- a/lucet-runtime/lucet-runtime-internals/src/module/dl.rs +++ b/lucet-runtime/lucet-runtime-internals/src/module/dl.rs @@ -4,7 +4,7 @@ use libc::c_void; use libloading::Library; use lucet_module::{ FunctionHandle, FunctionIndex, FunctionPointer, FunctionSpec, ModuleData, ModuleSignature, - PublicKey, SerializedModule, Signature, LUCET_MODULE_SYM, + PublicKey, SerializedModule, Signature, VersionInfo, LUCET_MODULE_SYM, }; use std::ffi::CStr; use std::mem::MaybeUninit; @@ -61,6 +61,21 @@ impl DlModule { let serialized_module: &SerializedModule = unsafe { serialized_module_ptr.as_ref().unwrap() }; + let version = serialized_module.version.clone(); + + let runtime_version = + VersionInfo::current(include_str!(concat!(env!("OUT_DIR"), "/commit_hash")).as_bytes()); + + if !version.valid() { + return Err(lucet_incorrect_module!("reserved bit is not set. This module is likely too old for this lucet-runtime to load.")); + } else if version != runtime_version { + return Err(lucet_incorrect_module!( + "version mismatch. module has version {}, while this runtime is version {}", + version, + runtime_version, + )); + } + // Deserialize the slice into ModuleData, which will hold refs into the loaded // shared object file in `module_data_slice`. 
Both of these get a 'static lifetime because // Rust doesn't have a safe way to describe that their lifetime matches the containing @@ -115,6 +130,7 @@ impl DlModule { lib, fbase, module: lucet_module::Module { + version, module_data, tables, function_manifest, diff --git a/lucet-runtime/tests/version_checks.rs b/lucet-runtime/tests/version_checks.rs new file mode 100644 index 000000000..fa5c9a30c --- /dev/null +++ b/lucet-runtime/tests/version_checks.rs @@ -0,0 +1,19 @@
+use lucet_runtime::{DlModule, Error};
+
+/// The `old_module.so` fixture must be refused with the module error that reports a missing
+/// reserved bit.
+#[test]
+pub fn reject_old_modules() {
+    let err = DlModule::load("./tests/version_checks/old_module.so")
+        .err()
+        .unwrap();
+
+    match err {
+        Error::ModuleError(e) => {
+            let msg = e.to_string();
+            assert!(msg.contains("reserved bit is not set"));
+            assert!(msg.contains("module is likely too old"));
+        }
+        other => panic!("unexpected error loading module: {}", other),
+    }
+}
diff --git a/lucet-runtime/tests/version_checks/old_module.so b/lucet-runtime/tests/version_checks/old_module.so new file mode 100755 index 000000000..cf04b77a7 Binary files /dev/null and b/lucet-runtime/tests/version_checks/old_module.so differ diff --git a/lucetc/build.rs b/lucetc/build.rs new file mode 100644 index 000000000..96299ec26 --- /dev/null +++ b/lucetc/build.rs @@ -0,0 +1,31 @@
+use std::env;
+use std::fs::File;
+use std::path::Path;
+
+/// Creates `$OUT_DIR/commit_hash`. When compiled without `debug_assertions`, the file receives
+/// the output of `git log -n 1 --pretty=format:%H`, provided `git` could be started.
+fn main() {
+    let commit_file_path = Path::new(&env::var("OUT_DIR").unwrap()).join("commit_hash");
+    // in debug builds we only need the file to exist, but in release builds this will be used and
+    // requires mutability.
+    #[allow(unused_variables, unused_mut)]
+    let mut f = File::create(&commit_file_path).unwrap();
+
+    // This is about the closest not-additional-feature-flag way to detect release builds.
+    // In debug builds, leave the `commit_hash` file empty to allow looser version checking and
+    // avoid impacting development workflows too much.
+    #[cfg(not(debug_assertions))]
+    {
+        use std::io::Write;
+        use std::process::Command;
+
+        let git_log = Command::new("git")
+            .args(&["log", "-n", "1", "--pretty=format:%H"])
+            .output();
+
+        // Failing to start `git` is not an error here: the hash file is simply left empty.
+        if let Ok(git_log) = git_log {
+            f.write_all(&git_log.stdout).unwrap();
+        }
+    }
+}
diff --git a/lucetc/src/output.rs b/lucetc/src/output.rs index 08d65f997..66b706171 100644 --- a/lucetc/src/output.rs +++ b/lucetc/src/output.rs @@ -1,7 +1,6 @@ use crate::error::LucetcErrorKind; use crate::function_manifest::{write_function_manifest, FUNCTION_MANIFEST_SYM}; use crate::name::Name; -use crate::pointer::NATIVE_POINTER_SIZE; use crate::stack_probe; use crate::table::{link_tables, TABLE_SYM}; use crate::traps::write_trap_tables; @@ -10,7 +9,9 @@ use cranelift_codegen::{ir, isa}; use cranelift_faerie::FaerieProduct; use faerie::{Artifact, Decl, Link}; use failure::{format_err, Error, ResultExt}; -use lucet_module::{FunctionSpec, LUCET_MODULE_SYM, MODULE_DATA_SYM}; +use lucet_module::{ + FunctionSpec, SerializedModule, VersionInfo, LUCET_MODULE_SYM, MODULE_DATA_SYM, +}; use std::collections::HashMap; use std::fs::File; use std::io::{Cursor, Write}; @@ -136,10 +137,15 @@ fn write_module( function_manifest_len: usize, obj: &mut Artifact, ) -> Result<(), Error> { - let mut native_data = Cursor::new(Vec::with_capacity(NATIVE_POINTER_SIZE * 4)); + let mut native_data = Cursor::new(Vec::with_capacity(std::mem::size_of::<SerializedModule>())); obj.declare(LUCET_MODULE_SYM, Decl::data().global()) .context(format!("declaring {}", LUCET_MODULE_SYM))?; + let version = + VersionInfo::current(include_str!(concat!(env!("OUT_DIR"), "/commit_hash")).as_bytes()); + + version.write_to(&mut native_data)?; + write_relocated_slice( obj, &mut native_data,