diff --git a/src/lib.rs b/src/lib.rs index b1eb179..a8c003f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ use { futures::future::FutureExt, heck::ToSnakeCase, indexmap::{IndexMap, IndexSet}, + prelink::{embedded_helper_utils, embedded_python_standard_library}, serde::Deserialize, std::{ collections::{HashMap, HashSet}, @@ -19,7 +20,6 @@ use { str, }, summary::{Escape, Locations, Summary}, - tar::Archive, wasm_convert::IntoValType, wasm_encoder::{ CodeSection, ExportKind, ExportSection, Function, FunctionSection, Instruction, Module, @@ -35,13 +35,13 @@ use { DirPerms, FilePerms, WasiCtx, WasiCtxBuilder, WasiView, }, wit_parser::{Resolve, TypeDefKind, UnresolvedPackageGroup, WorldId, WorldItem, WorldKey}, - zstd::Decoder, }; mod abi; mod bindgen; mod bindings; pub mod command; +mod prelink; #[cfg(feature = "pyo3")] mod python; mod summary; @@ -62,6 +62,12 @@ pub struct Ctx { table: ResourceTable, } +pub struct Library { + name: String, + module: Vec, + dl_openable: bool, +} + impl WasiView for Ctx { fn ctx(&mut self) -> &mut WasiCtx { &mut self.wasi @@ -104,7 +110,7 @@ impl TryFrom<(&Path, RawComponentizePyConfig)> for ComponentizePyConfig { } #[derive(Debug)] -struct ConfigContext { +pub struct ConfigContext { module: String, root: PathBuf, path: PathBuf, @@ -207,29 +213,15 @@ pub async fn componentize( .filter_map(|&s| Path::new(s).exists().then_some(s)) .collect::>(); - // Untar the embedded copy of the Python standard library into a temporary directory - let stdlib = tempfile::tempdir()?; - - Archive::new(Decoder::new(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/python-lib.tar.zst" - ))))?) - .unpack(stdlib.path())?; - - // Untar the embedded copy of helper utilities into a temporary directory - let bundled = tempfile::tempdir()?; - - Archive::new(Decoder::new(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/bundled.tar.zst" - ))))?) 
- .unpack(bundled.path())?; + let embedded_python_standard_lib = embedded_python_standard_library().unwrap(); + let embedded_helper_utils = embedded_helper_utils().unwrap(); // Search `python_path` for native extension libraries and/or componentize-py.toml files. Packages containing // the latter may contain their own WIT files defining their own worlds (in addition to what the caller // specified as paramters), which we'll try to match up with `module_worlds` in the next step. - let mut raw_configs = Vec::new(); - let mut library_path = Vec::with_capacity(python_path.len()); + let mut raw_configs: Vec> = Vec::new(); + let mut library_path: Vec<(&str, Vec)> = + Vec::with_capacity(python_path.len()); for path in python_path { let mut libraries = Vec::new(); search_directory( @@ -242,6 +234,8 @@ pub async fn componentize( library_path.push((*path, libraries)); } + let mut libraries = prelink::bundle_libraries(library_path).unwrap(); + // Validate the paths parsed from any componentize-py.toml files discovered above and match them up with // `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`, // which is required to be topologically sorted with respect to package dependencies. 
@@ -341,108 +335,11 @@ pub async fn componentize( let summary = Summary::try_new(&resolve, &worlds)?; - struct Library { - name: String, - module: Vec, - dl_openable: bool, - } - - let mut libraries = vec![ - Library { - name: "libcomponentize_py_runtime.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libcomponentize_py_runtime.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libpython3.12.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libpython3.12.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libc.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libc.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libwasi-emulated-mman.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libwasi-emulated-mman.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libwasi-emulated-process-clocks.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libwasi-emulated-process-clocks.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libwasi-emulated-getpid.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libwasi-emulated-getpid.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libwasi-emulated-signal.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libwasi-emulated-signal.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libc++.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libc++.so.zst" - ))))?, - dl_openable: false, - }, - Library { - name: "libc++abi.so".into(), - module: zstd::decode_all(Cursor::new(include_bytes!(concat!( - env!("OUT_DIR"), - "/libc++abi.so.zst" - ))))?, - dl_openable: false, 
- }, - Library { - name: "libcomponentize_py_bindings.so".into(), - module: bindings::make_bindings(&resolve, &worlds, &summary)?, - dl_openable: false, - }, - ]; - - for (index, (path, libs)) in library_path.iter().enumerate() { - for library in libs { - let path = library - .strip_prefix(path) - .unwrap() - .to_str() - .context("non-UTF-8 path")? - .replace('\\', "/"); - - libraries.push(Library { - name: format!("/{index}/{path}"), - module: fs::read(library)?, - dl_openable: true, - }); - } - } + libraries.push(Library { + name: "libcomponentize_py_bindings.so".into(), + module: bindings::make_bindings(&resolve, &worlds, &summary)?, + dl_openable: false, + }); // Link all the libraries (including any native extensions) into a single component. let mut linker = wit_component::Linker::default().validate(true); @@ -534,8 +431,18 @@ pub async fn componentize( .env("PYTHONUNBUFFERED", "1") .env("COMPONENTIZE_PY_APP_NAME", app_name) .env("PYTHONHOME", "/python") - .preopened_dir(stdlib.path(), "python", DirPerms::all(), FilePerms::all())? - .preopened_dir(bundled.path(), "bundled", DirPerms::all(), FilePerms::all())?; + .preopened_dir( + embedded_python_standard_lib.path(), + "python", + DirPerms::all(), + FilePerms::all(), + )? + .preopened_dir( + embedded_helper_utils.path(), + "bundled", + DirPerms::all(), + FilePerms::all(), + )?; // Generate guest mounts for each host directory in `python_path`. 
for (index, path) in python_path.iter().enumerate() { @@ -628,7 +535,7 @@ pub async fn componentize( Ok(()) } - replace(bundled.path(), "proxy", &module)?; + replace(embedded_helper_utils.path(), "proxy", &module)?; }; for (mounts, world_dir) in world_dir_mounts.iter() {
diff --git a/src/prelink.rs b/src/prelink.rs
new file mode 100644
index 0000000..debee07
--- /dev/null
+++ b/src/prelink.rs
@@ -0,0 +1,117 @@
+//! Helpers for unpacking the assets embedded into this crate at build time and for gathering
+//! the libraries that `componentize` links into a component.
+
+#![deny(warnings)]
+
+use std::{
+    fs,
+    io::{self, Cursor},
+    path::PathBuf,
+};
+
+use tar::Archive;
+use tempfile::TempDir;
+use zstd::Decoder;
+
+use crate::Library;
+
+/// Builds the non-`dl_openable` [`Library`] named `$name` by decompressing the `$name.zst` file
+/// under `OUT_DIR` that is embedded with `include_bytes!`.
+///
+/// The expansion contains `?`, so the enclosing function must be able to return an
+/// `io::Error`.
+macro_rules! embedded_library {
+    ($name:literal) => {
+        Library {
+            name: $name.into(),
+            module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
+                env!("OUT_DIR"),
+                "/",
+                $name,
+                ".zst"
+            ))))?,
+            dl_openable: false,
+        }
+    };
+}
+
+/// Unpacks a zstd-compressed tar archive into a newly created temporary directory.
+///
+/// # Errors
+///
+/// Returns any I/O error raised while creating the directory or decoding/unpacking the archive.
+fn unpack_archive(compressed: &[u8]) -> Result<TempDir, io::Error> {
+    let dir = tempfile::tempdir()?;
+    Archive::new(Decoder::new(Cursor::new(compressed))?).unpack(dir.path())?;
+    Ok(dir)
+}
+
+/// Untars the embedded copy of the Python standard library into a temporary directory.
+///
+/// # Errors
+///
+/// Returns an I/O error if the directory cannot be created or the archive cannot be unpacked.
+pub fn embedded_python_standard_library() -> Result<TempDir, io::Error> {
+    unpack_archive(include_bytes!(concat!(
+        env!("OUT_DIR"),
+        "/python-lib.tar.zst"
+    )))
+}
+
+/// Untars the embedded copy of the helper utilities into a temporary directory.
+///
+/// # Errors
+///
+/// Returns an I/O error if the directory cannot be created or the archive cannot be unpacked.
+pub fn embedded_helper_utils() -> Result<TempDir, io::Error> {
+    unpack_archive(include_bytes!(concat!(
+        env!("OUT_DIR"),
+        "/bundled.tar.zst"
+    )))
+}
+
+/// Collects the libraries to link: the embedded libraries first, followed by every library file
+/// listed in `library_path`.
+///
+/// Each `library_path` entry pairs a root with the library files found beneath it. Such a library
+/// is named `/{index}/{relative path}`, where `index` is the entry's position in `library_path`
+/// and backslashes in the relative path are replaced with `/`, and is marked `dl_openable`.
+///
+/// # Errors
+///
+/// Returns an I/O error if an embedded library cannot be decompressed, a library is not located
+/// under its root, its relative path is not valid UTF-8, or its file cannot be read.
+pub fn bundle_libraries(
+    library_path: Vec<(&str, Vec<PathBuf>)>,
+) -> Result<Vec<Library>, io::Error> {
+    let mut libraries = vec![
+        embedded_library!("libcomponentize_py_runtime.so"),
+        embedded_library!("libpython3.12.so"),
+        embedded_library!("libc.so"),
+        embedded_library!("libwasi-emulated-mman.so"),
+        embedded_library!("libwasi-emulated-process-clocks.so"),
+        embedded_library!("libwasi-emulated-getpid.so"),
+        embedded_library!("libwasi-emulated-signal.so"),
+        embedded_library!("libc++.so"),
+        embedded_library!("libc++abi.so"),
+    ];
+
+    for (index, (root, libs)) in library_path.iter().enumerate() {
+        for library in libs {
+            let relative = library
+                .strip_prefix(root)
+                .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error))?
+                .to_str()
+                .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "non-UTF-8 path"))?
+                .replace('\\', "/");
+
+            libraries.push(Library {
+                name: format!("/{index}/{relative}"),
+                module: fs::read(library)?,
+                dl_openable: true,
+            });
+        }
+    }
+
+    Ok(libraries)
+}