diff --git a/packages/std/Cargo.lock b/packages/std/Cargo.lock index 83169e15..1b397232 100644 --- a/packages/std/Cargo.lock +++ b/packages/std/Cargo.lock @@ -1809,10 +1809,13 @@ dependencies = [ name = "tangram_std" version = "0.0.0" dependencies = [ + "bytes", "futures", "serde", "serde_json", "tangram_client", + "tangram_serialize", + "tempfile", "tracing", "tracing-subscriber", ] diff --git a/packages/std/Cargo.toml b/packages/std/Cargo.toml index 73855f31..d4fbc0db 100644 --- a/packages/std/Cargo.toml +++ b/packages/std/Cargo.toml @@ -26,6 +26,7 @@ pedantic = { level = "warn", priority = -1 } result_large_err = "allow" [workspace.dependencies] +bytes = { version = "1", features = ["serde"] } clap = { version = "4", features = ["derive"] } fnv = "1" futures = "0.3" @@ -35,6 +36,7 @@ libc = "0.2" serde = { version = "1", features = ["derive"] } serde_json = "1" tangram_client = { default-features = false, git = "https://github.com/tangramdotdev/tangram", rev = "58527c57de3217c82d0c54e3dacaba6394245fec" } +tangram_serialize = { default-features = false, git = "https://github.com/tangramdotdev/tangram", rev = "58527c57de3217c82d0c54e3dacaba6394245fec" } tempfile = "3" tokio = { version = "1", default-features = false, features = [ "rt", @@ -77,12 +79,16 @@ path = "packages/std/lib.rs" workspace = true [dependencies] +bytes = { workspace = true } futures = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } tangram_client = { workspace = true } +tangram_serialize = { workspace = true } +tempfile = { workspace = true } tracing = { workspace = true, optional = true } tracing-subscriber = { workspace = true, optional = true } [features] tracing = ["dep:tracing", "dep:tracing-subscriber"] +default = ["tracing"] \ No newline at end of file diff --git a/packages/std/bootstrap/make.tg.ts b/packages/std/bootstrap/make.tg.ts index 383cc904..eda000b5 100644 --- a/packages/std/bootstrap/make.tg.ts +++ b/packages/std/bootstrap/make.tg.ts @@ 
-23,10 +23,12 @@ export const source = () => { export type Arg = { host?: string; + embedWrapper?: boolean | undefined; }; export const build = async (arg?: Arg) => { const host = arg?.host ?? (await std.triple.host()); + const embedWrapper = arg?.embedWrapper ?? true; const configure = { args: ["--disable-dependency-tracking"], @@ -48,7 +50,14 @@ export const build = async (arg?: Arg) => { install, }; - const env = std.env.arg(sdk(host), { utils: false }); + let envArgs: Array> = [ + sdk(host), + { utils: false }, + ]; + if (embedWrapper) { + envArgs.push({ TGLD_EMBED_WRAPPER: true }); + } + const env = std.env.arg(...envArgs); const output = await autotoolsInternal({ bootstrap: true, diff --git a/packages/std/packages/std/manifest.rs b/packages/std/packages/std/manifest.rs index e133e61e..07ae3f99 100644 --- a/packages/std/packages/std/manifest.rs +++ b/packages/std/packages/std/manifest.rs @@ -1,12 +1,14 @@ use std::{ collections::BTreeMap, - io::{Read, Seek}, - path::Path, + io::{Read, Seek, Write}, + path::{Path, PathBuf}, str::FromStr as _, sync::LazyLock, }; use tangram_client as tg; +use crate::CLOSEST_ARTIFACT_PATH; + /// The magic number used to indicate an executable has a manifest. pub const MAGIC_NUMBER: &[u8] = b"tangram\0"; @@ -14,42 +16,64 @@ pub const MAGIC_NUMBER: &[u8] = b"tangram\0"; pub const VERSION: u64 = 0; /// The Tangram run entrypoint manifest. -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] pub struct Manifest { /// The interpreter for the executable. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 0, skip_serializing_if = "Option::is_none")] pub interpreter: Option, /// The executable to run. + #[tangram_serialize(id = 1)] pub executable: Executable, /// The environment variable mutations to apply. 
#[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 2, skip_serializing_if = "Option::is_none")] pub env: Option, /// The command line arguments to pass to the executable. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 3, skip_serializing_if = "Option::is_none")] pub args: Option>, } /// An interpreter is another program that is used to launch the executable. -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] #[serde(tag = "kind")] pub enum Interpreter { /// A normal interpreter. #[serde(rename = "normal")] + #[tangram_serialize(id = 0)] Normal(NormalInterpreter), /// An ld-linux interpreter. #[serde(rename = "ld-linux")] + #[tangram_serialize(id = 1)] LdLinux(LdLinuxInterpreter), /// An ld-musl interpreter. #[serde(rename = "ld-musl")] + #[tangram_serialize(id = 2)] LdMusl(LdMuslInterpreter), // A dyld interpreter. #[serde(rename = "dyld")] + #[tangram_serialize(id = 3)] DyLd(DyLdInterpreter), } @@ -63,74 +87,127 @@ impl Interpreter { } } -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] pub struct NormalInterpreter { /// The path to the file to exec. + #[tangram_serialize(id = 0)] pub path: tg::template::Data, /// Arguments for the interpreter. + #[tangram_serialize(id = 1)] pub args: Vec, } -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] #[serde(rename_all = "camelCase")] pub struct LdLinuxInterpreter { /// The path to ld-linux.so. + #[tangram_serialize(id = 0)] pub path: tg::template::Data, /// The paths for the `--library-path` argument. 
#[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 1, skip_serializing_if = "Option::is_none")] pub library_paths: Option>, /// The paths for the `--preload` argument. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 2, skip_serializing_if = "Option::is_none")] pub preloads: Option>, /// Any additional arguments. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 3, skip_serializing_if = "Option::is_none")] pub args: Option>, } -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] #[serde(rename_all = "camelCase")] pub struct LdMuslInterpreter { /// The path to ld-linux.so. + #[tangram_serialize(id = 0)] pub path: tg::template::Data, /// The paths for the `--library-path` argument. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 1, skip_serializing_if = "Option::is_none")] pub library_paths: Option>, /// The paths for the `--preload` argument. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 2, skip_serializing_if = "Option::is_none")] pub preloads: Option>, /// Any additional arguments. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 3, skip_serializing_if = "Option::is_none")] pub args: Option>, } -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] #[serde(rename_all = "camelCase")] pub struct DyLdInterpreter { /// The paths for the `DYLD_LIBRARY_PATH` environment variable. #[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 0, skip_serializing_if = "Option::is_none")] pub library_paths: Option>, /// The paths for the `DYLD_INSERT_LIBRARIES` environment variable. 
#[serde(skip_serializing_if = "Option::is_none")] + #[tangram_serialize(id = 1, skip_serializing_if = "Option::is_none")] pub preloads: Option>, } /// An executable launched by the entrypoint. -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + Debug, + serde::Serialize, + serde::Deserialize, + tangram_serialize::Serialize, + tangram_serialize::Deserialize, +)] #[serde(rename_all = "camelCase", tag = "kind", content = "value")] pub enum Executable { /// A path to an executable file. + #[tangram_serialize(id = 0)] Path(tg::template::Data), /// A script which will be rendered to a file and interpreted. + #[tangram_serialize(id = 1)] Content(tg::template::Data), + + /// A virtual address. + #[tangram_serialize(id = 2)] + Address(u64), } impl Manifest { @@ -213,6 +290,110 @@ impl Manifest { Ok(Some(manifest)) } + pub async fn embed(&self, tg: &impl tg::Handle, file: &tg::File) -> tg::Result { + #[cfg(feature = "tracing")] + tracing::debug!(?self, "Embedding manifest"); + + // Get the stub and wrap files. + let stub_bin = TANGRAM_STUB_BIN + .as_ref() + .ok_or_else(|| tg::error!("expected a stub"))?; + let stub_elf = TANGRAM_STUB_ELF + .as_ref() + .ok_or_else(|| tg::error!("expected a stub"))?; + let wrap = TANGRAM_WRAP + .as_ref() + .ok_or_else(|| tg::error!("expected a wrap binary"))?; + + tg::cache::cache( + tg, + tg::cache::Arg { + artifacts: vec![ + file.id().into(), + stub_bin.id().into(), + stub_elf.id().into(), + wrap.id().into(), + ], + }, + ) + .await + .map_err(|source| tg::error!(!source, "failed to cache artifacts"))?; + + // Get their paths on disk. + let path: PathBuf = CLOSEST_ARTIFACT_PATH.clone().into(); + let input = path.join(file.id().to_string()); + let stub_bin = path.join(stub_bin.id().to_string()); + let stub_elf = path.join(stub_elf.id().to_string()); + let wrap = path.join(wrap.id().to_string()); + + // Create a temp file for the manifest. 
+		let mut manifest = tempfile::NamedTempFile::new()
+			.map_err(|source| tg::error!(!source, "failed to get temp file"))?;
+
+		// Create a random output name. The temp file is created here and only its path is handed
+		// to the wrap tool below.
+		let tempfile = tempfile::NamedTempFile::new()
+			.map_err(|source| tg::error!(!source, "failed to create temp file"))?;
+		let output = tempfile.path();
+
+		// Create the manifest file. The manifest is written as JSON. TODO: asyncify.
+		let contents = serde_json::to_vec(self)
+			.map_err(|source| tg::error!(!source, "failed to serialize manifest"))?;
+		manifest
+			.as_file_mut()
+			.write_all(&contents)
+			.map_err(|source| tg::error!(!source, "failed to write manifest"))?;
+
+		// Run the command. The positional arguments are, in order: the input executable, the
+		// output path, the stub ELF, the stub binary, and the manifest path. Stdout and stderr are
+		// inherited so the tool's diagnostics are visible to the caller.
+		let success = std::process::Command::new(wrap)
+			.arg(input)
+			.arg(output)
+			.arg(stub_elf)
+			.arg(stub_bin)
+			.arg(manifest.path())
+			.stdout(std::process::Stdio::inherit())
+			.stderr(std::process::Stdio::inherit())
+			.output()
+			.map_err(|source| tg::error!(!source, "failed to wrap the binary"))?
+			.status
+			.success();
+		if !success {
+			return Err(tg::error!("failed to run the command"));
+		}
+
+		// Read the wrapped binary back into memory, remove the on-disk copy, and store the bytes
+		// as a blob.
+		// NOTE(review): `tempfile` is still alive here, so it presumably attempts a second removal
+		// when dropped — confirm that this is harmless.
+		let bytes = std::fs::read(output)
+			.map_err(|source| tg::error!(!source, "failed to read the output"))?;
+		std::fs::remove_file(output)
+			.map_err(|source| tg::error!(!source, "failed to remove output file"))?;
+		let cursor = std::io::Cursor::new(bytes);
+		let blob = tg::Blob::with_reader(tg, cursor)
+			.await
+			.map_err(|source| tg::error!(!source, "failed to create blob"))?;
+
+		// Obtain the dependencies from the manifest to add to the file.
+		// NOTE: We know the wrapper file has no dependencies, so there is no need to merge.
+		let dependencies = self.dependencies();
+		let dependencies = if dependencies.is_empty() {
+			None
+		} else {
+			Some(dependencies)
+		};
+
+		// Create a file with the new blob and references.
+ let mut output_file = tg::File::builder(blob).executable(true); + if let Some(dependencies) = dependencies { + output_file = output_file.dependencies(dependencies); + } + let output_file = output_file.build(); + + #[cfg(feature = "tracing")] + { + let file_id = output_file.id(); + tracing::trace!(?file_id, "created wrapper file"); + } + + Ok(output_file) + } + /// Create a new wrapper from a manifest. Will locate the wrapper file from the `TANGRAM_WRAPPER_ID` environment variable. pub async fn write(&self, tg: &impl tg::Handle) -> tg::Result { #[cfg(feature = "tracing")] @@ -352,6 +533,7 @@ impl Manifest { Executable::Content(template) => { collect_dependencies_from_template_data(template, &mut dependencies); }, + Executable::Address(_) => (), } // Collect the references from the env. @@ -470,3 +652,24 @@ static TANGRAM_WRAPPER: LazyLock = LazyLock::new(|| { let id = tg::file::Id::from_str(&id_value).expect("TANGRAM_WRAPPER_ID is not a valid file ID"); tg::File::with_id(id) }); + +static TANGRAM_STUB_BIN: LazyLock> = LazyLock::new(|| { + std::env::var("TANGRAM_STUB_BIN_ID").ok().map(|id| { + let id = id.parse().expect("TANGRAM_STUB_BIN_ID is not a valid ID"); + tg::File::with_id(id) + }) +}); + +static TANGRAM_STUB_ELF: LazyLock> = LazyLock::new(|| { + std::env::var("TANGRAM_STUB_ELF_ID").ok().map(|id| { + let id = id.parse().expect("TANGRAM_STUB_ELF_ID is not a valid ID"); + tg::File::with_id(id) + }) +}); + +static TANGRAM_WRAP: LazyLock> = LazyLock::new(|| { + std::env::var("TANGRAM_WRAP_ID").ok().map(|id| { + let id = id.parse().expect("TANGRAM_WRAP_ID is not a valid ID"); + tg::File::with_id(id) + }) +}); diff --git a/packages/std/packages/stub/include/aarch64/debug.h b/packages/std/packages/stub/include/aarch64/debug.h new file mode 100644 index 00000000..e4f6ffa4 --- /dev/null +++ b/packages/std/packages/stub/include/aarch64/debug.h @@ -0,0 +1,6 @@ +#pragma once +#ifdef BREAKPOINTS + #define BREAK do { asm volatile ("brk #0"); } while (0) +#else + #define 
BREAK +#endif diff --git a/packages/std/packages/stub/include/aarch64/syscall.h b/packages/std/packages/stub/include/aarch64/syscall.h new file mode 100644 index 00000000..a6369c0d --- /dev/null +++ b/packages/std/packages/stub/include/aarch64/syscall.h @@ -0,0 +1,136 @@ +#pragma once + +#define __NR_getcwd 17 +#define __NR_openat 56 +#define __NR_close 57 +#define __NR_lseek 62 +#define __NR_write 64 +#define __NR_pread64 67 +#define __NR_readlinkat 78 +#define __NR_fstat 80 +#define __NR_exit 93 +#define __NR_getrlimit 163 +#define __NR_execve 221 +#define __NR_munmap 215 +#define __NR_mmap 222 +#define __NR_getrandom 278 + +static inline long syscall1 ( + long nr, + long arg1 +) { + register long x8 asm("x8") = nr; + register long x0 asm("x0") = arg1; + asm volatile ( + "svc #0" + : "=r"(x0) + : "r"(x8), "r"(x0) + : "memory", "cc" + ); + return x0; +} + +static inline long syscall2 ( + long nr, + long arg1, + long arg2 +) { + register long x8 asm("x8") = nr; + register long x0 asm("x0") = arg1; + register long x1 asm("x1") = arg2; + asm volatile ( + "svc #0" + : "=r"(x0) + : "r"(x8), "r"(x0), "r"(x1) + : "memory", "cc" + ); + return x0; +} + +static inline long syscall3 ( + long nr, + long arg1, + long arg2, + long arg3 +) { + register long x8 asm("x8") = nr; + register long x0 asm("x0") = arg1; + register long x1 asm("x1") = arg2; + register long x2 asm("x2") = arg3; + asm volatile ( + "svc #0" + : "=r"(x0) + : "r"(x8), "r"(x0), "r"(x1), "r"(x2) + : "memory", "cc" + ); + return x0; +} + +static inline long syscall4 ( + long nr, + long arg1, + long arg2, + long arg3, + long arg4 +) { + register long x8 asm("x8") = nr; + register long x0 asm("x0") = arg1; + register long x1 asm("x1") = arg2; + register long x2 asm("x2") = arg3; + register long x3 asm("x3") = arg4; + asm volatile ( + "svc #0" + : "=r"(x0) + : "r"(x8), "r"(x0), "r"(x1), "r"(x2), "r"(x3) + : "memory", "cc" + ); + return x0; +} + +static inline long syscall5 ( + long nr, + long arg1, + long arg2, + 
long arg3, + long arg4, + long arg5 +) { + register long x8 asm("x8") = nr; + register long x0 asm("x0") = arg1; + register long x1 asm("x1") = arg2; + register long x2 asm("x2") = arg3; + register long x3 asm("x3") = arg4; + register long x4 asm("x4") = arg5; + asm volatile ( + "svc #0" + : "=r"(x0) + : "r"(x8), "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4) + : "memory", "cc" + ); + return x0; +} + +static inline long syscall6 ( + long nr, + long arg1, + long arg2, + long arg3, + long arg4, + long arg5, + long arg6 +) { + register long x8 asm("x8") = nr; + register long x0 asm("x0") = arg1; + register long x1 asm("x1") = arg2; + register long x2 asm("x2") = arg3; + register long x3 asm("x3") = arg4; + register long x4 asm("x4") = arg5; + register long x5 asm("x5") = arg6; + asm volatile ( + "svc #0" + : "=r"(x0) + : "r"(x8), "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5) + : "memory", "cc" + ); + return x0; +} diff --git a/packages/std/packages/stub/include/aarch64/util.h b/packages/std/packages/stub/include/aarch64/util.h new file mode 100644 index 00000000..833c2f4f --- /dev/null +++ b/packages/std/packages/stub/include/aarch64/util.h @@ -0,0 +1,16 @@ +#pragma once + +static void jump_to_entrypoint (void* stack, void* entrypoint) { + register long x0 asm("x0") = (long)stack; + register long x1 asm("x1") = (long)entrypoint; + asm volatile ( + "mov sp, x0;" // set the stack pointer. + "mov x29, xzr;" // clear the frame pointer. + "mov x0, xzr;" // clear atexit pointer + "br x1;" // jump to the entrypoint + : + : "r"(x0), "r"(x1) + : "memory", "cc" + ); + __builtin_unreachable(); +} diff --git a/packages/std/packages/stub/include/arena.h b/packages/std/packages/stub/include/arena.h new file mode 100644 index 00000000..af3d65c2 --- /dev/null +++ b/packages/std/packages/stub/include/arena.h @@ -0,0 +1,139 @@ +// Extremely minimal arena allocator. +#pragma once +#include +#include "debug.h" + +// Compile with -DSTDLIB for debugging only. 
+#ifdef STDLIB + #define _GNU_SOURCE + #include + #include +#else + #include "syscall.h" +#endif + +// Number of pages per segment in the arena. +#define DEFAULT_NUM_PAGES 16 + +// 1 GiB max +#define MAX_NUM_PAGES 0x40000 + +// Helper to allocate a single T +#define ALLOC(arena, T) (T*)alloc(arena, sizeof(T), _Alignof(T)) + +// Helper to allocate an array of n T. +#define ALLOC_N(arena, n, T) (T*)alloc(arena, ((size_t)(n)) * sizeof(T), _Alignof(T)) + +// Helper to align `m` to `n`. +#define ALIGN(m, n) (((m) + (n) - 1) & ~((n) - 1)) +typedef struct Segment Segment; +typedef struct Arena Arena; + +struct Arena { + Segment* segment; + uint64_t num_pages; + uint64_t page_size; +}; + +struct Segment { + uint64_t offset; + uint64_t length; + Segment* next_segment; + uint8_t memory[]; +}; + +static void create_arena (Arena* arena, uint64_t page_size); +static void destroy_arena (Arena* arena); +static void* alloc (Arena* arena, size_t size, size_t alignment); +static void add_segment (Arena* arena, size_t num_pages); + +static void create_arena (Arena* arena, uint64_t page_size) { + arena->num_pages = 0; + arena->segment = NULL; + arena->page_size = page_size; + add_segment(arena, DEFAULT_NUM_PAGES); +} + +static void destroy_arena (Arena* arena) { + Segment* current = arena->segment; + while(current) { + // Save the next segment. + Segment* next = current->next_segment; + + // Sanity check to detect corruption of the segment itself. + ABORT_IF((current->length % arena->page_size) != 0, "internal error: corrupted segment"); + + // Unmap and error if it fails. + int ec = munmap((void*)current, current->length); + ABORT_IF(ec != 0, "internal error: munmap failed (addr=0x%lx, len=0x%lx)", (uintptr_t)current, (uintptr_t)current->length); + + // Update the current segment. + current = next; + } +} + +static void* alloc (Arena* arena, size_t size, size_t alignment) { + // Sanity check. 
+	ABORT_IF((size % alignment) != 0, "internal error: misaligned allocation");
+
+	// Compute start/end of the allocation. Offsets are relative to the start of the segment
+	// header, which is why a fresh segment's offset begins at sizeof(Segment).
+	size_t start = ALIGN(arena->segment->offset, alignment);
+	size_t end = start + size;
+
+	// Check if we need to add more space.
+	if (end > arena->segment->length) {
+		// Compute the minimum number of pages required by the allocation, including the
+		// (aligned) segment header that precedes the payload.
+		size_t min_size = ALIGN(ALIGN(sizeof(Segment), alignment) + size, arena->page_size);
+		size_t min_num_pages = min_size / arena->page_size;
+
+		// The number of pages we use is the MAX(min_num_pages, DEFAULT_NUM_PAGES). This must be a
+		// page count, not a byte count: add_segment() multiplies it by the page size.
+		size_t num_pages = min_num_pages < DEFAULT_NUM_PAGES ? DEFAULT_NUM_PAGES : min_num_pages;
+
+		// Add a new segment. The remaining space in the previous segment is abandoned.
+		add_segment(arena, num_pages);
+
+		// Update start/end range of the allocation, as it will have changed.
+		start = ALIGN(arena->segment->offset, alignment);
+		end = start + size;
+
+		ABORT_IF(end > arena->segment->length, "internal error: failed to allocate enough space");
+	}
+
+	// Allocate by bumping the offset of the current segment.
+	uintptr_t pointer = (uintptr_t)arena->segment + start;
+	arena->segment->offset = end;
+
+	// Return the allocated pointer.
+	return (void*)pointer;
+}
+
+// Map a new segment of `num_pages` pages and push it onto the front of the arena's segment list.
+// Aborts if `num_pages` is zero, if the mapping fails, or if the arena grows past MAX_NUM_PAGES.
+static void add_segment (Arena* arena, size_t num_pages) {
+	// Sanity check.
+	ABORT_IF(num_pages == 0, "internal: invalid argument");
+
+	// Compute the segment data. The header lives at the start of the mapping, so the first free
+	// byte is at sizeof(Segment).
+	size_t length = num_pages * arena->page_size;
+	size_t offset = sizeof(Segment);
+	Segment* next_segment = arena->segment;
+
+	// Allocate a new segment.
+	Segment* segment = (Segment*)mmap(
+		NULL,
+		length,
+		PROT_READ | PROT_WRITE,
+		MAP_ANONYMOUS | MAP_PRIVATE,
+		-1,
+		0
+	);
+	ABORT_IF((void*)segment == MAP_FAILED, "internal: mmap failed");
+
+	// Update the segment.
+	segment->length = length;
+	segment->offset = offset;
+	segment->next_segment = next_segment;
+
+	// Update the arena.
+ arena->segment = segment; + arena->num_pages += num_pages; + ABORT_IF(arena->num_pages >= MAX_NUM_PAGES, "internal error: OOM"); +} diff --git a/packages/std/packages/stub/include/debug.h b/packages/std/packages/stub/include/debug.h new file mode 100644 index 00000000..f7b7bb45 --- /dev/null +++ b/packages/std/packages/stub/include/debug.h @@ -0,0 +1,99 @@ +// Debug helpers. +#pragma once +#include "syscall.h" +#include + +// Software breakpoints. +#include "x86_64/debug.h" + +// Have to provide a putc implementation for nanoprintf. +static void __putc (int ch, void*) { + uint8_t buf = (uint8_t)ch; + write(STDERR_FILENO, (void*)&buf, 1); +} + +// printf/snprintf/etc. +#define NANOPRINTF_IMPLEMENTATION +#define NANOPRINTF_VISIBILITY_STATIC +#include "nanoprintf.h" +#undef NANOPRINTF_IMPLEMENTATION + +// For print debugging. +static void trace (const char* format, ...) { + va_list args; + va_start(args, format); + npf_vpprintf(__putc, NULL, format, args); + va_end(args); +} + +// Abort with an error condition. +#define ABORT(...) do { trace(__VA_ARGS__); trace("\n"); BREAK; exit(111); } while (0) +#define ABORT_IF(cond, ...) if (cond) { ABORT(__VA_ARGS__); } + +// Debug print macro. +#ifdef DEBUG + #define DBG(...) do { trace(__VA_ARGS__); trace("\n"); } while (0) +#else + #define DBG(...) +#endif + +// Convert auxv type to a string. 
+static inline const char* auxv_type_string (uint64_t a_type) {
+	// Returns the macro name for a known auxiliary-vector entry type, or "UNKNOWN" otherwise.
+	switch (a_type) {
+		case AT_NULL: return "AT_NULL";
+		case AT_IGNORE: return "AT_IGNORE";
+		case AT_EXECFD: return "AT_EXECFD";
+		case AT_PHDR: return "AT_PHDR";
+		case AT_PHENT: return "AT_PHENT";
+		case AT_PHNUM: return "AT_PHNUM";
+		case AT_PAGESZ: return "AT_PAGESZ";
+		case AT_BASE: return "AT_BASE";
+		case AT_FLAGS: return "AT_FLAGS";
+		case AT_ENTRY: return "AT_ENTRY";
+		case AT_NOTELF: return "AT_NOTELF";
+		case AT_UID: return "AT_UID";
+		case AT_EUID: return "AT_EUID";
+		case AT_GID: return "AT_GID";
+		case AT_EGID: return "AT_EGID";
+		case AT_CLKTCK: return "AT_CLKTCK";
+		case AT_EXECFN: return "AT_EXECFN";
+		case AT_PLATFORM: return "AT_PLATFORM";
+		case AT_HWCAP2: return "AT_HWCAP2";
+		case AT_HWCAP: return "AT_HWCAP";
+		case AT_FPUCW: return "AT_FPUCW";
+		case AT_DCACHEBSIZE: return "AT_DCACHEBSIZE";
+		case AT_ICACHEBSIZE: return "AT_ICACHEBSIZE";
+		case AT_UCACHEBSIZE: return "AT_UCACHEBSIZE";
+		case AT_SYSINFO: return "AT_SYSINFO";
+		case AT_SYSINFO_EHDR: return "AT_SYSINFO_EHDR";
+		case AT_MINSIGSTKSZ: return "AT_MINSIGSTKSZ";
+		case AT_SECURE: return "AT_SECURE";
+		case AT_RANDOM: return "AT_RANDOM";
+		// NOTE(review): the next two are spelled as literals rather than AT_RSEQ_* macros,
+		// presumably because the headers in use do not define them — confirm.
+		case 27: return "AT_RSEQ_FEATURE_SIZE";
+		case 28: return "AT_RSEQ_ALIGN";
+		default: return "UNKNOWN";
+	}
+}
+
+// Convert a PT_xxx value to a string.
+static inline const char* p_type_string (uint64_t p_type) { + switch (p_type) { + case PT_NULL: return "PT_NULL"; + case PT_LOAD: return "PT_LOAD"; + case PT_DYNAMIC: return "PT_DYNAMIC"; + case PT_INTERP: return "PT_INTERP"; + case PT_NOTE: return "PT_NOTE"; + case PT_SHLIB: return "PT_SHLIB"; + case PT_PHDR: return "PT_PHDR"; + case PT_TLS: return "PT_TLS"; + case PT_NUM: return "PT_NUM"; + case PT_GNU_EH_FRAME: return "PT_GNU_EH_FRAME"; + case PT_GNU_STACK: return "PT_GNU_STACK"; + case PT_GNU_RELRO: return "PT_GNU_RELRO"; + case PT_GNU_PROPERTY: return "PT_GNU_PROPERTY"; + case PT_SUNWBSS: return "PT_SUNWBSS"; + case PT_SUNWSTACK: return "PT_SUNWSTACK"; + case PT_HISUNW: return "PT_HISUNW"; + default: return "UNKNOWN"; + } +} diff --git a/packages/std/packages/stub/include/footer.h b/packages/std/packages/stub/include/footer.h new file mode 100644 index 00000000..f87f0eb1 --- /dev/null +++ b/packages/std/packages/stub/include/footer.h @@ -0,0 +1,8 @@ +#pragma once +#include + +typedef struct { + uint64_t size; + uint64_t version; + char magic[8]; +} Footer; diff --git a/packages/std/packages/stub/include/json.h b/packages/std/packages/stub/include/json.h new file mode 100644 index 00000000..f24a9afa --- /dev/null +++ b/packages/std/packages/stub/include/json.h @@ -0,0 +1,533 @@ +/// This parser implements a subset of JSON. Notably, we assume numbers are integers. +#pragma once + +// Standard includes. +#include +#include + +// Internals. +#include "arena.h" +#include "util.h" + +// JSON value types +enum { + JSON_NULL, + JSON_BOOL, + JSON_NUMBER, + JSON_STRING, + JSON_ARRAY, + JSON_OBJECT +}; + +// Parsing errors +enum { + ERROR_OK, + ERROR_INVALID_CHAR, + ERROR_UNEXPECTED_EOF +}; + +// Some forward declarations of types because C. +typedef struct JsonValue JsonValue; +typedef struct JsonArray JsonArray; +typedef struct JsonObject JsonObject; + +// Arrays are implemented as linked lists. Ugh gross, I know, linked lists suck! 
But they're the
+// right call here, since we don't know how many items are going to be in the array before we parse
+// it, and we don't have an efficient implementation of realloc() available in our `Arena`
+// allocator.
+//
+// If `value` and `next` are NULL the array is empty.
+struct JsonArray {
+	JsonValue* value;
+	JsonArray* next;
+};
+
+// Like above, we use a linked list. Items may appear multiple times in the list (behavior is
+// unspecified within JSON).
+//
+// If `value` and `next` are NULL the object is empty.
+struct JsonObject {
+	String key;
+	JsonValue* value;
+	JsonObject* next;
+};
+
+// Values are a tagged enum: `kind` is one of the JSON_* constants and selects the active member
+// of `value`. JSON_NULL carries no payload.
+struct JsonValue {
+	uint8_t kind;
+	union {
+		bool _bool;
+		double _number;
+		String _string;
+		JsonArray _array;
+		JsonObject _object;
+	} value;
+};
+
+// Parser state: the arena that backs every allocation, and the not-yet-consumed input.
+typedef struct {
+	Arena* arena;
+	String input;
+	int status;
+} JsonParser;
+
+// Forward declare the parsing functions. Each returns one of the ERROR_* codes.
+static int parse_json_value (JsonParser* parser, JsonValue* value);
+static int parse_json_object (JsonParser* parser, JsonObject* object);
+static int parse_json_array (JsonParser* parser, JsonArray* value);
+static int parse_json_string (JsonParser* parser, String* value);
+static int parse_json_number (JsonParser* parser, double* value);
+static int parse_json_keyword (JsonParser* parser, JsonValue* value);
+
+// Ugly macros for implementing parsing rules. They expect a `JsonParser* parser` in scope and may
+// return from the enclosing function.
+#define ENSURE_NOT_EMPTY \
+	if (parser->input.len == 0) { \
+		return ERROR_UNEXPECTED_EOF; \
+	}
+
+#define EAT_CHAR \
+	parser->input.ptr++; \
+	parser->input.len--;
+
+// Skips JSON whitespace (space, '\n', '\r', '\t'). '\f' is not JSON whitespace but is still
+// tolerated so that input accepted before remains accepted.
+#define EAT_WHITESPACE \
+	while(parser->input.len) { \
+		uint8_t tok = *parser->input.ptr; \
+		if (tok == ' ' || tok == '\n' || tok == '\r' || tok == '\t' || tok == '\f') { \
+			EAT_CHAR; \
+			continue; \
+		} \
+		break; \
+	}
+
+// Parse a single JSON value.
+static int parse_json_value (JsonParser* parser, JsonValue* value) {
+	// Consume whitespace.
+	EAT_WHITESPACE;
+	ENSURE_NOT_EMPTY;
+
+	// Peek the next token.
+	switch (*parser->input.ptr) {
+		case '{':
+			value->kind = JSON_OBJECT;
+			return parse_json_object(parser, &value->value._object);
+		case '[':
+			value->kind = JSON_ARRAY;
+			return parse_json_array(parser, &value->value._array);
+		case '"':
+			value->kind = JSON_STRING;
+			return parse_json_string(parser, &value->value._string);
+		// A number starts with '-' or a digit.
+		case '-':
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			value->kind = JSON_NUMBER;
+			return parse_json_number(parser, &value->value._number);
+		// null / true / false. The keyword parser sets `kind` itself.
+		case 'n':
+		case 't':
+		case 'f':
+			return parse_json_keyword(parser, value);
+		// Anything else is invalid JSON!
+		default:
+			return ERROR_INVALID_CHAR;
+	}
+}
+
+// Parse a JSON object into a linked list of key/value nodes, starting at `object`. The input must
+// be positioned on the opening '{'. Returns one of the ERROR_* codes.
+static int parse_json_object (JsonParser* parser, JsonObject* object) {
+	// Eat the leading '{'.
+	EAT_CHAR;
+
+	// Clear the value object, so an empty object has NULL `value` and `next`.
+	memset((void*)object, 0, sizeof(JsonObject));
+
+	// Parse fields.
+	while (parser->input.len) {
+		EAT_WHITESPACE;
+		ENSURE_NOT_EMPTY;
+		uint8_t tok = *parser->input.ptr;
+		if (tok == '}') {
+			break;
+		}
+		if (tok == '"') {
+			// Parse the key.
+			int ec = parse_json_string(parser, &object->key);
+			if (ec) {
+				return ec;
+			}
+
+			// Parse the ':' separator
+			EAT_WHITESPACE;
+			ENSURE_NOT_EMPTY;
+			if (*parser->input.ptr != ':') {
+				return ERROR_INVALID_CHAR;
+			}
+			EAT_CHAR;
+
+			// Parse the value.
+			object->value = ALLOC(parser->arena, JsonValue);
+			ec = parse_json_value(parser, object->value);
+			if (ec) {
+				return ec;
+			}
+
+			// Parse the ',' if it exists.
+			EAT_WHITESPACE;
+			ENSURE_NOT_EMPTY;
+			if (*parser->input.ptr == ',') {
+				EAT_CHAR;
+
+				// Allocate the next object in the list.
+				// NOTE(review): the new node is not cleared here; this presumably relies on
+				// the arena handing out zeroed memory — confirm.
+				object->next = ALLOC(parser->arena, JsonObject);
+
+				// Follow the link.
+				object = object->next;
+				continue;
+			}
+		}
+		break;
+	}
+	EAT_WHITESPACE;
+	ENSURE_NOT_EMPTY;
+	if (*parser->input.ptr != '}') {
+		return ERROR_INVALID_CHAR;
+	}
+	EAT_CHAR;
+	return ERROR_OK;
+}
+
+// Parse a JSON array into a linked list of values, starting at `array`. The input must be
+// positioned on the opening '['. Returns one of the ERROR_* codes.
+static int parse_json_array (JsonParser* parser, JsonArray* array) {
+	// Eat the leading '['.
+	EAT_CHAR;
+
+	// Clear the value object, so an empty array has NULL `value` and `next`.
+	memset((void*)array, 0, sizeof(JsonArray));
+
+	while(parser->input.len) {
+		EAT_WHITESPACE;
+		ENSURE_NOT_EMPTY;
+		if (*parser->input.ptr == ']') {
+			break;
+		}
+
+		// Parse the value.
+		array->value = ALLOC(parser->arena, JsonValue);
+		int ec = parse_json_value(parser, array->value);
+		if (ec) {
+			return ec;
+		}
+
+		// Parse the ',' if it exists.
+		EAT_WHITESPACE;
+		ENSURE_NOT_EMPTY;
+		if (*parser->input.ptr == ',') {
+			EAT_CHAR;
+
+			// Allocate the next object in the list.
+			array->next = ALLOC(parser->arena, JsonArray);
+
+			// Follow the link.
+			array = array->next;
+			continue;
+		}
+
+		// No separator: the array must end here. Without this break a second value with no
+		// ',' in front of it (e.g. "[1 2]") would be parsed over the previous one.
+		break;
+	}
+
+	EAT_WHITESPACE;
+	ENSURE_NOT_EMPTY;
+	if (*parser->input.ptr != ']') {
+		return ERROR_INVALID_CHAR;
+	}
+	EAT_CHAR;
+	return ERROR_OK;
+}
+
+// Parse a JSON string. The input must be positioned on the opening '"'. On success `string`
+// either aliases the input (no escapes) or points at an arena copy with escapes resolved.
+static int parse_json_string (JsonParser* parser, String* string) {
+	// Consume the opening '"'
+	EAT_CHAR;
+
+	// To start we assume the string can be a substring of the argument.
+	string->ptr = parser->input.ptr;
+	string->len = 0;
+
+	// That assumption breaks if the string contains escape chars.
+	bool has_escape_chars = false;
+
+	// True when the previous character was a '\' that was not itself escaped, i.e. the current
+	// character is escaped and must not be treated as the closing quote.
+	bool escaped = false;
+	while(parser->input.len) {
+		uint8_t tok = *parser->input.ptr;
+
+		// Only an unescaped '"' ends the string; '\"' is part of its contents.
+		if (tok == '"' && !escaped) {
+			break;
+		}
+
+		// If the string contains '\' then we have to deal with escape characters.
+		has_escape_chars |= (tok == '\\');
+
+		// A '\' escapes the next character unless it is itself the escaped character.
+		escaped = (tok == '\\') && !escaped;
+
+		// Consume the token.
+		EAT_CHAR;
+
+		// Increment the length of the string.
+		string->len++;
+	}
+
+	// Edge case: if the string contains a '\' then we need to allocate a string with the un-
+	// escaped characters.
+	if (has_escape_chars) {
+		// The new string will always be smaller than the original, so we can use its old
+		// length as an upper bound.
+		uint8_t* ptr = (uint8_t*)alloc(parser->arena, string->len, 1);
+		uint32_t len = 0;
+
+		// Hold onto the last token. The initial value doesn't matter but it can't be '\'.
+		uint8_t prev_tok = ' ';
+
+		// Create an iterator over the string chars.
+		uint8_t* itr = string->ptr;
+		uint8_t* end = itr + string->len;
+
+		for(; itr != end; itr++) {
+			uint8_t tok = *itr;
+
+			// If the last char was '\' then we're currently escaping.
+			if (prev_tok == '\\') {
+				switch (tok) {
+					case '"':
+						ptr[len++] = '"';
+						break;
+					case '\\':
+						ptr[len++] = '\\';
+						break;
+					case '/':
+						ptr[len++] = '/';
+						break;
+					case 'b':
+						ptr[len++] = '\b';
+						break;
+					case 'f':
+						ptr[len++] = '\f';
+						break;
+					case 'n':
+						ptr[len++] = '\n';
+						break;
+					case 'r':
+						ptr[len++] = '\r';
+						break;
+					case 't':
+						ptr[len++] = '\t';
+						break;
+					case 'u':
+						// ABORT exits the process, so the fallthrough below is never reached.
+						ABORT("utf code points unsupported");
+					default:
+						return ERROR_INVALID_CHAR;
+				}
+
+				// Clear the state. The value doesn't matter, but it can't be '\'.
+				prev_tok = ' ';
+				continue;
+			}
+
+			// If we're not escaping, append the token. A '\' is dropped here and resolved on
+			// the next iteration.
+			if (tok != '\\') {
+				ptr[len++] = tok;
+			}
+
+			// Update prev_tok.
+			prev_tok = tok;
+		}
+
+		// Use the newly allocated string.
+		string->ptr = ptr;
+		string->len = len;
+	}
+	ENSURE_NOT_EMPTY;
+	if (*parser->input.ptr != '"') {
+		return ERROR_INVALID_CHAR;
+	}
+	EAT_CHAR;
+	return ERROR_OK;
+}
+
+// Returns true if `tok` is an ASCII decimal digit.
+static bool is_digit (uint8_t tok) {
+	return tok >= '0' && tok <= '9';
+}
+
+// Parse a JSON number. Only integers are supported: a fraction or exponent aborts. The input
+// must be positioned on the leading '-' or first digit.
+static int parse_json_number(JsonParser* parser, double* value) {
+	// Note to future Mike from Sep. 8 2025: we'll never actually need doubles
+	int sign = 1;
+	uint64_t base = 0;
+
+	// Parse the sign if it exists.
+	if (*parser->input.ptr == '-') {
+		EAT_CHAR;
+		sign = -1;
+	}
+
+	// Parse the digits.
+ while(parser->input.len) { + uint8_t tok = *parser->input.ptr; + if (tok >= 48 && tok <= 57) { + EAT_CHAR; + base *= 10; + base += (tok - 48); + if (base >= (1ul << 53ul)) { + ABORT("overflow"); + } + } else if (tok == '.' || tok == 'E' || tok == 'e') { + ABORT("only integers supported"); + } else if ( + tok == ' ' + || tok == '\n' + || tok == '\r' + || tok == '\t' + || tok == ',' + || tok == ']' + || tok == '}' + ) { + break; + } else { + return ERROR_INVALID_CHAR; + } + } + + // Compute the value. + *value = (double)sign * (double)base; + return ERROR_OK; +} + +static int parse_json_keyword(JsonParser* parser, JsonValue* value) { + String null_ = STRING_LITERAL("null"); + if (starts_with(parser->input, null_)) { + value->kind = JSON_NULL; + parser->input.ptr += null_.len; + parser->input.len -= null_.len; + return ERROR_OK; + } + String true_ = STRING_LITERAL("true"); + if (starts_with(parser->input, true_)) { + value->kind = JSON_BOOL; + value->value._bool = true; + parser->input.ptr += true_.len; + parser->input.len -= true_.len; + return ERROR_OK; + } + String false_ = STRING_LITERAL("false"); + if (starts_with(parser->input, false_)) { + value->kind = JSON_BOOL; + value->value._bool = false; + parser->input.ptr += false_.len; + parser->input.len -= false_.len; + return ERROR_OK; + } + return ERROR_INVALID_CHAR; +} +#undef ENSURE_NOT_EMPTY +#undef EAT_CHAR +#undef EAT_WHITESPACE + +static int print_json_value(JsonValue* value); +static int print_json_object(JsonObject* object); +static int print_json_array(JsonArray* array); +static int print_json_string(String* string); + +static int print_json_value(JsonValue* value) { + switch (value->kind) { + case JSON_NULL: + trace("null"); + break; + case JSON_BOOL: + value->value._bool ? 
trace("true") : trace("false"); + break; + case JSON_NUMBER: + trace("%ld", (uint64_t)value->value._number); + break; + case JSON_STRING: + print_json_string(&value->value._string); + break; + case JSON_ARRAY: + print_json_array(&value->value._array); + break; + case JSON_OBJECT: + print_json_object(&value->value._object); + break; + default: + break; + } +} + +static int print_json_object(JsonObject* object) { + trace("{"); + while (object) { + if (object->value) { + print_json_string(&object->key); + trace(":"); + print_json_value(object->value); + } + object = object->next; + if (object) { + trace(","); + } + } + trace("}"); +} + +static int print_json_array(JsonArray* array) { + trace("["); + while(array) { + if (array->value) { + print_json_value(array->value); + } + array = array->next; + if (array) { + trace(","); + } + } + trace("]"); +} + +static int print_json_string (String* string) { + trace("\""); + uint8_t* itr = string->ptr; + uint8_t* end = itr + string->len; + for(; itr != end; itr++) { + switch (*itr) { + case '\n': + trace("\\n"); + break; + case '\t': + trace("\\t"); + break; + case '\f': + trace("\\f"); + break; + case '\\': + trace("\\\\"); + break; + case '\r': + trace("\\r"); + break; + default: + trace("%c", (char)*itr); + break; + } + } + trace("\""); +} + +static JsonValue* json_get (JsonObject* object, const char* k) { + while (object) { + if (object->value && cstreq(object->key, k)) { + return object->value; + } + object = object->next; + } + return NULL; +} + +static uint64_t json_array_len (JsonArray* array) { + uint64_t len = 0; + while(array) { + if (!array->value) { + break; + } + array = array->next; + len++; + } + return len; +} diff --git a/packages/std/packages/stub/include/manifest.h b/packages/std/packages/stub/include/manifest.h new file mode 100644 index 00000000..f3adb2c4 --- /dev/null +++ b/packages/std/packages/stub/include/manifest.h @@ -0,0 +1,173 @@ +#pragma once +#include + +#include "arena.h" +#include "json.h" 
+#include "table.h" +#include "util.h" + +enum { + INTERPRETER_KIND_NORMAL, + INTERPRETER_KIND_LD_LINUX, + INTERPRETER_KIND_LD_MUSL +}; + +typedef struct { + uint64_t entrypoint; + String executable; + String interpreter; + uint64_t interpreter_kind; + size_t num_library_paths; + String* library_paths; + size_t num_preloads; + String* preloads; + size_t argc; + String* argv; + size_t interp_argc; + String* interp_argv; + String ld_library_path; + String ld_preload; + Table env; +} Manifest; + +typedef struct { + Arena* arena; + Manifest* manifest; + String artifacts_dir; +} Cx; + +#define ID_VERSION 0 +typedef struct { + uint8_t version; + uint8_t padding; + uint8_t kind; + uint8_t algorithm; + uint8_t body[]; +} Id; + +void parse_manifest (Arena* arena, Manifest* manifest, uint8_t* data, uint64_t len); +void create_manifest_from_json (Cx*, JsonValue* value); + +static void append_to_string ( + String* dst, + const String* src, + size_t capacity +) { + ABORT_IF(dst->len + src->len >= capacity, "out of capacity"); + memcpy(dst->ptr + dst->len, src->ptr, src->len); + dst->len += src->len; +} + +static void append_ch_to_string ( + String* dst, + char ch, + size_t capacity +) { + ABORT_IF(dst->len + 1 >= capacity, "out of capacity"); + dst->ptr[dst->len] = ch; + dst->len += 1; +} + +static String render_ld_library_path (Arena* arena, Manifest* manifest) { + String* itr = manifest->library_paths; + String* end = itr + manifest->num_library_paths; + String path = {0}; + + // Compute the size of the LD_LIBRARY_PATH env var + size_t len = 0; + for (; itr != end; itr++) { + if (itr != manifest->library_paths) { + len++; + } + len += itr->len; + } + + path.ptr = alloc(arena, len, 1); + path.len = len; + itr = manifest->library_paths; + + size_t offset = 0; + for (; itr != end; itr++) { + if (itr != manifest->library_paths) { + path.ptr[offset++] = ':'; + } + memcpy(path.ptr + offset, itr->ptr, itr->len); + offset += itr->len; + } + return path; +} + +static String 
render_ld_preload (Arena* arena, Manifest* manifest) { + String* itr = manifest->preloads; + String* end = itr + manifest->num_preloads; + String path = {0}; + + // Compute the size of the LD_LIBRARY_PATH env var + size_t len = 0; + for (; itr != end; itr++) { + if (itr != manifest->preloads) { + len++; + } + len += itr->len; + } + + path.ptr = alloc(arena, len, 1); + path.len = len; + itr = manifest->preloads; + + size_t offset = 0; + for (; itr != end; itr++) { + if (itr != manifest->preloads) { + path.ptr[offset++] = ':'; + } + memcpy(path.ptr + offset, itr->ptr, itr->len); + offset += itr->len; + } + return path; +} + +static void print_manifest (Manifest* manifest) { + if (manifest->executable.ptr) { + trace("executable: "); + print_json_string(&manifest->executable); + trace("\n"); + } + if (manifest->entrypoint) { + trace("entrypoint: %d\n", manifest->entrypoint); + } + trace("interpreter: %s\n", manifest->interpreter.ptr); + trace("libary_paths:\n"); + for(int i = 0; i < manifest->num_library_paths; i++) { + trace("\t"); + for (int n = 0; n < manifest->library_paths[i].len; n++) { + trace("%c", manifest->library_paths[i].ptr[n]); + } + trace("\n"); + } + trace("preloads:\n"); + for(int i = 0; i < manifest->num_preloads; i++) { + trace("\t"); + for (int n = 0; n < manifest->preloads[i].len; n++) { + trace("%c", manifest->preloads[i].ptr[n]); + } + trace("\n"); + } + trace("env:\n"); + for(Node* itr = manifest->env.list; itr != manifest->env.list + manifest->env.capacity; itr++) { + Node* node = itr; + while(node) { + if (node->key.ptr) { + trace("\t"); + for (int c = 0; c < node->key.len; c++) { + trace("%c", node->key.ptr[c]); + } + trace("="); + for (int c = 0; c < node->val.len; c++) { + trace("%c", node->val.ptr[c]); + } + trace("\n"); + } + node = node->next; + } + } +} diff --git a/packages/std/packages/stub/include/nanoprintf.h b/packages/std/packages/stub/include/nanoprintf.h new file mode 100644 index 00000000..64fda4ab --- /dev/null +++ 
b/packages/std/packages/stub/include/nanoprintf.h @@ -0,0 +1,1137 @@ +/* nanoprintf v0.5.5: a tiny embeddable printf replacement written in C. + https://github.com/charlesnicholson/nanoprintf + charles.nicholson+nanoprintf@gmail.com + dual-licensed under 0bsd and unlicense, take your pick. see eof for details. */ +#ifndef NANOPRINTF_H_INCLUDED +#define NANOPRINTF_H_INCLUDED + +#include +#include + +// Define this to fully sandbox nanoprintf inside of a translation unit. +#ifdef NANOPRINTF_VISIBILITY_STATIC + #define NPF_VISIBILITY static +#else + #define NPF_VISIBILITY extern +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define NPF_PRINTF_ATTR(FORMAT_INDEX, VARGS_INDEX) \ + __attribute__((format(printf, FORMAT_INDEX, VARGS_INDEX))) +#else + #define NPF_PRINTF_ATTR(FORMAT_INDEX, VARGS_INDEX) +#endif + +// Public API + +#ifdef __cplusplus +extern "C" { +#endif + +// The npf_ functions all return the number of bytes required to express the +// fully-formatted string, not including the null terminator character. +// The npf_ functions do not return negative values, since the lack of 'l' length +// modifier support makes encoding errors impossible. + +NPF_VISIBILITY int npf_snprintf( + char *buffer, size_t bufsz, const char *format, ...) NPF_PRINTF_ATTR(3, 4); + +NPF_VISIBILITY int npf_vsnprintf( + char *buffer, size_t bufsz, char const *format, va_list vlist) NPF_PRINTF_ATTR(3, 0); + +typedef void (*npf_putc)(int c, void *ctx); +NPF_VISIBILITY int npf_pprintf( + npf_putc pc, void *pc_ctx, char const *format, ...) NPF_PRINTF_ATTR(3, 4); + +NPF_VISIBILITY int npf_vpprintf( + npf_putc pc, void *pc_ctx, char const *format, va_list vlist) NPF_PRINTF_ATTR(3, 0); + +#ifdef __cplusplus +} +#endif + +#endif // NANOPRINTF_H_INCLUDED + +/* The implementation of nanoprintf begins here, to be compiled only if + NANOPRINTF_IMPLEMENTATION is defined. In a multi-file library what follows would + be nanoprintf.c. 
*/ + +#ifdef NANOPRINTF_IMPLEMENTATION + +#ifndef NANOPRINTF_IMPLEMENTATION_INCLUDED +#define NANOPRINTF_IMPLEMENTATION_INCLUDED + +#include +#include + +// The conversion buffer must fit at least UINT64_MAX in octal format with the leading '0'. +#ifndef NANOPRINTF_CONVERSION_BUFFER_SIZE + #define NANOPRINTF_CONVERSION_BUFFER_SIZE 23 +#endif +#if NANOPRINTF_CONVERSION_BUFFER_SIZE < 23 + #error The size of the conversion buffer must be at least 23 bytes. +#endif + +// Pick reasonable defaults if nothing's been configured. +#if !defined(NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS) && \ + !defined(NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS) + #define NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS 1 + #define NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS 0 + #define NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS 0 + #define NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS 0 +#endif + +// If anything's been configured, everything must be configured. 
+#ifndef NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif +#ifndef NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS + #error NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS must be #defined to 0 or 1 +#endif + +// Ensure flags are compatible. +#if (NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1) && \ + (NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 0) + #error Precision format specifiers must be enabled if float support is enabled. +#endif + +// intmax_t / uintmax_t require stdint from c99 / c++11 +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + #ifndef _MSC_VER + #ifdef __cplusplus + #if __cplusplus < 201103L + #error large format specifier support requires C++11 or later. + #endif + #else + #if __STDC_VERSION__ < 199409L + #error nanoprintf requires C99 or later. + #endif + #endif + #endif +#endif + +// Figure out if we can disable warnings with pragmas. 
+#ifdef __clang__ + #define NANOPRINTF_CLANG 1 + #define NANOPRINTF_GCC_PAST_4_6 0 +#else + #define NANOPRINTF_CLANG 0 + #if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) + #define NANOPRINTF_GCC_PAST_4_6 1 + #else + #define NANOPRINTF_GCC_PAST_4_6 0 + #endif +#endif + +#if NANOPRINTF_CLANG || NANOPRINTF_GCC_PAST_4_6 + #define NANOPRINTF_HAVE_GCC_WARNING_PRAGMAS 1 +#else + #define NANOPRINTF_HAVE_GCC_WARNING_PRAGMAS 0 +#endif + +#if NANOPRINTF_HAVE_GCC_WARNING_PRAGMAS + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wunused-function" + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" + #ifdef __cplusplus + #pragma GCC diagnostic ignored "-Wold-style-cast" + #endif + #pragma GCC diagnostic ignored "-Wpadded" + #pragma GCC diagnostic ignored "-Wfloat-equal" + #if NANOPRINTF_CLANG + #pragma GCC diagnostic ignored "-Wc++98-compat-pedantic" + #pragma GCC diagnostic ignored "-Wcovered-switch-default" + #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" + #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant" + #ifndef __APPLE__ + #pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" + #endif + #elif NANOPRINTF_GCC_PAST_4_6 + #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" + #endif +#endif + +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable:4619) // there is no warning number 'number' + // C4619 has to be disabled first! 
+ #pragma warning(disable:4127) // conditional expression is constant + #pragma warning(disable:4505) // unreferenced local function has been removed + #pragma warning(disable:4514) // unreferenced inline function has been removed + #pragma warning(disable:4701) // potentially uninitialized local variable used + #pragma warning(disable:4706) // assignment within conditional expression + #pragma warning(disable:4710) // function not inlined + #pragma warning(disable:4711) // function selected for inline expansion + #pragma warning(disable:4820) // padding added after struct member + #pragma warning(disable:5039) // potentially throwing function passed to extern C function + #pragma warning(disable:5045) // compiler will insert Spectre mitigation for memory load + #pragma warning(disable:5262) // implicit switch fall-through + #pragma warning(disable:26812) // enum type is unscoped +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define NPF_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) + #define NPF_NOINLINE __declspec(noinline) +#else + #define NPF_NOINLINE +#endif + +#if (NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1) || \ + (NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1) +enum { + NPF_FMT_SPEC_OPT_NONE, + NPF_FMT_SPEC_OPT_LITERAL, + NPF_FMT_SPEC_OPT_STAR, +}; +#endif + +enum { + NPF_FMT_SPEC_LEN_MOD_NONE, + NPF_FMT_SPEC_LEN_MOD_SHORT, // 'h' + NPF_FMT_SPEC_LEN_MOD_LONG_DOUBLE, // 'L' + NPF_FMT_SPEC_LEN_MOD_CHAR, // 'hh' + NPF_FMT_SPEC_LEN_MOD_LONG, // 'l' +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_FMT_SPEC_LEN_MOD_LARGE_LONG_LONG, // 'll' + NPF_FMT_SPEC_LEN_MOD_LARGE_INTMAX, // 'j' + NPF_FMT_SPEC_LEN_MOD_LARGE_SIZET, // 'z' + NPF_FMT_SPEC_LEN_MOD_LARGE_PTRDIFFT, // 't' +#endif +}; + +enum { + NPF_FMT_SPEC_CONV_NONE, + NPF_FMT_SPEC_CONV_PERCENT, // '%' + NPF_FMT_SPEC_CONV_CHAR, // 'c' + NPF_FMT_SPEC_CONV_STRING, // 's' + NPF_FMT_SPEC_CONV_SIGNED_INT, // 'i', 'd' +#if 
// Returns the larger of the two arguments; when they are equal, y is returned.
static int npf_max(int x, int y) {
  if (x > y) {
    return x;
  }
  return y;
}
== 'h') { + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_CHAR; + ++cur; + } + break; + case 'l': + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LONG; +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + if (*cur == 'l') { + out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_LONG_LONG; + ++cur; + } +#endif + break; +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + case 'L': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LONG_DOUBLE; break; +#endif +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + case 'j': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_INTMAX; break; + case 'z': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_SIZET; break; + case 't': out_spec->length_modifier = NPF_FMT_SPEC_LEN_MOD_LARGE_PTRDIFFT; break; +#endif + default: --cur; break; + } + + switch (*cur++) { // Conversion specifier + case '%': out_spec->conv_spec = NPF_FMT_SPEC_CONV_PERCENT; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; +#endif + break; + + case 'c': out_spec->conv_spec = NPF_FMT_SPEC_CONV_CHAR; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; +#endif + break; + + case 's': out_spec->conv_spec = NPF_FMT_SPEC_CONV_STRING; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + out_spec->leading_zero_pad = 0; +#endif + break; + + case 'i': + case 'd': tmp_conv = NPF_FMT_SPEC_CONV_SIGNED_INT; + case 'o': + if (tmp_conv == NPF_FMT_SPEC_CONV_NONE) { tmp_conv = NPF_FMT_SPEC_CONV_OCTAL; } + case 'u': + if (tmp_conv == NPF_FMT_SPEC_CONV_NONE) { tmp_conv = NPF_FMT_SPEC_CONV_UNSIGNED_INT; } + case 'X': + if (tmp_conv == NPF_FMT_SPEC_CONV_NONE) { out_spec->case_adjust = 0; } + case 'x': + if (tmp_conv == NPF_FMT_SPEC_CONV_NONE) { tmp_conv = NPF_FMT_SPEC_CONV_HEX_INT; } + out_spec->conv_spec = (uint8_t)tmp_conv; +#if (NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1) && \ + (NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1) + if (out_spec->prec_opt != 
NPF_FMT_SPEC_OPT_NONE) { out_spec->leading_zero_pad = 0; } +#endif + break; + +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + case 'F': out_spec->case_adjust = 0; + case 'f': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_DEC; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; + + case 'E': out_spec->case_adjust = 0; + case 'e': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_SCI; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; + + case 'G': out_spec->case_adjust = 0; + case 'g': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_SHORTEST; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; + + case 'A': out_spec->case_adjust = 0; + case 'a': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_FLOAT_HEX; + if (out_spec->prec_opt == NPF_FMT_SPEC_OPT_NONE) { out_spec->prec = 6; } + break; +#endif + +#if NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS == 1 + case 'n': + // todo: reject string if flags or width or precision exist + out_spec->conv_spec = NPF_FMT_SPEC_CONV_WRITEBACK; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; +#endif + break; +#endif + + case 'p': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_POINTER; +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + out_spec->prec_opt = NPF_FMT_SPEC_OPT_NONE; +#endif + break; + +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + case 'B': + out_spec->case_adjust = 0; + case 'b': + out_spec->conv_spec = NPF_FMT_SPEC_CONV_BINARY; + break; +#endif + + default: return 0; + } + + return (int)(cur - format); +} + +static NPF_NOINLINE int npf_utoa_rev( + npf_uint_t val, char *buf, uint_fast8_t base, char case_adj) { + uint_fast8_t n = 0; + do { + int_fast8_t const d = (int_fast8_t)(val % base); + *buf++ = (char)(((d < 10) ? 
'0' : ('A' - 10 + case_adj)) + d); + ++n; + val /= base; + } while (val); + return (int)n; +} + +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + +#include + +#if (DBL_MANT_DIG <= 11) && (DBL_MAX_EXP <= 16) + typedef uint_fast16_t npf_double_bin_t; + typedef int_fast8_t npf_ftoa_exp_t; +#elif (DBL_MANT_DIG <= 24) && (DBL_MAX_EXP <= 128) + typedef uint_fast32_t npf_double_bin_t; + typedef int_fast8_t npf_ftoa_exp_t; +#elif (DBL_MANT_DIG <= 53) && (DBL_MAX_EXP <= 1024) + typedef uint_fast64_t npf_double_bin_t; + typedef int_fast16_t npf_ftoa_exp_t; +#else + #error Unsupported width of the double type. +#endif + +// The floating point conversion code works with an unsigned integer type of any size. +#ifndef NANOPRINTF_CONVERSION_FLOAT_TYPE + #define NANOPRINTF_CONVERSION_FLOAT_TYPE unsigned int +#endif +typedef NANOPRINTF_CONVERSION_FLOAT_TYPE npf_ftoa_man_t; + +#if (NANOPRINTF_CONVERSION_BUFFER_SIZE <= UINT_FAST8_MAX) && (UINT_FAST8_MAX <= INT_MAX) + typedef uint_fast8_t npf_ftoa_dec_t; +#else + typedef int npf_ftoa_dec_t; +#endif + +enum { + NPF_DOUBLE_EXP_MASK = DBL_MAX_EXP * 2 - 1, + NPF_DOUBLE_EXP_BIAS = DBL_MAX_EXP - 1, + NPF_DOUBLE_MAN_BITS = DBL_MANT_DIG - 1, + NPF_DOUBLE_BIN_BITS = sizeof(npf_double_bin_t) * CHAR_BIT, + NPF_FTOA_MAN_BITS = sizeof(npf_ftoa_man_t) * CHAR_BIT, + NPF_FTOA_SHIFT_BITS = + ((NPF_FTOA_MAN_BITS < DBL_MANT_DIG) ? NPF_FTOA_MAN_BITS : DBL_MANT_DIG) - 1 +}; + +/* Generally, floating-point conversion implementations use + grisu2 (https://bit.ly/2JgMggX) and ryu (https://bit.ly/2RLXSg0) algorithms, + which are mathematically exact and fast, but require large lookup tables. + + This implementation was inspired by Wojciech Muła's (zdjęcia@garnek.pl) + algorithm (http://0x80.pl/notesen/2015-12-29-float-to-string.html) and + extended further by adding dynamic scaling and configurable integer width by + Oskars Rubenis (https://github.com/Okarss). 
*/ + +static int npf_ftoa_rev(char *buf, npf_format_spec_t const *spec, double f) { + char const *ret = NULL; + npf_double_bin_t bin; { // Union-cast is UB pre-C11, compiler optimizes byte-copy loop. + char const *src = (char const *)&f; + char *dst = (char *)&bin; + for (uint_fast8_t i = 0; i < sizeof(f); ++i) { dst[i] = src[i]; } + } + + // Unsigned -> signed int casting is IB and can raise a signal but generally doesn't. + npf_ftoa_exp_t exp = + (npf_ftoa_exp_t)((npf_ftoa_exp_t)(bin >> NPF_DOUBLE_MAN_BITS) & NPF_DOUBLE_EXP_MASK); + + bin &= ((npf_double_bin_t)0x1 << NPF_DOUBLE_MAN_BITS) - 1; + if (exp == (npf_ftoa_exp_t)NPF_DOUBLE_EXP_MASK) { // special value + ret = (bin) ? "NAN" : "FNI"; + goto exit; + } + if (spec->prec > (NANOPRINTF_CONVERSION_BUFFER_SIZE - 2)) { goto exit; } + if (exp) { // normal number + bin |= (npf_double_bin_t)0x1 << NPF_DOUBLE_MAN_BITS; + } else { // subnormal number + ++exp; + } + exp = (npf_ftoa_exp_t)(exp - NPF_DOUBLE_EXP_BIAS); + + uint_fast8_t carry; carry = 0; + npf_ftoa_dec_t end, dec; dec = (npf_ftoa_dec_t)spec->prec; + if (dec || spec->alt_form) { + buf[dec++] = '.'; + } + + { // Integer part + npf_ftoa_man_t man_i; + + if (exp >= 0) { + int_fast8_t shift_i = + (int_fast8_t)((exp > NPF_FTOA_SHIFT_BITS) ? (int)NPF_FTOA_SHIFT_BITS : exp); + npf_ftoa_exp_t exp_i = (npf_ftoa_exp_t)(exp - shift_i); + shift_i = (int_fast8_t)(NPF_DOUBLE_MAN_BITS - shift_i); + man_i = (npf_ftoa_man_t)(bin >> shift_i); + + if (exp_i) { + if (shift_i) { + carry = (bin >> (shift_i - 1)) & 0x1; + } + exp = NPF_DOUBLE_MAN_BITS; // invalidate the fraction part + } + + // Scale the exponent from base-2 to base-10. 
+ for (; exp_i; --exp_i) { + if (!(man_i & ((npf_ftoa_man_t)0x1 << (NPF_FTOA_MAN_BITS - 1)))) { + man_i = (npf_ftoa_man_t)(man_i << 1); + man_i = (npf_ftoa_man_t)(man_i | carry); carry = 0; + } else { + if (dec >= NANOPRINTF_CONVERSION_BUFFER_SIZE) { goto exit; } + buf[dec++] = '0'; + carry = (((uint_fast8_t)(man_i % 5) + carry) > 2); + man_i /= 5; + } + } + } else { + man_i = 0; + } + end = dec; + + do { // Print the integer + if (end >= NANOPRINTF_CONVERSION_BUFFER_SIZE) { goto exit; } + buf[end++] = (char)('0' + (char)(man_i % 10)); + man_i /= 10; + } while (man_i); + } + + { // Fraction part + npf_ftoa_man_t man_f; + npf_ftoa_dec_t dec_f = (npf_ftoa_dec_t)spec->prec; + + if (exp < NPF_DOUBLE_MAN_BITS) { + int_fast8_t shift_f = (int_fast8_t)((exp < 0) ? -1 : exp); + npf_ftoa_exp_t exp_f = (npf_ftoa_exp_t)(exp - shift_f); + npf_double_bin_t bin_f = + bin << ((NPF_DOUBLE_BIN_BITS - NPF_DOUBLE_MAN_BITS) + shift_f); + + // This if-else statement can be completely optimized at compile time. + if (NPF_DOUBLE_BIN_BITS > NPF_FTOA_MAN_BITS) { + man_f = (npf_ftoa_man_t)(bin_f >> ((unsigned)(NPF_DOUBLE_BIN_BITS - + NPF_FTOA_MAN_BITS) % + NPF_DOUBLE_BIN_BITS)); + carry = (uint_fast8_t)((bin_f >> ((unsigned)(NPF_DOUBLE_BIN_BITS - + NPF_FTOA_MAN_BITS - 1) % + NPF_DOUBLE_BIN_BITS)) & 0x1); + } else { + man_f = (npf_ftoa_man_t)((npf_ftoa_man_t)bin_f + << ((unsigned)(NPF_FTOA_MAN_BITS - + NPF_DOUBLE_BIN_BITS) % NPF_FTOA_MAN_BITS)); + carry = 0; + } + + // Scale the exponent from base-2 to base-10 and prepare the first digit. 
+ for (uint_fast8_t digit = 0; dec_f && (exp_f < 4); ++exp_f) { + if ((man_f > ((npf_ftoa_man_t)-4 / 5)) || digit) { + carry = (uint_fast8_t)(man_f & 0x1); + man_f = (npf_ftoa_man_t)(man_f >> 1); + } else { + man_f = (npf_ftoa_man_t)(man_f * 5); + if (carry) { man_f = (npf_ftoa_man_t)(man_f + 3); carry = 0; } + if (exp_f < 0) { + buf[--dec_f] = '0'; + } else { + ++digit; + } + } + } + man_f = (npf_ftoa_man_t)(man_f + carry); + carry = (exp_f >= 0); + dec = 0; + } else { + man_f = 0; + } + + if (dec_f) { + // Print the fraction + for (;;) { + buf[--dec_f] = (char)('0' + (char)(man_f >> (NPF_FTOA_MAN_BITS - 4))); + man_f = (npf_ftoa_man_t)(man_f & ~((npf_ftoa_man_t)0xF << (NPF_FTOA_MAN_BITS - 4))); + if (!dec_f) { break; } + man_f = (npf_ftoa_man_t)(man_f * 10); + } + man_f = (npf_ftoa_man_t)(man_f << 4); + } + if (exp < NPF_DOUBLE_MAN_BITS) { + carry &= (uint_fast8_t)(man_f >> (NPF_FTOA_MAN_BITS - 1)); + } + } + + // Round the number + for (; carry; ++dec) { + if (dec >= NANOPRINTF_CONVERSION_BUFFER_SIZE) { goto exit; } + if (dec >= end) { buf[end++] = '0'; } + if (buf[dec] == '.') { continue; } + carry = (buf[dec] == '9'); + buf[dec] = (char)(carry ? '0' : (buf[dec] + 1)); + } + + return (int)end; +exit: + if (!ret) { ret = "RRE"; } + uint_fast8_t i; + for (i = 0; ret[i]; ++i) { buf[i] = (char)(ret[i] + spec->case_adjust); } + return (int)i; +} + +#endif // NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS + +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 +static int npf_bin_len(npf_uint_t u) { + // Return the length of the binary string format of 'u', preferring intrinsics. + if (!u) { return 1; } + +#ifdef _MSC_VER // Win64, use _BSR64 for everything. If x86, use _BSR when non-large. 
// Counting output shim used by NPF_PUTC: `ctx` is the npf_cnt_putc_ctx_t set up
// by npf_vpprintf. Increments its character counter `n`, then emits `c` through
// __putc.
// NOTE(review): the `pc` and `ctx` members stored in the counter context are
// not consulted here, so every character goes to __putc regardless of the sink
// passed to npf_vpprintf. Confirm this is an intentional local change to the
// vendored nanoprintf.
static void npf_putc_cnt(int c, void *ctx) {
  npf_cnt_putc_ctx_t *pc_cnt = (npf_cnt_putc_ctx_t *)ctx;
  ++pc_cnt->n;
  __putc(c, NULL);
}
0 : npf_parse_format_spec(cur, &fs); + if (!fs_len) { NPF_PUTC(*cur++); continue; } + cur += fs_len; + + // Extract star-args immediately +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + if (fs.field_width_opt == NPF_FMT_SPEC_OPT_STAR) { + fs.field_width = va_arg(args, int); + if (fs.field_width < 0) { + fs.field_width = -fs.field_width; + fs.left_justified = 1; + } + } +#endif +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + if (fs.prec_opt == NPF_FMT_SPEC_OPT_STAR) { + fs.prec = va_arg(args, int); + if (fs.prec < 0) { fs.prec_opt = NPF_FMT_SPEC_OPT_NONE; } + } +#endif + + union { char cbuf_mem[NANOPRINTF_CONVERSION_BUFFER_SIZE]; npf_uint_t binval; } u; + char *cbuf = u.cbuf_mem, sign_c = 0; + int cbuf_len = 0, need_0x = 0; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + int field_pad = 0; + char pad_c = 0; +#endif +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + int prec_pad = 0; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + int zero = 0; +#endif +#endif + // Extract and convert the argument to string, point cbuf at the text. 
+ switch (fs.conv_spec) { + case NPF_FMT_SPEC_CONV_PERCENT: + *cbuf = '%'; + cbuf_len = 1; + break; + + case NPF_FMT_SPEC_CONV_CHAR: + *cbuf = (char)va_arg(args, int); + cbuf_len = 1; + break; + + case NPF_FMT_SPEC_CONV_STRING: { + cbuf = va_arg(args, char *); +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + for (char const *s = cbuf; + ((fs.prec_opt == NPF_FMT_SPEC_OPT_NONE) || (cbuf_len < fs.prec)) && cbuf && *s; + ++s, ++cbuf_len); +#else + for (char const *s = cbuf; cbuf && *s; ++s, ++cbuf_len); // strlen +#endif + } break; + + case NPF_FMT_SPEC_CONV_SIGNED_INT: { + npf_int_t val = 0; + switch (fs.length_modifier) { + NPF_EXTRACT(NONE, int, int); + NPF_EXTRACT(SHORT, short, int); + NPF_EXTRACT(LONG_DOUBLE, int, int); + NPF_EXTRACT(CHAR, signed char, int); + NPF_EXTRACT(LONG, long, long); +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_EXTRACT(LARGE_LONG_LONG, long long, long long); + NPF_EXTRACT(LARGE_INTMAX, intmax_t, intmax_t); + NPF_EXTRACT(LARGE_SIZET, npf_ssize_t, npf_ssize_t); + NPF_EXTRACT(LARGE_PTRDIFFT, ptrdiff_t, ptrdiff_t); +#endif + default: break; + } + + sign_c = (val < 0) ? 
'-' : fs.prepend; + +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + zero = !val; +#endif + // special case, if prec and value are 0, skip + if (!val && (fs.prec_opt != NPF_FMT_SPEC_OPT_NONE) && !fs.prec) { + cbuf_len = 0; + } else +#endif + { + npf_uint_t uval = (npf_uint_t)val; + if (val < 0) { uval = 0 - uval; } + cbuf_len = npf_utoa_rev(uval, cbuf, 10, fs.case_adjust); + } + } break; + +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + case NPF_FMT_SPEC_CONV_BINARY: +#endif + case NPF_FMT_SPEC_CONV_OCTAL: + case NPF_FMT_SPEC_CONV_HEX_INT: + case NPF_FMT_SPEC_CONV_UNSIGNED_INT: { + npf_uint_t val = 0; + + switch (fs.length_modifier) { + NPF_EXTRACT(NONE, unsigned, unsigned); + NPF_EXTRACT(SHORT, unsigned short, unsigned); + NPF_EXTRACT(LONG_DOUBLE, unsigned, unsigned); + NPF_EXTRACT(CHAR, unsigned char, unsigned); + NPF_EXTRACT(LONG, unsigned long, unsigned long); +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_EXTRACT(LARGE_LONG_LONG, unsigned long long, unsigned long long); + NPF_EXTRACT(LARGE_INTMAX, uintmax_t, uintmax_t); + NPF_EXTRACT(LARGE_SIZET, size_t, size_t); + NPF_EXTRACT(LARGE_PTRDIFFT, size_t, size_t); +#endif + default: break; + } + +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + zero = !val; +#endif + if (!val && (fs.prec_opt != NPF_FMT_SPEC_OPT_NONE) && !fs.prec) { + // Zero value and explicitly-requested zero precision means "print nothing". + if ((fs.conv_spec == NPF_FMT_SPEC_CONV_OCTAL) && fs.alt_form) { + fs.prec = 1; // octal special case, print a single '0' + } + } else +#endif +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + if (fs.conv_spec == NPF_FMT_SPEC_CONV_BINARY) { + cbuf_len = npf_bin_len(val); u.binval = val; + } else +#endif + { + uint_fast8_t const base = (fs.conv_spec == NPF_FMT_SPEC_CONV_OCTAL) ? + 8u : ((fs.conv_spec == NPF_FMT_SPEC_CONV_HEX_INT) ? 
16u : 10u); + cbuf_len = npf_utoa_rev(val, cbuf, base, fs.case_adjust); + } + + if (val && fs.alt_form && (fs.conv_spec == NPF_FMT_SPEC_CONV_OCTAL)) { + cbuf[cbuf_len++] = '0'; // OK to add leading octal '0' immediately. + } + + if (val && fs.alt_form) { // 0x or 0b but can't write it yet. + if (fs.conv_spec == NPF_FMT_SPEC_CONV_HEX_INT) { need_0x = 'X'; } +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + else if (fs.conv_spec == NPF_FMT_SPEC_CONV_BINARY) { need_0x = 'B'; } +#endif + if (need_0x) { need_0x += fs.case_adjust; } + } + } break; + + case NPF_FMT_SPEC_CONV_POINTER: { + cbuf_len = + npf_utoa_rev((npf_uint_t)(uintptr_t)va_arg(args, void *), cbuf, 16, 'a' - 'A'); + need_0x = 'x'; + } break; + +#if NANOPRINTF_USE_WRITEBACK_FORMAT_SPECIFIERS == 1 + case NPF_FMT_SPEC_CONV_WRITEBACK: + switch (fs.length_modifier) { + NPF_WRITEBACK(NONE, int); + NPF_WRITEBACK(SHORT, short); + NPF_WRITEBACK(LONG, long); + NPF_WRITEBACK(LONG_DOUBLE, double); + NPF_WRITEBACK(CHAR, signed char); +#if NANOPRINTF_USE_LARGE_FORMAT_SPECIFIERS == 1 + NPF_WRITEBACK(LARGE_LONG_LONG, long long); + NPF_WRITEBACK(LARGE_INTMAX, intmax_t); + NPF_WRITEBACK(LARGE_SIZET, size_t); + NPF_WRITEBACK(LARGE_PTRDIFFT, ptrdiff_t); +#endif + default: break; + } break; +#endif + +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + case NPF_FMT_SPEC_CONV_FLOAT_DEC: + case NPF_FMT_SPEC_CONV_FLOAT_SCI: + case NPF_FMT_SPEC_CONV_FLOAT_SHORTEST: + case NPF_FMT_SPEC_CONV_FLOAT_HEX: { + double val; + if (fs.length_modifier == NPF_FMT_SPEC_LEN_MOD_LONG_DOUBLE) { + val = (double)va_arg(args, long double); + } else { + val = va_arg(args, double); + } + + sign_c = (val < 0.) ? 
'-' : fs.prepend; +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + zero = (val == 0.); +#endif + cbuf_len = npf_ftoa_rev(cbuf, &fs, val); + } break; +#endif + default: break; + } + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + // Compute the field width pad character + if (fs.field_width_opt != NPF_FMT_SPEC_OPT_NONE) { + if (fs.leading_zero_pad) { // '0' flag is only legal with numeric types + if ((fs.conv_spec != NPF_FMT_SPEC_CONV_STRING) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_CHAR) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_PERCENT)) { +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + if ((fs.prec_opt != NPF_FMT_SPEC_OPT_NONE) && !fs.prec && zero) { + pad_c = ' '; + } else +#endif + { pad_c = '0'; } + } + } else { pad_c = ' '; } + } +#endif + + // Compute the number of bytes to truncate or '0'-pad. +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + if (fs.conv_spec != NPF_FMT_SPEC_CONV_STRING) { +#if NANOPRINTF_USE_FLOAT_FORMAT_SPECIFIERS == 1 + // float precision is after the decimal point + if ((fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_DEC) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_SCI) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_SHORTEST) && + (fs.conv_spec != NPF_FMT_SPEC_CONV_FLOAT_HEX)) +#endif + { prec_pad = npf_max(0, fs.prec - cbuf_len); } + } +#endif + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + // Given the full converted length, how many pad bytes? + field_pad = fs.field_width - cbuf_len - !!sign_c; + if (need_0x) { field_pad -= 2; } +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + field_pad -= prec_pad; +#endif + field_pad = npf_max(0, field_pad); + + // Apply right-justified field width if requested + if (!fs.left_justified && pad_c) { // If leading zeros pad, sign goes first. + if (pad_c == '0') { + if (sign_c) { NPF_PUTC(sign_c); sign_c = 0; } + // Pad byte is '0', write '0x' before '0' pad chars. 
+ if (need_0x) { NPF_PUTC('0'); NPF_PUTC(need_0x); } + } + while (field_pad-- > 0) { NPF_PUTC(pad_c); } + // Pad byte is ' ', write '0x' after ' ' pad chars but before number. + if ((pad_c != '0') && need_0x) { NPF_PUTC('0'); NPF_PUTC(need_0x); } + } else +#endif + { if (need_0x) { NPF_PUTC('0'); NPF_PUTC(need_0x); } } // no pad, '0x' requested. + + // Write the converted payload + if (fs.conv_spec == NPF_FMT_SPEC_CONV_STRING) { + for (int i = 0; cbuf && (i < cbuf_len); ++i) { NPF_PUTC(cbuf[i]); } + } else { + if (sign_c) { NPF_PUTC(sign_c); } +#if NANOPRINTF_USE_PRECISION_FORMAT_SPECIFIERS == 1 + while (prec_pad-- > 0) { NPF_PUTC('0'); } // int precision leads. +#endif +#if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1 + if (fs.conv_spec == NPF_FMT_SPEC_CONV_BINARY) { + while (cbuf_len) { NPF_PUTC('0' + ((u.binval >> --cbuf_len) & 1)); } + } else +#endif + { while (cbuf_len-- > 0) { NPF_PUTC(cbuf[cbuf_len]); } } // payload is reversed + } + +#if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1 + if (fs.left_justified && pad_c) { // Apply left-justified field width + while (field_pad-- > 0) { NPF_PUTC(pad_c); } + } +#endif + } + + return pc_cnt.n; +} + +#undef NPF_PUTC +#undef NPF_EXTRACT +#undef NPF_WRITEBACK + +int npf_pprintf(npf_putc pc, void *pc_ctx, char const *format, ...) { + va_list val; + va_start(val, format); + int const rv = npf_vpprintf(pc, pc_ctx, format, val); + va_end(val); + return rv; +} + +int npf_snprintf(char *buffer, size_t bufsz, const char *format, ...) { + va_list val; + va_start(val, format); + int const rv = npf_vsnprintf(buffer, bufsz, format, val); + va_end(val); + return rv; +} + +int npf_vsnprintf(char *buffer, size_t bufsz, char const *format, va_list vlist) { + npf_bufputc_ctx_t bufputc_ctx; + bufputc_ctx.dst = buffer; + bufputc_ctx.len = bufsz; + bufputc_ctx.cur = 0; + + npf_putc const pc = buffer ? 
npf_bufputc : npf_bufputc_nop; + int const n = npf_vpprintf(pc, &bufputc_ctx, format, vlist); + pc('\0', &bufputc_ctx); + + if (buffer && bufsz) { +#ifdef NANOPRINTF_SNPRINTF_SAFE_EMPTY_STRING_ON_OVERFLOW + if (n >= (int)bufsz) { buffer[0] = '\0'; } +#else + buffer[bufsz - 1] = '\0'; +#endif + } + + return n; +} + +#if NANOPRINTF_HAVE_GCC_WARNING_PRAGMAS + #pragma GCC diagnostic pop +#endif + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#endif // NANOPRINTF_IMPLEMENTATION_INCLUDED +#endif // NANOPRINTF_IMPLEMENTATION + +/* + nanoprintf is dual-licensed under both the "Unlicense" and the + "Zero-Clause BSD" (0BSD) licenses. The intent of this dual-licensing + structure is to make nanoprintf as consumable as possible in as many + environments / countries / companies as possible without any + encumberances. + + The text of the two licenses follows below: + + ============================== UNLICENSE ============================== + + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to + + ================================ 0BSD ================================= + + Copyright (C) 2019- by Charles Nicholson + + Permission to use, copy, modify, and/or distribute this software for + any purpose with or without fee is hereby granted. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ diff --git a/packages/std/packages/stub/include/syscall.h b/packages/std/packages/stub/include/syscall.h new file mode 100644 index 00000000..24d659b6 --- /dev/null +++ b/packages/std/packages/stub/include/syscall.h @@ -0,0 +1,153 @@ +#pragma once +#include +#include + +#if defined __aarch64__ +#include "aarch64/syscall.h" +#endif + +#if defined __x86_64__ +#include "x86_64/syscall.h" +#endif + +// open constants +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 + +// mmap constants +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +#define PROT_EXEC 0x4 +#define MAP_SHARED 0x01 +#define MAP_PRIVATE 0x02 +#define MAP_ANONYMOUS 0x20 +#define MAP_FIXED 0x10 +#define MAP_GROWSDOWN 0x00100 +#define MAP_FIXED_NOREPLACE 0x100000 +#define MAP_FAILED (void*)-1 + +// rlimit constants +#define RLIMIT_STACK 3 + +// getrandom constants +#define GRND_NONBLOCK 0x01 + +// lseek constants +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +// stdio fds +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 + +typedef long off_t; +typedef unsigned long __rlim_t; +typedef struct { + __rlim_t soft; + __rlim_t hard; +} rlimit_t; +typedef struct { + uint8_t buf[256]; +} stat_t; + +static inline long write (int fd, const void *buf, size_t count) { + return syscall3(__NR_write, (long)fd, (long)buf, (long)count); +} + +static inline int open (const char* path, int flags, int mode) { + #if defined __x86_64__ + return (int)syscall3(__NR_open, (long)path, (long)flags, (long)mode); + #endif + + // aarch64 has no "open" syscall. We have to use openat. + #if defined __aarch64__ + return (int)syscall4(__NR_openat, -1, (long)path, (long)flags, (long)mode); + #endif +} + +static inline int close (int fd) { + return (int)syscall1(__NR_close, (long)fd); +} + +static inline int stat (const char* pathname, stat_t* statbuf) { + #if defined __x86_64__ + return (int)syscall2(__NR_stat, (long)pathname, (long)statbuf); + #endif + + // aarch64 has no "stat" syscapp. 
We have to use fstat. + #if defined __aarch64__ + int fd = open(pathname, O_RDONLY, 0); + if (fd < 0) { + return fd; + } + int status = syscall2(__NR_fstat, fd, (long)statbuf); + close(fd); + return status; + #endif +} + +static inline long lseek (int fd, off_t offset, int whence) { + return syscall3(__NR_lseek, (long)fd, (long)offset, (long)whence); +} + +static inline void* mmap( + void* addr, + uint64_t length, + uint64_t prot, + uint64_t flags, + int64_t fd, + uint64_t offset +) { + return (void*)syscall6( + __NR_mmap, + (long)addr, + (long)length, + (long)prot, + (long)flags, + (long)fd, + (long)offset + ); +} + +static inline int munmap(void* addr, uint64_t len) { + return (int)syscall2(__NR_munmap, (long)addr, (long)len); +} + +static inline int pread64 (int fd, void* buf, size_t count, off_t offset) { + return (int)syscall4(__NR_pread64, (long)fd, (long)buf, (long)count, (long)offset); +} + +static inline int execve (char* pathname, char** argv, char** envp) { + return (int)syscall3(__NR_execve, (long)pathname, (long)argv, (long)envp); +} + +static inline void exit (int status) { + syscall1(__NR_exit, (long)status); + __builtin_unreachable(); +} + +static inline int getcwd(char* buf, size_t size) { + return (int)syscall2(__NR_getcwd, (long)buf, (long)size); +} + +static inline long readlink (const char* pathname, char* buf, size_t bufsiz) { + #if defined __x86_64__ + return syscall3(__NR_readlink, (long)pathname, (long)buf, (long)bufsiz); + #endif + + // aarch64 has no "readlink" syscall. We have to use readlinkat. 
// Round `n` up to the nearest power of two.
//
// Returns 1 for n == 0 and n itself when it already is a power of two.
// Works across the full width of size_t (the previous version truncated the
// argument to 32 bits and shifted a 32-bit constant by up to 32, which is
// undefined). If the result is not representable in size_t it wraps to 0.
static size_t nextpow2 (size_t n) {
	if (n <= 1) {
		return 1;
	}

	// Smear the highest set bit of (n - 1) into every lower position, then add
	// one: the result is the smallest power of two that is >= n.
	size_t v = n - 1;
	for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) {
		v |= v >> shift;
	}
	return v + 1;
}
+ uint64_t hash = fnv1a(key); + + // Search for the key in the table. + uint64_t index = hash % table->capacity; + Node* node = table->list + index; + + for(;;) { + // If this is an empty node, use it. + if (node->key.ptr == 0) { + node->key.ptr = key.ptr; + node->key.len = key.len; + node->val.ptr = val.ptr; + node->val.len = val.len; + table->size++; + return 0; + } + + // If this has the same key, overwrite its value. + if (streq(node->key, key)) { + node->val.ptr = val.ptr; + node->val.len = val.len; + return 0; + } + + if (node->next) { + node = node->next; + } else { + break; + } + } + + // Allocate a new node. + Node* new_node = ALLOC(arena, Node); + new_node->key.ptr = key.ptr; + new_node->key.len = key.len; + new_node->val.ptr = val.ptr; + new_node->val.len = val.len; + new_node->next = NULL; + node->next = new_node; + table->size++; + return 0; +} + +static void remove ( + Table* table, + String key +) { + uint64_t hash = fnv1a(key); + Node* node = table->list + hash % table->capacity; + while(node) { + if (streq(node->key, key)) { + node->key.ptr = NULL; + node->key.len = 0; + node->val.ptr = NULL; + node->val.len = 0; + table->size--; + return; + } + node = node->next; + } +} + +static String lookup ( + Table* table, + String key +) { + uint64_t hash = fnv1a(key); + Node* node = table->list + hash % table->capacity; + while(node) { + if (streq(node->key, key)) { + return node->val; + } + node = node->next; + } + String empty = {0}; + return empty; +} + +static String clookup ( + Table* table, + const char* key +) { + String key_ = STRING_LITERAL(key); + return lookup(table, key_); +} + +static void clear ( + Table* table +) { + Node* itr = table->list; + Node* end = itr + table->capacity; + for(; itr != end; itr++) { + Node* node = itr; + while(node) { + node->key.ptr = NULL; + node->key.len = 0; + node = node->next; + } + } +} + +static void print_table (Table* table) { + Node* itr = table->list; + Node* end = itr + table->capacity; + for(; itr != end; 
// Length of the C string `str` counted together with its terminating NUL
// byte (so the empty string yields 1).
static size_t strlen_including_nul (const char* str) {
	size_t count = 0;
	while (str[count] != '\0') {
		count++;
	}
	// One more for the terminator itself.
	return count + 1;
}
+ if (path.len == 1 && path.ptr[0] == '/') { + path.ptr = NULL; + path.len = 0; + return path; + } + + for(; path.len > 0; path.len--) { + if (path.ptr[path.len - 1] == '/') { + break; + } + } + } + } + return path; +} + + +static bool streq (String a, String b) { + if (a.len != b.len) { + return false; + } + for (size_t n = 0; n < a.len; n++) { + if (a.ptr[n] != b.ptr[n]) { + return false; + } + } + return true; +} + +static bool cstreq (String s, const char* cstr) { + for (int i = 0; i < s.len; i++) { + if (s.ptr[i] != cstr[i]) { + return false; + } + } + return !cstr[s.len]; +} + +static char* cstr (Arena *arena, String s) { + if (s.ptr[s.len] == 0) { + return s.ptr; + } + char* c = ALLOC_N(arena, s.len + 1, char); + memcpy((void*)c, s.ptr, s.len); + return c; +} + +static bool starts_with (String a, String prefix) { + if (a.len < prefix.len) { + return false; + } + for (size_t n = 0; n < prefix.len; n++) { + if (a.ptr[n] != prefix.ptr[n]) { + return false; + } + } + return true; +} + +static bool cstarts_with (String a, const char* prefix) { + for (size_t n = 0; n < a.len; n++) { + if (!prefix[n]) { + return false; + } + if (a.ptr[n] != prefix[n]) { + return false; + } + } + return true; +} + +static String join (Arena* arena, String separator, String* strings, size_t nstrings) { + // Compute the max length of the string. + size_t len = 0; + for (size_t n = 0; n < nstrings; n++) { + len += strings[n].len; + if (n != (nstrings - 1)) { + len += separator.len; + } + } + + // Allocate the new string. + String out = {0}; + out.ptr = ALLOC_N(arena, len + 1, uint8_t); + + // Append new strings to it. 
+ for (size_t n = 0; n < nstrings; n++) { + if (strings[n].ptr) { + memcpy(out.ptr + out.len, strings[n].ptr, strings[n].len); + out.len += strings[n].len; + if (separator.ptr && n != (nstrings - 1)) { + memcpy(out.ptr + out.len, separator.ptr, separator.len); + out.len += separator.len; + } + } + } + + return out; +} + +static void reverse (String* s) { + int i = 0; + int j = s->len - 1; + while (i < j) { + char buf = s->ptr[i]; + s->ptr[i] = s->ptr[j]; + s->ptr[j] = buf; + i++; + j--; + } +} + +static void double_to_string (Arena* arena, double d, String* s) { + s->ptr = ALLOC_N(arena, 64, uint8_t); + char sign = d >= 0 ? 0 : '-'; + + double mag = d >= 0 ? d : -d; + uint64_t whole = (uint64_t)mag; + double frac = d - (double)whole; + ABORT_IF(frac != 0, "only integer numbers are supported"); + + do { + "012345689"[whole % 10]; + whole /= 10; + } while (whole != 0); + + if (sign) { + s->ptr[s->len++] = sign; + } + + reverse(s); +} diff --git a/packages/std/packages/stub/include/x86_64/debug.h b/packages/std/packages/stub/include/x86_64/debug.h new file mode 100644 index 00000000..b87e5a07 --- /dev/null +++ b/packages/std/packages/stub/include/x86_64/debug.h @@ -0,0 +1,6 @@ +#pragma once +#ifdef BREAKPOINTS + #define BREAK do { asm volatile ("int3"); } while (0) +#else + #define BREAK +#endif diff --git a/packages/std/packages/stub/include/x86_64/syscall.h b/packages/std/packages/stub/include/x86_64/syscall.h new file mode 100644 index 00000000..8e657f6c --- /dev/null +++ b/packages/std/packages/stub/include/x86_64/syscall.h @@ -0,0 +1,142 @@ +#pragma once + +#define __NR_write 1 +#define __NR_open 2 +#define __NR_close 3 +#define __NR_stat 4 +#define __NR_lseek 8 +#define __NR_mmap 9 +#define __NR_munmap 11 +#define __NR_pread64 17 +#define __NR_execve 59 +#define __NR_exit 60 +#define __NR_getcwd 79 +#define __NR_readlink 89 +#define __NR_getrlimit 97 +#define __NR_getrandom 318 + +static inline long syscall1 ( + long nr, + long arg1 +) { + long ret; + register 
// Issue a four-argument system call with the x86_64 `syscall` instruction.
//
// nr:          system call number, placed in rax.
// arg1..arg4:  arguments, placed in rdi, rsi, rdx and r10 respectively.
// Returns whatever the kernel leaves in rax (read back through the "=a"
// output operand).
static inline long syscall4 (
	long nr,
	long arg1,
	long arg2,
	long arg3,
	long arg4
) {
	long ret;
	// Pin each value to the register named in its declaration.
	register long rax asm("rax") = nr;
	register long rdi asm("rdi") = arg1;
	register long rsi asm("rsi") = arg2;
	register long rdx asm("rdx") = arg3;
	register long r10 asm("r10") = arg4;
	asm volatile (
		"syscall"
		: "=a"(ret)
		: "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx), "r"(r10)
		// rcx and r11 are declared clobbered; "memory" keeps the compiler from
		// caching memory contents across the call.
		: "rcx", "r11", "memory"
	);
	return ret;
}
// Switch to a new stack and transfer control to `entrypoint`; never returns.
//
// The function is `naked`, so no prologue/epilogue is generated and the body
// is pure assembly. The asm reads `stack` from rdi and `entrypoint` from rsi,
// i.e. it assumes the first two arguments arrive in those registers.
// NOTE(review): the trailing __builtin_unreachable() is a non-asm statement
// inside a naked function — confirm every supported compiler accepts it.
__attribute__((naked))
static void jump_to_entrypoint (void* stack, void* entrypoint) {
	asm volatile (
		"mov %rdi, %rsp;"	// set the stack pointer.
		"xor %rax, %rax;"	// clear the return value.
		"xor %rbp, %rbp;"	// clear the frame pointer.
		"mov $0, %rdx;"		// clear rdx because we have no cleanup code.
		"jmp *%rsi;"		// jump to the entrypoint.
	);
	__builtin_unreachable();
}
= 0x0; + .text : { + *(.text.start) + *(.text) + } + .data : { *(.data) } + .bss : { *(.bss) } +} \ No newline at end of file diff --git a/packages/std/packages/stub/src/aarch64/start.s b/packages/std/packages/stub/src/aarch64/start.s new file mode 100644 index 00000000..fd4d7bc1 --- /dev/null +++ b/packages/std/packages/stub/src/aarch64/start.s @@ -0,0 +1,7 @@ +.section .text.start,"ax",@progbits +.global _start +.type _start, @function +_start: + mov x29, xzr + mov x0, sp + bl main diff --git a/packages/std/packages/stub/src/manifest.c b/packages/std/packages/stub/src/manifest.c new file mode 100644 index 00000000..d6748f80 --- /dev/null +++ b/packages/std/packages/stub/src/manifest.c @@ -0,0 +1,107 @@ +#include +#include "arena.h" +#include "manifest.h" +#include "json.h" +#include "util.h" + +#define ARTIFACTS_DIR "/.tangram/artifacts" +#define ARTIFACTS_DIR_LEN 19 +#define PATH_MAX 4096 + +static void find_artifacts_dir (Arena* arena, String* path) { + stat_t statbuf; + + // First check the root. + if (stat(ARTIFACTS_DIR, &statbuf) == 0) { + path->ptr = ARTIFACTS_DIR; + path->len = ARTIFACTS_DIR_LEN; + return; + } + + // Get cwd. + path->ptr = alloc(arena, PATH_MAX, 1); + ABORT_IF(getcwd(path->ptr, PATH_MAX - ARTIFACTS_DIR_LEN) - 1 <= 0, "failed to get the cwd"); + path->len = strlen(path->ptr); + + // Walk the parent directory tree. + do { + path->ptr[path->len] = 0; + memcpy(path->ptr + path->len, ARTIFACTS_DIR, ARTIFACTS_DIR_LEN + 1); + if (stat(path->ptr, &statbuf) == 0) { + path->len += ARTIFACTS_DIR_LEN; + break; + } + *path = parent_dir(*path); + } while (path->len > 0); + ABORT_IF(!path->ptr, "failed to find artifacts directory"); +} + +void parse_manifest ( + Arena* arena, + Manifest* manifest, + uint8_t* data, + uint64_t len +) { + // Sanity check. + ABORT_IF(len == 0, "expected a non-zero length"); + + // Find the artifacts directory. + String artifacts_dir; + find_artifacts_dir(arena, &artifacts_dir); + + // Create the context. 
+ Cx cx = { + .arena = arena, + .manifest = manifest, + .artifacts_dir = artifacts_dir + }; + + // Parse json. + JsonParser parser = { + .arena = arena, + .input = { + .ptr = data, + .len = len + }, + }; + JsonValue value; + ABORT_IF(parse_json_value(&parser, &value), "failed to parse manifest JSON"); + create_manifest_from_json(&cx, &value); + + String true_ = STRING_LITERAL("true"); + String clear_ld_library_path = STRING_LITERAL("TANGRAM_CLEAR_LD_LIBRARY_PATH"); + String clear_ld_preload = STRING_LITERAL("TANGRAM_CLEAR_LD_PRELOAD"); + String restore_ld_library_path = STRING_LITERAL("TANGRAM_RESTORE_LD_LIBRARY_PATH"); + String restore_ld_preload = STRING_LITERAL("TANGRAM_RESTORE_LD_PRELOAD"); + + // Render paths. + manifest-> ld_library_path = render_ld_library_path(arena, manifest); + if (manifest->ld_library_path.ptr) { + String key = STRING_LITERAL("LD_LIBRARY_PATH"); + String val = lookup(&manifest->env, key); + if (val.ptr) { + String ss[2] = { val, manifest->ld_library_path }; + String s = STRING_LITERAL(":"); + manifest->ld_library_path = join(arena, s, ss, 2); + insert(arena, &manifest->env, restore_ld_library_path, val); + } else { + insert(arena, &manifest->env, clear_ld_library_path, true_); + } + insert(arena, &manifest->env, key, manifest->ld_library_path); + } + manifest->ld_preload = render_ld_preload(arena, manifest); + if (manifest->ld_preload.ptr) { + String key = STRING_LITERAL("LD_PRELOAD"); + String val = lookup(&manifest->env, key); + if (val.ptr) { + String ss[2] = { val, manifest->ld_preload }; + String s = STRING_LITERAL(":"); + manifest->ld_preload = join(arena, s, ss, 2); + insert(arena, &manifest->env, restore_ld_preload, val); + } else { + } + insert(arena, &manifest->env, clear_ld_preload, true_); + insert(arena, &manifest->env, key, manifest->ld_preload); + } +} +#undef PATH_MAX diff --git a/packages/std/packages/stub/src/manifest/json.c b/packages/std/packages/stub/src/manifest/json.c new file mode 100644 index 00000000..324c3095 
--- /dev/null
+++ b/packages/std/packages/stub/src/manifest/json.c
@@ -0,0 +1,542 @@
+#include "json.h"
+#include "manifest.h"
+
+// Forward decls.
+static void create_interpreter (Cx* cx, JsonValue* interpreter);
+static void create_executable (Cx* cx, JsonValue* executable);
+static void create_env (Cx* cx, JsonValue* env);
+static void create_args (Cx* cx, JsonValue* args);
+static void create_preloads (Cx* cx, JsonValue* value);
+static void create_interp_args (Cx* cx, JsonValue* value);
+static void create_loader_paths (Cx* cx, JsonValue* value);
+static void apply_env(Cx* cx, JsonObject* map);
+static void apply_mutation_to_key (Cx* cx, String* key, JsonObject* mutation);
+static void apply_value_to_key (Cx* cx, String* key, JsonValue* val);
+static void render_template (Cx* cx, JsonValue* template, String* dst);
+static void render_template_to_temp (Cx* cx, JsonValue* template);
+static String render_value (Cx* cx, JsonValue* value);
+static String render_object (Cx* cx, JsonValue* value);
+
+void create_manifest_from_json (Cx* cx, JsonValue* value) {
+	// Validate.
+	ABORT_IF(value->kind != JSON_OBJECT, "expected an object (1)");
+
+	// Parse fields.
+	JsonObject* object = &value->value._object;
+	while(object) {
+		if (object->value) {
+			if (cstreq(object->key, "interpreter")) {
+				create_interpreter(cx, object->value);
+			} else if (cstreq(object->key, "executable")) {
+				create_executable(cx, object->value);
+			} else if (cstreq(object->key, "env")) {
+				create_env(cx, object->value);
+			} else if (cstreq(object->key, "args")) {
+				create_args(cx, object->value);
+			}
+		}
+		object = object->next;
+	}
+}
+
+static void create_interpreter (Cx* cx, JsonValue* value) {
+	ABORT_IF(value->kind != JSON_OBJECT, "expected an object (2), got %d", value->kind);
+	JsonObject* object = &value->value._object;
+	JsonValue* kind = json_get(object, "kind");
+	ABORT_IF(!kind, "expected a kind string");
+	ABORT_IF(kind->kind != JSON_STRING, "expected a string");
+	if (cstreq(kind->value._string, "normal")) {
+		cx->manifest->interpreter_kind = INTERPRETER_KIND_NORMAL;
+	} else if (cstreq(kind->value._string, "ld-linux")) {
+		cx->manifest->interpreter_kind = INTERPRETER_KIND_LD_LINUX;
+	} else if (cstreq(kind->value._string, "ld-musl")) {
+		cx->manifest->interpreter_kind = INTERPRETER_KIND_LD_MUSL;
+	} else if (cstreq(kind->value._string, "dyld")) {
+		ABORT("dyld interpreter is unsupported in this context");
+	} else {
+		char* s = cstr(cx->arena, kind->value._string);
+		ABORT("unknown interpreter kind %s", s);
+	}
+	JsonValue* path = json_get(object, "path");
+	JsonValue* library_paths = json_get(object, "libraryPaths");
+	JsonValue* preloads = json_get(object, "preloads");
+	JsonValue* args = json_get(object, "args");
+	ABORT_IF(!path, "expected an interpreter path");
+	render_template(cx, path, &cx->manifest->interpreter);
+	create_loader_paths(cx, library_paths);
+	create_preloads(cx, preloads);
+	create_interp_args(cx, args);
+}
+
+static void create_loader_paths (Cx* cx, JsonValue* value) {
+	// Type check.
+	if (!value) { return; }
+	ABORT_IF(value->kind != JSON_ARRAY, "expected an array");
+	JsonArray* array = &value->value._array;
+
+	// Count entries.
+	uint64_t len = json_array_len(array);
+
+	// Reset.
+	array = &value->value._array;
+
+	// Allocate space for the paths.
+	cx->manifest->library_paths = ALLOC_N(cx->arena, len, String); cx->manifest->num_library_paths = len;
+
+	// Render each template.
+	for(size_t n = 0; n < len; n++) {
+		JsonValue* template = array->value;
+		render_template(cx, template, &cx->manifest->library_paths[n]);
+		array = array->next;
+	}
+}
+
+static void create_preloads (Cx* cx, JsonValue* value) {
+	// Type check.
+	if (!value) { return; }
+	ABORT_IF(value->kind != JSON_ARRAY, "expected an array");
+	JsonArray* array = &value->value._array;
+
+	// Count entries.
+	uint64_t len = json_array_len(array);
+
+	// Allocate space for the paths.
+	cx->manifest->preloads = ALLOC_N(cx->arena, len, String);
+	cx->manifest->num_preloads = len;
+
+	// Render each template.
+	for(size_t n = 0; n < len; n++) {
+		JsonValue* template = array->value;
+		render_template(cx, template, &cx->manifest->preloads[n]);
+		array = array->next;
+	}
+}
+
+static void create_interp_args (Cx* cx, JsonValue* value) {
+	if (!value) {
+		return;
+	}
+	ABORT_IF(value->kind != JSON_ARRAY, "expected an array");
+	JsonArray* array = &value->value._array;
+	uint64_t len = json_array_len(array);
+	cx->manifest->interp_argc = len;
+	cx->manifest->interp_argv = ALLOC_N(cx->arena, len, String);
+	for(size_t n = 0; n < len; n++) {
+		JsonValue* itr = array->value;
+		String* arg = cx->manifest->interp_argv + n;
+		render_template(cx, itr, arg);
+		array = array->next;
+	}
+}
+
+static void create_executable (Cx* cx, JsonValue* value) {
+	ABORT_IF(value->kind != JSON_OBJECT, "expected an object (3)");
+	JsonObject* object = &value->value._object;
+	JsonValue* kind = json_get(object, "kind");
+	ABORT_IF(!kind, "missing kind");
+	ABORT_IF(kind->kind != JSON_STRING, "expected a string");
+
+	if (cstreq(kind->value._string, "path")) {
+		value = json_get(object, "value");
+		render_template(cx, value, &cx->manifest->executable);
+	} else if (cstreq(kind->value._string, "content")) {
+		value = json_get(object, "value");
+		render_template_to_temp(cx, value);
+	} else if (cstreq(kind->value._string, "address")) {
+		value = json_get(object, "value");
+		ABORT_IF(!value || value->kind != JSON_NUMBER, "expected a number");
+		cx->manifest->entrypoint = (uint64_t)value->value._number;
+	}
+}
+
+static void create_env (Cx* cx, JsonValue* value) {
+	if (!value) {
+		return;
+	}
+	ABORT_IF(value->kind != JSON_OBJECT, "expected an object (4)");
+	JsonObject* object = &value->value._object;
+	JsonValue* kind = json_get(object, "kind");
+	ABORT_IF(!kind, "missing kind");
+	ABORT_IF(kind->kind != JSON_STRING, "expected a string");
+
+	if (cstreq(kind->value._string, "unset")) {
+		clear(&cx->manifest->env);
+	} else if (cstreq(kind->value._string, "set")) {
+		// Extract the inner object.
+		value = json_get(object, "value");
+		ABORT_IF(!value, "expected a value");
+		ABORT_IF(value->kind != JSON_OBJECT, "expected an object (5)");
+		object = &value->value._object;
+
+		// Get the inner kind.
+		JsonValue* kind = json_get(object, "kind");
+		ABORT_IF(!kind || kind->kind != JSON_STRING, "missing kind (1)");
+		ABORT_IF(!cstreq(kind->value._string, "map"), "expected a map (1)");
+
+		// Get the inner object.
+		value = json_get(object, "value");
+		ABORT_IF(!value || value->kind != JSON_OBJECT, "expected an object (6)");
+		object = &value->value._object;
+		apply_env(cx, &value->value._object);
+	} else {
+		ABORT("unsupported mutation type");
+	}
+}
+
+static void create_args (Cx* cx, JsonValue* value) {
+	if (!value) {
+		return;
+	}
+	ABORT_IF(value->kind != JSON_ARRAY, "expected an array");
+	JsonArray* array = &value->value._array;
+	uint64_t len = json_array_len(array);
+	cx->manifest->argc = (int)len;
+	cx->manifest->argv = ALLOC_N(cx->arena, len, String);
+	for(size_t n = 0; n < len; n++) {
+		JsonValue* itr = array->value;
+		String* arg = cx->manifest->argv + n;
+		render_template(cx, itr, arg);
+		array = array->next;
+	}
+}
+
+static bool is_mutation (JsonValue* value) {
+	if (value->kind != JSON_OBJECT) {
+		return false;
+	}
+	JsonObject* object = &value->value._object;
+	JsonValue* kind = json_get(object, "kind");
+	if (!kind) {
+		return false;
+	}
+	if (kind->kind != JSON_STRING) {
+		return false;
+	}
+	return cstreq(kind->value._string, "unset")
+		|| cstreq(kind->value._string, "set")
+		|| cstreq(kind->value._string, "set-if-unset")
+		|| cstreq(kind->value._string, "prepend")
+		|| cstreq(kind->value._string, "append")
+		|| cstreq(kind->value._string, "prefix")
+		|| cstreq(kind->value._string, "suffix")
+		|| cstreq(kind->value._string, "merge");
+}
+
+static bool is_template (JsonValue* value) {
+	if (value->kind != JSON_OBJECT) {
+		return false;
+	}
+	JsonObject* object = &value->value._object;
+	return json_get(object, "components") != NULL;
+}
+
+static void apply_env (Cx* cx, JsonObject* env) {
+	while(env) {
+		if (env->value) {
+			String* key = &env->key;
+			if (env->value->kind == JSON_ARRAY) {
+				JsonArray* array = &env->value->value._array;
+				while(array) {
+					if (array->value) {
+						ABORT_IF(array->value->kind != JSON_OBJECT, "expected an object (7)");
+						apply_mutation_to_key(cx, key, &array->value->value._object);
+					}
+					array = array->next;
+				}
+			} else {
+				apply_value_to_key(cx, key, env->value);
+			}
+		}
+		env = env->next;
+	}
+}
+
+// Join the strings in `array` with ":" and combine them with the existing value of `key`, if any.
+// When `before` is true the new values precede the existing value, otherwise they follow it.
+static String join_env_values (Cx* cx, String* key, JsonArray* array, bool before) {
+	size_t len = json_array_len(array);
+	String* ss = ALLOC_N(cx->arena, len + 1, String);
+	String existing = lookup(&cx->manifest->env, *key);
+	size_t count = 0;
+	if (!before && existing.ptr) {
+		ss[count++] = existing;
+	}
+
+	// The array is a linked list, so walk it with `next` as the other loops in this file do.
+	for (size_t n = 0; n < len; n++) {
+		JsonValue* s = array->value;
+		ABORT_IF(!s || s->kind != JSON_STRING, "expected a string");
+		ss[count++] = s->value._string;
+		array = array->next;
+	}
+	if (before && existing.ptr) {
+		ss[count++] = existing;
+	}
+
+	String separator = STRING_LITERAL(":");
+	return join(cx->arena, separator, ss, count);
+}
+
+// Apply one mutation object (selected by its "kind" string) to the env entry named `key`.
+static void apply_mutation_to_key (Cx* cx, String* key, JsonObject* mutation) {
+	JsonValue* kind = json_get(mutation, "kind");
+	ABORT_IF(!kind, "missing kind");
+	ABORT_IF(kind->kind != JSON_STRING, "expected a string");
+	if (cstreq(kind->value._string, "unset")) {
+		remove(&cx->manifest->env, *key);
+	} else if (cstreq(kind->value._string, "set")) {
+		JsonValue* value = json_get(mutation, "value");
+		apply_value_to_key(cx, key, value);
+	} else if (cstreq(kind->value._string, "set_if_unset")) {
+		if (!lookup(&cx->manifest->env, *key).ptr) {
+			JsonValue* value = json_get(mutation, "value");
+			apply_value_to_key(cx, key, value);
+		}
+	} else if (cstreq(kind->value._string, "prepend")) {
+		JsonValue* values = json_get(mutation, "values");
+		ABORT_IF(!values || values->kind != JSON_ARRAY, "expected an array");
+		String joined = join_env_values(cx, key, &values->value._array, true);
+		insert(cx->arena, &cx->manifest->env, *key, joined);
+	} else if (cstreq(kind->value._string, "append")) {
+		JsonValue* values = json_get(mutation, "values");
+		ABORT_IF(!values || values->kind != JSON_ARRAY, "expected an array");
+		String joined = join_env_values(cx, key, &values->value._array, false);
+		insert(cx->arena, &cx->manifest->env, *key, joined);
+	} else if (cstreq(kind->value._string, "prefix")) {
+		// Lookup the existing value.
+		String a = lookup(&cx->manifest->env, *key);
+
+		// Destructure the value.
+		JsonValue* template = json_get(mutation, "template");
+		JsonValue* separator = json_get(mutation, "separator");
+
+		// Destructure the object.
+		ABORT_IF(!template, "expected a template");
+		String b = {0};
+		render_template(cx, template, &b);
+
+		// Don't join if the value doesn't exist.
+		if (!a.ptr) {
+			insert(cx->arena, &cx->manifest->env, *key, b);
+			return;
+		}
+
+		// Get the separator if it exists.
+		String s = {0};
+		if (separator) {
+			ABORT_IF(separator->kind != JSON_STRING, "expected a string");
+			s = separator->value._string;
+		}
+
+		// Update the env.
+		String ss[2] = { b, a };
+		insert(cx->arena, &cx->manifest->env, *key, join(cx->arena, s, ss, 2));
+	} else if (cstreq(kind->value._string, "suffix")) {
+		// Lookup the existing value.
+		String a = lookup(&cx->manifest->env, *key);
+
+		// Destructure the object.
+		JsonValue* template = json_get(mutation, "template");
+		JsonValue* separator = json_get(mutation, "separator");
+
+		// Render the template.
+		ABORT_IF(!template, "expected a template");
+		String b = {0};
+		render_template(cx, template, &b);
+
+		// Don't join if the value doesn't exist.
+		if (!a.ptr) {
+			insert(cx->arena, &cx->manifest->env, *key, b);
+			return;
+		}
+
+		// Get the separator if it exists.
+		String s = {0};
+		if (separator) {
+			ABORT_IF(separator->kind != JSON_STRING, "expected a string");
+			s = separator->value._string;
+		}
+
+		// Update the env.
+		String ss[2] = { a, b };
+		insert(cx->arena, &cx->manifest->env, *key, join(cx->arena, s, ss, 2));
+	} else if (cstreq(kind->value._string, "merge")) {
+		ABORT("merge mutations are not supported for environment variables");
+	} else {
+		ABORT(" unsupported mutation type (%s)", cstr(cx->arena, kind->value._string));
+	}
+}
+
+static void apply_value_to_key (Cx* cx, String* key, JsonValue* val) {
+	// Handle mutations.
+	if(val->kind == JSON_OBJECT) {
+		JsonValue* kind = json_get(&val->value._object, "kind");
+		if (kind && kind->kind == JSON_STRING && cstreq(kind->value._string, "mutation")) {
+			val = json_get(&val->value._object, "value");
+			ABORT_IF(!val || val->kind != JSON_OBJECT, "expected an object (8)");
+			apply_mutation_to_key(cx, key, &val->value._object);
+			return;
+		}
+	}
+
+	// Otherwise render the value and insert it.
+	String rendered = render_value(cx, val);
+	insert(cx->arena, &cx->manifest->env, *key, rendered);
+}
+
+// Render the "components" of a template into `rendered`; artifact components get the artifacts directory prepended.
+static void render_template (Cx* cx, JsonValue* template, String* rendered) {
+	// Type check.
+	ABORT_IF(!template, "expected a template");
+	ABORT_IF(template->kind != JSON_OBJECT, "expected an object (9)");
+
+	// Get the components.
+	JsonValue* components = json_get(&template->value._object, "components");
+	ABORT_IF(!components, "expected components");
+	ABORT_IF(components->kind != JSON_ARRAY, "expected an array");
+
+	// Render components.
+	size_t capacity = 2048;
+	rendered->ptr = (uint8_t*)alloc(cx->arena, capacity, 1);
+	rendered->len = 0;
+
+	JsonArray* array = &components->value._array;
+	while (array) {
+		if (array->value) {
+			ABORT_IF(array->value->kind != JSON_OBJECT, "expected an object (10)");
+			JsonObject* object = &array->value->value._object;
+			JsonValue* kind = json_get(object, "kind");
+			JsonValue* value = json_get(object, "value");
+			ABORT_IF(!kind, "missing kind");
+			ABORT_IF(!value, "missing value");
+			ABORT_IF(kind->kind != JSON_STRING, "expected a string");
+			ABORT_IF(value->kind != JSON_STRING, "expected a string");
+			if (cstreq(kind->value._string, "string")) {
+				append_to_string(rendered, &value->value._string, capacity);
+			} else if (cstreq(kind->value._string, "artifact")) {
+				append_to_string(rendered, &cx->artifacts_dir, capacity);
+				append_ch_to_string(rendered, '/', capacity);
+				append_to_string(rendered, &value->value._string, capacity);
+			} else {
+				ABORT("unknown template component kind");
+			}
+		}
+		array = array->next;
+	}
+}
+
+// Overwrite the last six bytes of `string` with random characters drawn from LOOKUP.
+void mktemp (String* string) {
+	ABORT_IF(string->len <= 6, "string too small");
+	size_t offset = string->len - 6;
+	const char LOOKUP[256] =
+		"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01"
+		"23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123"
+		"456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
+		"6789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh";
+	ABORT_IF(getrandom((void*)&string->ptr[offset], 6, GRND_NONBLOCK) != 6, "getrandom() failed");
+	for (; offset < string->len; offset++) {
+		string->ptr[offset] = LOOKUP[(uint8_t)string->ptr[offset]];
+	}
+}
+
+// Render `template` and write the result to a freshly named file under $TEMP (or /tmp).
+static void render_template_to_temp (Cx* cx, JsonValue* template) {
+	// Create the path.
+	String path = {
+		.ptr = (uint8_t*)alloc(cx->arena, 2048, 1),
+		.len = 0
+	};
+
+	// Get the TEMP directory.
+	String temp = clookup(&cx->manifest->env, "TEMP");
+	if (temp.ptr) {
+		ABORT_IF(temp.len > 2000, "TEMP is too long");
+		memcpy(path.ptr, temp.ptr, temp.len);
+		path.len = temp.len;
+	} else {
+		memcpy(path.ptr, "/tmp", 4);
+		path.len = 4;
+	}
+
+	// Append the template, including its NUL terminator; the name itself is 11 bytes long.
+	memcpy(path.ptr + path.len, "/tmp.XXXXXX", 12);
+	path.len += 11;
+	mktemp(&path);
+
+	// Open the file.
+	int fd = open(path.ptr, O_RDWR | O_CREAT, 0664);
+	ABORT_IF(fd < 0, "failed to open %s", path.ptr);
+
+	// Render the template.
+	// NOTE(review): manifest->executable keeps the rendered content, not `path` - confirm that is what later code expects.
+	render_template(cx, template, &cx->manifest->executable);
+
+	// Write the rendered template to the file.
+	String* rendered = &cx->manifest->executable;
+	size_t len = 0;
+	while(len < rendered->len) {
+		long amt = write(fd, (void*)(rendered->ptr + len), rendered->len - len);
+		ABORT_IF(amt < 0, "failed to write to temp file");
+		if (amt == 0) {
+			break;
+		}
+		len += amt;
+	}
+
+	// Close the file so the descriptor is not leaked.
+	close(fd);
+}
+
+static String render_value (Cx* cx, JsonValue* value) {
+	String rendered = {0};
+	switch(value->kind) {
+		case JSON_NULL: return rendered;
+		case JSON_BOOL: {
+			rendered.ptr = value->value._bool ? "true" : "false";
+			rendered.len = strlen(rendered.ptr);
+			break;
+		}
+		case JSON_NUMBER: {
+			double_to_string(cx->arena, value->value._number, &rendered);
+			break;
+		}
+		case JSON_STRING: {
+			rendered = value->value._string;
+			break;
+		}
+		case JSON_OBJECT: {
+			// Get the kind.
+			JsonObject* object = &value->value._object;
+			JsonValue* kind = json_get(object, "kind");
+			ABORT_IF(!kind || kind->kind != JSON_STRING, "missing kind (2)");
+
+			// Get the value.
+			value = json_get(object, "value");
+			ABORT_IF(!value, "expected a value");
+
+			// Check the type of the value.
+ if (cstreq(kind->value._string, "map")) { + ABORT("cannot render map in this context"); + } else if (cstreq(kind->value._string, "object")) { + value = json_get(object, "value"); + ABORT_IF(!value || value->kind != JSON_STRING, "expected an ID"); + String ss[2] = { cx->artifacts_dir, value->value._string }; + String s = STRING_LITERAL("/"); + rendered = join(cx->arena, s, ss, 2); + break; + } else if (cstreq(kind->value._string, "bytes")) { + ABORT("cannot render bytes in this context"); + } else if (cstreq(kind->value._string, "mutation")) { + ABORT("cannot render mutation in this context"); + } else if (cstreq(kind->value._string, "template")) { + render_template(cx, value, &rendered); + } else { + ABORT("unknown value type"); + } + break; + } + default: ABORT("malformed manifest (2) kind: %d", value->kind); + } + return rendered; +} diff --git a/packages/std/packages/stub/src/stub.c b/packages/std/packages/stub/src/stub.c new file mode 100644 index 00000000..7fb5b6e9 --- /dev/null +++ b/packages/std/packages/stub/src/stub.c @@ -0,0 +1,965 @@ +#ifdef __aarch64__ +#elif defined __x86_64__ +#else +#error "unknown architecture" +#endif + +#include +#include +#include +#include + +#include "arena.h" +#include "debug.h" +#include "footer.h" +#include "manifest.h" +#include "syscall.h" +#include "util.h" + +// Push a value onto the stack pointer. +#define PUSH(sp, val) do { sp -= sizeof(uintptr_t); *((uintptr_t*)sp) = (uintptr_t)val; } while (0) + +// Data passed to us on the stack by the kernel, as well as some counters. +typedef struct +{ + void* sp; // the stack pointer at the entrypoint. + int argc; // num args + char** argv; // arg vector + int envc; // num env vars + char** envp; // env vector + int auxc; // num aux vals + Elf64_auxv_t* auxv; // the aux vector + uintptr_t auxv_glob[32]; // sorted aux vector, for quick lookup later. 
+} Stack; + +typedef struct +{ + bool enable_tracing; + bool suppress_args; + bool suppress_env; +} Options; + +// Debugging helper. +static void print_stack (Stack* stack) { + trace("{\n"); + trace("\targc: %d,\n", stack->argc); + trace("\targv: [\n"); + for (int n = 0; n < stack->argc; n++) { + trace("\t\t\"%s\",\n", stack->argv[n]); + } + trace("\t],\n"); + trace("\tenvp: [\n"); + for (int n = 0; n < stack->envc; n++) { + trace("\t\t\"%s\",\n", stack->envp[n]); + } + trace("\t],\n"); + trace("\tauxv: [\n"); + for (int n = 0; n < stack->auxc; n++) { + trace("\t\t{ a_type: \"%s\", a_un: %08lx },\n", + auxv_type_string(stack->auxv[n].a_type), + stack->auxv[n].a_un.a_val + ); + } + trace("\t]\n}\n"); +} + +static void parse_options(Stack* stack, Options* options) { + String TANGRAM_SUPPRESS_ARGS = STRING_LITERAL("TANGRAM_SUPPRESS_ARGS"); + String TANGRAM_SUPPRESS_ENV = STRING_LITERAL("TANGRAM_SUPPRESS_ENV"); + String TANGRAM_TRACING = STRING_LITERAL("TANGRAM_TRACING"); + + options->enable_tracing = false; + options->suppress_args = false; + options->suppress_env = false; + + char **itr, **end; + + // Parse args. + itr = stack->argv; + end = itr + stack->argc; + for(; itr != end; itr++) { + String s = STRING_LITERAL(*itr); + if (cstreq(s, "--tangram-suppress-args")) { + options->suppress_args = true; + } + if (cstreq(s, "--tangram-suppress-env")) { + options->suppress_env = true; + } + } + + // Parse envs. + itr = stack->envp; + end = itr + stack->envc; + for(; itr != end; itr++) { + String s = STRING_LITERAL(*itr); + if (starts_with(s, TANGRAM_SUPPRESS_ARGS)) { + options->suppress_args = true; + } + if (starts_with(s, TANGRAM_SUPPRESS_ENV)) { + options->suppress_env = true; + } + if (starts_with(s, TANGRAM_TRACING)) { + options->enable_tracing = true; + } + } +} + +// Scan the bottom of the stack to extract argv, envp, auxv and their counts. +static void scan_stack (Stack* stack) { + // Validate alignment. 
+ ABORT_IF((uintptr_t)stack->sp % 16 != 0, "misaligned stack"); + + // Scan the arg vector. + stack->argc = (int)*(uint64_t *)stack->sp; + stack->argv = (char**)((int64_t *)stack->sp + 1); + + // Scan the env vector. + stack->envp = (char**)((int64_t *)stack->sp + 1 + stack->argc + 1); + stack->envc = 0; + for (; stack->envp[stack->envc]; stack->envc++){} + + // Scan the aux vector. + stack->auxv = (Elf64_auxv_t *)((int64_t *)stack->sp + 1 + stack->argc + 1 + stack->envc + 1); + stack->auxc = 0; + for(;;) { + Elf64_auxv_t* v = stack->auxv + stack->auxc; + stack->auxc++; + if (v->a_type < 32) { + stack->auxv_glob[v->a_type] = (uintptr_t)v->a_un.a_val; + } + if (v->a_type == AT_NULL) { + break; + } + } +} + +// Push a string to the top of the stack. +static inline void push_str (void** sp, const char* str) { + size_t len = strlen_including_nul(str); + (*sp) -= len; + memcpy(*sp, (const void*)str, len); +} + +// Push an auxv to the top of the stack. +static inline void push_auxv (void** sp, const Elf64_auxv_t* auxv) { + (*sp) -= sizeof(Elf64_auxv_t); + memcpy(*sp, (const void*)auxv, sizeof(Elf64_auxv_t)); +} + +static inline void print_program_header_table (Elf64_Phdr* phdr, size_t count) { + trace("count: %d\n", count); + Elf64_Phdr* itr = phdr; + Elf64_Phdr* end = itr + count; + for(; itr != end; itr++) { + trace("%s flags:%o offset:%lx vaddr:%lx, paddr:%lx, filesz:%lx, memsz:%lx, align: %lx\n" , + p_type_string(itr->p_type), + itr->p_flags, + itr->p_offset, + itr->p_vaddr, + itr->p_paddr, + itr->p_filesz, + itr->p_memsz, + itr->p_align + ); + } +} + +// Create a new execution stack. Currently, this allocates a new stack rather than reusing the existing stack. +static inline void* prepare_stack ( + Arena* arena, + Stack* stack, + Manifest* manifest, + Options* options +) { + // Get the default stack size using ulimit. TODO: how does this work w/ cgroups? 
+ rlimit_t rlim; + ABORT_IF(getrlimit(RLIMIT_STACK, &rlim), "failed to get the stack size"); + size_t stack_size = rlim.soft; + + // Allocate the stack. On x86_64, the stack "grows down" meaning that the address returned by mmap is actually the lowest possible address for the stack. The "top" of the new stack is the address of one page past it. + void* bp = mmap( + 0, + stack_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN, + -1, + 0 + ); + void* sp = bp + stack_size; + + // Push environment variables. Order doesn't matter. + int e = 0; + char** envp = ALLOC_N(arena, manifest->env.size + 1, char*); + + // Add envs. + for (int i = 0; i < manifest->env.capacity; i++) { + Node* node = manifest->env.list + i; + while(node) { + if (node->key.ptr) { + // Allocate the string. + size_t len = node->key.len + node->val.len + 2; + char* str = (char*)alloc(arena, len, 1); + memset(str, 0, len); + + // Create the string. + memcpy(str, node->key.ptr, node->key.len); + str[node->key.len] = '='; + memcpy(str + node->key.len + 1, node->val.ptr, node->val.len); + + // Push the string onto the stack. + push_str(&sp, str); + + // Save the address in envp. + envp[e++] = sp; + } + node = node->next; + } + } + + // Push arg vector. Order still does not matter. + int a = 0; + char** argv = ALLOC_N(arena, manifest->argc + 8, char*); + + // Add argv0 + push_str(&sp, stack->argv[0]); + argv[a++] = sp; + + for (int i = 0; i < manifest->argc; i++) { + char* arg = cstr(arena, manifest->argv[i]); + push_str(&sp, arg); + argv[a++] = sp; + } + + // Push 16 null bytes. + PUSH(sp, 0ul); + PUSH(sp, 0ul); + + // Align the stack. + sp = (void*)ALIGN((uintptr_t)sp, 16); + + // If there are an even number of env and arg vals then we need an additional 8 bytes of padding to ensure the top of the stack is aligned. + if ((e + a) % 2 == 0) { + PUSH(sp, 0); + } + + // Push aux vector in reverse order. 
+ int x = stack->auxc; + for (; x >= 0; x--) { + Elf64_auxv_t* v = &stack->auxv[x]; + push_auxv(&sp, v); + } + + // Null separator between envp and auxv. + PUSH(sp, 0); + + // Push envp, in reverse order. + for (int e_ = e - 1; e_ >= 0; e_--) { + ABORT_IF(!envp[e_], "invalid env pointer"); + PUSH(sp, envp[e_]); + } + + // Null separator between argv and envp. + PUSH(sp, NULL); + + // Push argv, in reverse order. + for (int a_ = a - 1; a_ >= 0; a_--) { + PUSH(sp, argv[a_]); + } + + // Push argc. + PUSH(sp, (uint64_t)a); + + // Check alignment. + if ((uintptr_t)sp % 16) { + ABORT("misaligned stack"); + } + + // Return the prepared stack. + return sp; +} + +typedef struct { + uintptr_t phdr; + uintptr_t phnum; + uintptr_t entry; + uintptr_t base_address; +} LoadedInterpreter; + +// Given the absolute path to the interpreter on disk, we load it into memory, returning its entrypoint and base address. +static LoadedInterpreter load_interpreter( + Arena* arena, + const char* path, + uint64_t page_sz, + Options* options +) { + if (options->enable_tracing) { + trace("loading interpreter with path: %s, page_sz: %ld\n", path, page_sz); + } + + // Open the interpreter. + int fd = open(path, O_RDONLY, 0); + ABORT_IF(fd < 0, "failed to open interpreter %s", path); + + // Read the e_hdr + Elf64_Ehdr* ehdr = ALLOC(arena, Elf64_Ehdr); + ABORT_IF(pread64(fd, (void*)ehdr, sizeof(Elf64_Ehdr), 0) < 0, "failed to read ehdr"); + + // Validate + bool is_elf64 = (ehdr->e_ident[EI_MAG0] == ELFMAG0) + && (ehdr->e_ident[EI_MAG1] == ELFMAG1) + && (ehdr->e_ident[EI_MAG2] == ELFMAG2) + && (ehdr->e_ident[EI_MAG3] == ELFMAG3) + && (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + && (ehdr->e_ident[EI_CLASS] == ELFCLASS64); + ABORT_IF(!is_elf64, "invalid ELF file"); + ABORT_IF(ehdr->e_phentsize != sizeof(Elf64_Phdr), + "e_phentsize=%ld, sizeof(Elf64_Phdr)=%ld", + ehdr->e_phentsize, sizeof(Elf64_Phdr) + ); + + // Get the program header table. 
+ Elf64_Phdr* phdr = ALLOC_N(arena, ehdr->e_phnum, Elf64_Phdr); + ABORT_IF( + pread64(fd, (void*)phdr, sizeof(Elf64_Phdr) * ehdr->e_phnum, ehdr->e_phoff) < 0, + "failed to read phdr" + ); + + // We scan the program header table looking for the address range it should be mapped to. + uint64_t minvaddr = (uint64_t)-1; + uint64_t maxvaddr = 0; + switch(ehdr->e_type) { + case ET_DYN: { + // For dynamic interpreters, search for the address range. + Elf64_Phdr* itr = phdr; + Elf64_Phdr* end = itr + ehdr->e_phnum; + for (; itr != end; itr++) { + if (itr->p_type != PT_LOAD) { + continue; + } + uint64_t min = itr->p_vaddr; + uint64_t max = min + itr->p_memsz; + if (min < minvaddr) { + minvaddr = min; + } + if (max > maxvaddr) { + maxvaddr = max; + } + } + break; + } + default: ABORT("invalid interpreter e_type"); // TODO: static interpreters? + } + if (options->enable_tracing) { + trace("loader virtual address range: %08lx..%08lx\n", minvaddr, maxvaddr); + } + + // Create one big mapping for the entire interpreter with PROT_NONE permissions. We'll slice it up in a second. + void* base_address = mmap(0, ALIGN(maxvaddr, page_sz), 0, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (options->enable_tracing) { + trace("mapped %08lx..%08lx", (uintptr_t)base_address, (uintptr_t)base_address + maxvaddr); + } + + // Compute the bias, the logical base address of the interpeter. + void* bias = base_address - minvaddr; + + // Begin mapping PT_LOAD segments. + uint64_t mask = page_sz - 1; + Elf64_Phdr* itr = phdr; + Elf64_Phdr* end = phdr + ehdr->e_phnum; + Elf64_Addr phdr_addr = 0; + for (; itr != end; itr++) { + // Skip non-loadable segments. + if (itr->p_type != PT_LOAD) { continue; } + + // Get the physical offset in the file. + uint64_t offset = itr->p_offset; + + // The file offset may be misaligned. + uint64_t misalignment = (offset & mask); + + // Compute the (aligned) file offset. + off_t file_offset = offset - misalignment; + + // Compute the (aligned) virtual address. 
+ void* segment_address = (void*)((char*)bias + itr->p_vaddr - misalignment); + + // Compute the protection flags for this segment. + uint64_t prot = 0; + if (itr->p_flags & PF_R) { prot |= PROT_READ; }; + if (itr->p_flags & PF_W) { prot |= PROT_WRITE; }; + if (itr->p_flags & PF_X) { prot |= PROT_EXEC; }; + + // Compute the file size that we will map in. + uintptr_t filesz = ALIGN(itr->p_filesz + misalignment, page_sz); + uintptr_t memsz = ALIGN(itr->p_memsz + misalignment, page_sz); + + // If there's a non-zero number of bytes in the file, mmap it in. + size_t mapped = 0; + if (itr->p_filesz) { + uint64_t flags = (prot & PROT_WRITE) ? MAP_PRIVATE : MAP_SHARED; + segment_address = mmap( + segment_address, + filesz, + prot, + MAP_FIXED | flags, + fd, + file_offset + ); + if (segment_address == MAP_FAILED) ABORT("mmap failed"); + mapped += filesz; + } + + // If we need more memory than was mapped from the file, allocate it. + if (memsz > filesz) { + uintptr_t start = (uintptr_t)segment_address + filesz; + uintptr_t end = start + (memsz - filesz); + void* p = mmap( + (void*)start, + (end - start), + prot, + MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, + -1, + 0 + ); + if (p == MAP_FAILED) ABORT("mmap failed"); + mapped += (memsz - filesz); + } + + // If the page is marked writeable, make sure to zero-out any excess between the file end and the end of the segment. + if (prot & PF_W) { + uintptr_t offset = misalignment + itr->p_filesz; + uintptr_t length = mapped - itr->p_filesz - misalignment; + void* dst = (void*)((char*)segment_address + offset); + memset(dst, 0, length); + } + + // Sanity check our work. + ABORT_IF(mapped < itr->p_memsz, "failed to map segment"); + + if (options->enable_tracing) { + trace("LOADER: %08lx..%08lx to %08lx..%08lx %03o\n", + itr->p_vaddr, itr->p_vaddr + itr->p_memsz, + (uintptr_t)segment_address, (uintptr_t)(segment_address + mapped), + prot + ); + } + + // If this segment contains the phdr address, update it. 
+ uint64_t file_start = itr->p_offset; + uint64_t file_end = file_start + itr->p_filesz; + if (file_start <= ehdr->e_phoff && file_end <= (ehdr->e_phoff + ehdr->e_phentsize)) { + // Find the offset from the start of this segment of the program headers. + uint64_t ph_off_from_vaddr = ehdr->e_phoff - file_start; + phdr_addr = (uintptr_t)segment_address + ph_off_from_vaddr; + } + } + + // Get the entrypoint. + LoadedInterpreter loaded = { + .phdr = phdr_addr, + .phnum = ehdr->e_phnum, + .entry = (uintptr_t)ehdr->e_entry, + .base_address = (uintptr_t)bias + }; + + if (options->enable_tracing) { + trace("loaded interpreter: phdr: %lx, phnum: %d, entry: %lx, base_address: %lx\n", + loaded.phdr, + loaded.phnum, + loaded.entry, + loaded.base_address + ); + } + + // Close the file. + close(fd); + + // Return the entrypoint. + return loaded; +} + +typedef struct { + Elf64_Phdr* new; + uint64_t num; +} ProgramHeaders; + +static ProgramHeaders create_program_headers( + Arena* arena, + Manifest* manifest, + void* base_address, + uintptr_t original_entrypoint, + Elf64_Phdr* old, + size_t num +) { + Elf64_Phdr* new = ALLOC_N(arena, num + 1, Elf64_Phdr); + Elf64_Phdr* itr = old; + Elf64_Phdr* end = old + num; + uint64_t n = 0; + for(; itr != end; itr++) { + // If this is the load segment containing the stub, skip it. We don't want the loader to load it. + if (itr->p_type == PT_LOAD + && itr->p_vaddr <= original_entrypoint + && original_entrypoint < itr->p_vaddr + itr->p_memsz + ) { + continue; + } + + // Duplicate the header data. + memcpy(&new[n], itr, sizeof(Elf64_Phdr)); + + // Patch the PT_PHDR virtual address with our new virtual address. + if (itr->p_type == PT_PHDR) { + ABORT_IF(itr != old, "PT_PHDR must appear first"); + new[n].p_vaddr = (uintptr_t)new - (uintptr_t)base_address; + } + + n++; + } + + // Add a pt_interp header at the end. 
+	if (manifest && manifest->interpreter.ptr) {
+		char* interp = alloc(arena, manifest->interpreter.len + 1, 1);
+		memcpy(interp, manifest->interpreter.ptr, manifest->interpreter.len);
+		// Terminate the path explicitly; alloc() is not shown to hand back zeroed memory.
+		interp[manifest->interpreter.len] = '\0';
+		memset((void*)&new[n], 0, sizeof(Elf64_Phdr));
+		new[n].p_type = PT_INTERP;
+		new[n].p_vaddr = (uintptr_t)interp - (uintptr_t)base_address;
+		new[n].p_paddr = new[n].p_vaddr;
+		new[n].p_align = 1;
+		new[n].p_filesz = 0;
+		new[n].p_memsz = manifest->interpreter.len;
+		new[n].p_flags = PF_R;
+		n++;
+	}
+
+	// Return the new phdr vector.
+	ProgramHeaders new_phdrs = {
+		.new = new,
+		.num = n
+	};
+	return new_phdrs;
+}
+
+// Handle the manifest.
+typedef struct {
+	Elf64_Ehdr* elf_header;
+	Elf64_Phdr* program_headers;
+	Manifest* manifest;
+	Footer* footer;
+} Executable;
+
+// Read this executable's ELF header, program headers, footer and embedded manifest
+// from /proc/self/exe into `executable`, after seeding the manifest's env table from
+// the process environment (unless options->suppress_env is set).
+// Returns 1 on success and 0 when the footer magic does not match; aborts on I/O errors.
+static int read_executable (
+	Arena* arena,
+	Stack* stack,
+	Options* options,
+	Executable* executable
+) {
+	// Initialize envp.
+	create_table(arena, &executable->manifest->env, 4096);
+	if (options->enable_tracing) {
+		trace("created env\n");
+	}
+
+	// Fill the env table.
+	if (!options->suppress_env) {
+		for (int i = 0; i < stack->envc; i++) {
+			char* e = stack->envp[i];
+
+			// Find the length (n) of the entry and the position (m) of the separator.
+			// Only the FIRST '=' separates the key from the value; any later '=' is part of
+			// the value (e.g. "LS_COLORS=di=01;34"). Splitting on the last one would corrupt
+			// the key and make lookups by name miss the variable.
+			size_t n = 0;
+			size_t m = 0;
+			for (; e[n]; n++) {
+				if (e[n] == '=' && m == 0) {
+					m = n;
+				}
+			}
+
+			// No '=' found. Skip it.
+			if (m == 0) {
+				continue;
+			}
+
+			// Allocate strings for key/value pair.
+			String key = {0};
+			key.ptr = ALLOC_N(arena, m + 1, uint8_t);
+			key.len = m;
+			memcpy(key.ptr, e, m);
+
+			String val = {0};
+			val.ptr = ALLOC_N(arena, n - m, uint8_t);
+			val.len = n - m - 1;
+			memcpy(val.ptr, e + m + 1, n - m - 1);
+
+			insert(arena, &executable->manifest->env, key, val);
+		}
+		if (options->enable_tracing) {
+			trace("initialized env\n");
+		}
+	}
+
+	// Read the manifest. TODO: use loadable segment?
+	int fd = open("/proc/self/exe", O_RDONLY, 0);
+	ABORT_IF(fd < 0, "failed to open /proc/self/exe");
+	off_t offset = 0;
+
+	// Read the elf header. We don't need to do any validation here, we assume the kernel didn't lie.
+ ABORT_IF(pread64(fd, (void*)executable->elf_header, sizeof(Elf64_Ehdr), 0) != sizeof(Elf64_Ehdr), "failed to read the ehdr"); + + // Read the program header table. + offset = executable->elf_header->e_phoff; + size_t size = executable->elf_header->e_phnum * sizeof(Elf64_Phdr); + executable->program_headers = ALLOC_N(arena, executable->elf_header->e_phnum, Elf64_Phdr); + ABORT_IF( + pread64(fd, (void*)executable->program_headers, size, offset) != size, + "failed to read program headers" + ); + + // Get the file size. + offset = lseek(fd, 0, SEEK_END); + if (offset < 0) { + ABORT("failed to seek"); + } + if (options->enable_tracing) { + trace("file size: %d\n", offset); + } + + // Read the manifest footer. + if (pread64(fd, executable->footer, sizeof(Footer), offset - sizeof(Footer)) != sizeof(Footer)) { + ABORT("failed to read footer"); + } + if (options->enable_tracing) { + trace("read footer: size=%d, version=%d\n", + executable->footer->size, + executable->footer->version); + } + // Check the magic number. + int matches = executable->footer->magic[0] == 't' + && executable->footer->magic[1] == 'a' + && executable->footer->magic[2] == 'n' + && executable->footer->magic[3] == 'g' + && executable->footer->magic[4] == 'r' + && executable->footer->magic[5] == 'a' + && executable->footer->magic[6] == 'm' + && executable->footer->magic[7] == '\0'; + if (!matches) { + if (options->enable_tracing) { + trace("mismatched footer\n"); + } + close(fd); + return 0; + } + + // Read the manifest data. 
+	if (options->enable_tracing) {
+		trace("allocating memory for the data: %ld\n", executable->footer->size);
+	}
+
+	char* data = (char*)alloc(arena, executable->footer->size, 1);
+	size_t count = 0;
+	offset -= (sizeof(Footer) + executable->footer->size);
+
+	while (count < executable->footer->size) {
+		long amt = pread64(fd, (void*)(data + count), executable->footer->size - count, offset);
+		if (amt < 0) {
+			ABORT("failed to read");
+		}
+		if (amt == 0) {
+			break;
+		}
+		offset += amt;
+		count += amt;
+	}
+
+	// Close the file.
+	close(fd);
+
+	// A short read leaves the tail of `data` unfilled; refuse to parse a truncated manifest.
+	ABORT_IF(count != executable->footer->size, "failed to read the manifest");
+
+	// Print the manifest if provided.
+	if (options->enable_tracing) {
+		trace("manifest: \n");
+		for (int ch = 0; ch < executable->footer->size; ch++) {
+			trace("%c", data[ch]);
+		}
+		trace("\n");
+	}
+
+	// Parse the manifest.
+	parse_manifest(arena, executable->manifest, (uint8_t*)data, executable->footer->size);
+
+	// Append the arg list if necessary.
+	if (!options->suppress_args) {
+		// Allocate a new arg vector.
+		String* argv = ALLOC_N(arena, stack->argc + executable->manifest->argc, String);
+		size_t argc = 0;
+
+		// Now add the args from the manifest.
+		for (size_t n = 0; n < executable->manifest->argc; n++) {
+			argv[argc++] = executable->manifest->argv[n];
+		}
+
+		// Finally the stack args, not including argv0.
+		for (size_t n = 1; n < stack->argc; n++) {
+			argv[argc].ptr = stack->argv[n];
+			argv[argc].len = strlen(stack->argv[n]);
+			argc++;
+		}
+
+		// Update the manifest.
+		executable->manifest->argv = argv;
+		executable->manifest->argc = argc;
+	}
+
+	return 1;
+}
+
+// Read the trailing Footer of /proc/self/exe into `footer`.
+// Returns 0 on success and 1 on any failure. The descriptor is closed on every path.
+static int read_footer(Footer* footer) {
+	int fd = open("/proc/self/exe", O_RDONLY, 0);
+	if (fd < 0) {
+		return 1;
+	}
+
+	// Assume failure until the whole footer has been read.
+	int status = 1;
+	off_t sz = lseek(fd, 0, SEEK_END);
+
+	// This also rejects a failed seek (sz < 0) and a file too small to hold a footer.
+	if (sz >= (off_t)sizeof(Footer)
+		&& pread64(fd, (void*)footer, sizeof(Footer), sz - sizeof(Footer)) == sizeof(Footer)
+	) {
+		status = 0;
+	}
+
+	close(fd);
+	return status;
+}
+
+// Replace the current process image with the manifest's interpreter (if any) or its
+// executable, using the argv/envp described by the manifest.
+static void exec (Arena* arena, Manifest* manifest, char* argv0, Options* options) {
+	// Sanity check.
+ ABORT_IF(!manifest->executable.ptr, "missing executable"); + ABORT_IF(!argv0, "missing argv0"); + + // Get the executable path. + char* pathname = manifest->interpreter.ptr + ? cstr(arena, manifest->interpreter) + : cstr(arena, manifest->executable); + + // Compute argc. + size_t argc = manifest->argc + + manifest->interp_argc + + 1 // pathname + + 1 // --argv0 + + 1 // argv[0] + + 1 // -- + + 1; // executable + + // Create argv, envp + char** argv = ALLOC_N(arena, argc + 1, char*); + char** envp = ALLOC_N(arena, manifest->env.size + 1, char*); + + // Fill argv. + size_t n = 0; + argv[n++] = pathname; + if (manifest->interpreter.ptr) { + for (int i = 0; i < manifest->interp_argc; i++) { + argv[n++] = cstr(arena, manifest->interp_argv[i]); + } + argv[n++] = "--argv0"; + argv[n++] = argv0; + if (manifest->interpreter_kind == INTERPRETER_KIND_LD_MUSL){ + argv[n++] = "--"; + } + argv[n++] = cstr(arena, manifest->executable); + } + for (int i = 0; i < manifest->argc; i++) { + argv[n++] = cstr(arena, manifest->argv[i]); + } + argv[n++] = NULL; + + // Fill envp. + size_t e = 0; + for (int i = 0; i < manifest->env.capacity; i++) { + Node* node = manifest->env.list + i; + while(node) { + if (node->key.ptr) { + // Allocate the string. + size_t len = node->key.len + node->val.len + 2; + char* str = ALLOC_N(arena, len, char); + memset(str, 0, len); + + // Create the string. + memcpy(str, node->key.ptr, node->key.len); + str[node->key.len] = '='; + memcpy(str + node->key.len + 1, node->val.ptr, node->val.len); + + // Save the address in envp. + envp[e++] = str; + } + node = node->next; + } + } + envp[e++] = NULL; + if (options->enable_tracing) { + trace("about to exec...\n"); + trace("pathname = %s\n", pathname); + for (int i = 0; i < argc; i++) { + trace("argv[%d] = %s\n", i, argv[i]); + } + for (int i = 0; i < e; i++) { + trace("envp[%d] = %s\n", i, envp[i]); + } + } + int ec = execve(pathname, argv, envp); + ABORT("execve failed: %d", ec); +} + +// Main entrypoint. 
+// Stub entry point, called from _start with the initial stack pointer in `sp`.
+void main (void *sp) {
+	// State.
+	Arena arena = {0};
+	Footer footer = {0};
+	Stack stack = {0};
+	Options options = {0};
+
+	// Set the stack pointer.
+	stack.sp = sp;
+
+	// Scan the stack to collect argv/envp/auxv.
+	scan_stack(&stack);
+
+	// Parse options.
+	parse_options(&stack, &options);
+	if (options.enable_tracing) {
+		trace(
+			"options: enable_tracing:%d, suppress_args:%d, suppress_env:%d\n",
+			options.enable_tracing, options.suppress_args, options.suppress_env
+		);
+		trace("original stack:\n");
+		print_stack(&stack);
+	}
+
+	// We only grab the page size from the aux vector, we'll read the program headers later.
+	uint64_t page_sz = (uint64_t)stack.auxv_glob[AT_PAGESZ];
+	page_sz = page_sz ? page_sz : 4096;
+
+	// Initialize the arena.
+	create_arena(&arena, page_sz);
+	if (options.enable_tracing) {
+		trace("initialized arena\n");
+	}
+
+	// Search for the positions of AT_ENTRY, AT_BASE, AT_PHDR, AT_PHNUM
+	int nentry = -1;
+	int nbase = -1;
+	int nphdr = -1;
+	int nphnum = -1;
+	for (int i = 0; i < stack.auxc; i++) {
+		// Stop only once every entry we patch later has been located. Stopping as soon as
+		// AT_ENTRY and AT_BASE are seen would miss AT_PHDR/AT_PHNUM if they come later.
+		if (nentry >= 0 && nbase >= 0 && nphdr >= 0 && nphnum >= 0) {
+			break;
+		}
+		switch(stack.auxv[i].a_type) {
+			case AT_PHDR: {
+				ABORT_IF(nphdr >= 0, "duplicate AT_PHDR");
+				nphdr = i;
+				break;
+			}
+			case AT_PHNUM: {
+				ABORT_IF(nphnum >= 0, "duplicate AT_PHNUM");
+				nphnum = i;
+				break;
+			}
+			case AT_ENTRY: {
+				ABORT_IF(nentry >= 0, "duplicate AT_ENTRY");
+				nentry = i;
+				break;
+			}
+			case AT_BASE: {
+				ABORT_IF(nbase >= 0, "duplicate AT_BASE");
+				nbase = i;
+				break;
+			}
+			default: break;
+		}
+	}
+
+	// Check that we have space to write the new program header table and number of entries later.
+	// "Not found" is -1, and index 0 is a valid slot, so compare against 0 rather than using `!`.
+	// AT_BASE stays optional: it is only patched when present.
+	ABORT_IF(nphdr < 0 || nphnum < 0 || nentry < 0, "missing AT_PHDR, AT_PHNUM or AT_ENTRY");
+
+	// Read the executable and manifest.
+ Executable executable = { + .manifest = ALLOC(&arena, Manifest), + .elf_header = ALLOC(&arena, Elf64_Ehdr), + .program_headers = NULL, + .footer = &footer + }; + if (!read_executable(&arena, &stack, &options, &executable)) { + ABORT("failed to parse manifest"); + } + if (options.enable_tracing) { + trace("read executable\n"); + } + + // Compute the base address. Normally this is computed using the program header table supplied in the aux vector, but this could be garbage if using a patched program header table. + uintptr_t load_address = stack.auxv_glob[AT_ENTRY] - executable.elf_header->e_entry; + + // If "--tangram-print-manifest" was passed to the stub, dump the manifest and exit. + String arg = STRING_LITERAL("--tangram-print-manifest"); + for (int i = 1; i < stack.argc; i++) { + if (cstreq(arg, stack.argv[i])) { + print_manifest(executable.manifest); + exit(0); + } + } + + // If the executable is a string, fallback on execve. + if (executable.manifest->executable.ptr) { + exec(&arena, executable.manifest, stack.argv[0], &options); + } + ABORT_IF(!executable.manifest->entrypoint, "missing entrypoint"); + + // Get the entrypoint. + void* entrypoint = NULL; + if (executable.manifest->interpreter.ptr) { + // If there's an interpreter arg, + stack.auxv[nentry].a_un.a_val = load_address + executable.manifest->entrypoint; + + // Load the interpreter. + LoadedInterpreter loaded = load_interpreter(&arena, executable.manifest->interpreter.ptr, page_sz, &options); + + // Update the AT_BASE entry of the aux vector. + if (nbase >= 0) { + stack.auxv[nbase].a_un.a_val = loaded.base_address; + } + + // Set the entrypoint as the interpreter. + entrypoint = (void*)(loaded.base_address + loaded.entry); + } else { + entrypoint = (void*)((uintptr_t)load_address + executable.manifest->entrypoint); + } + + // Fix program headers. 
+ Arena preserved_memory; + create_arena(&preserved_memory, page_sz); + ProgramHeaders new_phdrs = create_program_headers( + &preserved_memory, + executable.manifest, + (void*)load_address, + stack.auxv[nentry].a_un.a_val, + executable.program_headers, + executable.elf_header->e_phnum + ); + stack.auxv[nphdr].a_un.a_val = (uintptr_t)new_phdrs.new; + stack.auxv[nphnum].a_un.a_val = (uintptr_t)new_phdrs.num; + + // Prepare a new stack. + sp = prepare_stack(&arena, &stack, executable.manifest, &options); + if (options.enable_tracing) { + Stack dbg_stack = { .sp = sp }; + scan_stack(&dbg_stack); + trace("new stack:\n"); + print_stack(&dbg_stack); + } + + // Cleanup all the memory we allocatd. + destroy_arena(&arena); + + // Jump to the new entrypoint. + if (options.enable_tracing) { + trace("about to transfer control\n"); + trace("entrypoint: 0x%lx\n", (uintptr_t)entrypoint); + } + + jump_to_entrypoint(sp, entrypoint); +} diff --git a/packages/std/packages/stub/src/util.c b/packages/std/packages/stub/src/util.c new file mode 100644 index 00000000..7ff04aae --- /dev/null +++ b/packages/std/packages/stub/src/util.c @@ -0,0 +1,69 @@ +/* + gcc appears to have an issue with some functions (memset and memcpy) on some architectures (aarch64) which forbids overriding them in a static function declaration within a single header file. In addition, conversion + of long doubles to doubles is a built-in provided by lib-gcc on aarch64. + + We don't care about any of that, so this source file works around it. 
+*/
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __x86_64__
+// Byte-wise copy of `len` bytes from `src` to `dst` using `rep movsb`.
+// NOTE(review): nothing in this asm writes `dst` to %rax before `ret`, so the returned
+// pointer looks unspecified — confirm no caller relies on the return value.
+__attribute__((naked))
+void* memcpy (
+	void* dst,
+	const void* src,
+	size_t len
+) {
+	asm volatile (
+		"rep rex.w movsb;"
+		"ret;"
+		: "+D"(dst), "+S"(src), "+c"(len)
+		:
+		: "memory", "cc"
+	);
+}
+
+// Fill `n` bytes at `dst` with the byte `c` using `rep stosb`.
+// NOTE(review): %rax holds `c` (not `dst`) when `ret` executes — confirm no caller relies
+// on the return value.
+__attribute__((naked))
+void* memset (
+	void* dst,
+	int c,
+	size_t n
+) {
+	asm volatile (
+		"rep rex.w stosb;"
+		"ret;"
+		: "+D"(dst), "+c"(n)
+		: "a"(c)
+		: "memory", "cc"
+	);
+}
+#endif
+
+#ifdef __aarch64__
+#include "syscall.h"
+// Soft-float truncation helper the compiler may reference; the stub never needs it, so bail out.
+double __trunctfdf2 (long double ld) {
+	exit(111);
+	return 0.0;
+}
+
+// Byte-wise copy of `len` bytes from `src` to `dst`. Returns `dst`.
+void* memcpy (
+	void* dst,
+	const void* src,
+	size_t len
+) {
+	for (size_t i = 0; i < len; i++) {
+		((uint8_t*)dst)[i] = ((const uint8_t*)src)[i];
+	}
+	// The function is declared to return a pointer; falling off the end left it undefined.
+	return dst;
+}
+
+// Fill `n` bytes at `dst` with the byte `c`. Returns `dst`.
+void* memset (
+	void* dst,
+	int c,
+	size_t n
+) {
+	for (size_t i = 0; i < n; i++) {
+		((uint8_t*)dst)[i] = (uint8_t)(c);
+	}
+	return dst;
+}
+#endif
diff --git a/packages/std/packages/stub/src/wrap.c b/packages/std/packages/stub/src/wrap.c
new file mode 100644
index 00000000..14bb0a54
--- /dev/null
+++ b/packages/std/packages/stub/src/wrap.c
@@ -0,0 +1,326 @@
+#define _GNU_SOURCE
+#include "footer.h"
+#include <elf.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#ifdef __aarch64__
+	#define MACHINE EM_AARCH64
+#endif
+#ifdef __x86_64__
+	#define MACHINE EM_X86_64
+#endif
+
+static bool TRACING_ENABLED = false;
+
+#define TRACE(...) if (TRACING_ENABLED) { fprintf(stderr, "wrap: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); }
+
+#define ALIGN(m, n) (((m) + (n) - 1) & ~((n) - 1))
+
+#define ABORT_IF(cond, ...) if (cond) { fprintf(stderr, __VA_ARGS__); abort(); }
+
+#define ABORT_IF_ERRNO(cond, ...)
if (cond) { \ + char msg[1024]; \ + snprintf(msg, 1024, __VA_ARGS__); \ + perror(msg); \ + abort(); \ +} + +typedef struct File File; +struct File { + int fd; + off_t sz; + const char* path; +}; + +typedef struct Elf Elf; +struct Elf { + off_t sz; + Elf64_Ehdr* ehdr; + Elf64_Phdr* phdr; +}; + +typedef struct Analysis Analysis; +struct Analysis { + Elf64_Phdr* pt_interp; + Elf64_Addr max_vaddr; + Elf64_Addr max_align; +}; + +typedef struct ProgramHeaders ProgramHeaders; +struct ProgramHeaders { + size_t offs; + size_t sz; + Elf64_Phdr* phdr; + size_t num; +}; + +File file_open (const char* path, int flags, int mode) { + File file = { .path = path }; + file.fd = open(path, flags, mode); + ABORT_IF_ERRNO(file.fd < 0, "failed to open %s", path); + file.sz = lseek(file.fd, 0, SEEK_END); + ABORT_IF_ERRNO(file.sz < 0, "failed to get file size %s", path); + TRACE("opened %s (fd:%d, sz:%ld)", file.path, file.fd, file.sz); + return file; +} + +void file_close (File file) { + close(file.fd); +} + +Elf elf_read (File file, bool readonly) { + Elf elf; + int flags = readonly ? 
PROT_READ : PROT_READ | PROT_WRITE;
+	elf.ehdr = (Elf64_Ehdr*) mmap (NULL, (size_t) file.sz, flags, MAP_SHARED, file.fd, 0);
+	// `flags` is an int, so it is printed with %x (not %lx).
+	ABORT_IF_ERRNO(elf.ehdr == (Elf64_Ehdr*)MAP_FAILED, "failed to load %s (len:%ld, flags:%x, fd:%d)", file.path, file.sz, flags, file.fd);
+	bool is_elf =
+		elf.ehdr->e_ident[EI_MAG0] == ELFMAG0
+		&& elf.ehdr->e_ident[EI_MAG1] == ELFMAG1
+		&& elf.ehdr->e_ident[EI_MAG2] == ELFMAG2
+		&& elf.ehdr->e_ident[EI_MAG3] == ELFMAG3
+		&& elf.ehdr->e_ident[EI_CLASS] == ELFCLASS64
+		&& elf.ehdr->e_ident[EI_DATA] == ELFDATA2LSB
+		&& elf.ehdr->e_phentsize == sizeof(Elf64_Phdr);
+	ABORT_IF(!is_elf, "not a 64 bit LE elf binary");
+	ABORT_IF(elf.ehdr->e_machine != MACHINE, "unsupported architecture");
+	elf.phdr = (Elf64_Phdr*)((char*)elf.ehdr + elf.ehdr->e_phoff);
+	elf.sz = file.sz;
+	return elf;
+}
+
+// Unmap an Elf previously returned by elf_read.
+// Returns the handle with its pointers cleared, so the declared return value is well defined.
+Elf elf_close (Elf elf) {
+	munmap((void*)elf.ehdr, elf.sz);
+	elf.ehdr = NULL;
+	elf.phdr = NULL;
+	elf.sz = 0;
+	return elf;
+}
+
+// Append the entire contents of `src` to the end of `dst`, then grow dst->sz by the
+// number of bytes actually copied. Aborts on any seek, read or write failure.
+void file_concat (File* dst, File src) {
+	char buf[2 << 14] = {0};
+	ssize_t bytes_read = 0;
+	ABORT_IF(lseek(src.fd, 0, SEEK_SET) < 0, "failed to seek %s", src.path);
+	ABORT_IF(lseek(dst->fd, 0, SEEK_END) < 0, "failed to seek %s", dst->path);
+	while (bytes_read < src.sz) {
+		ssize_t n = read(src.fd, buf, sizeof(buf));
+		if (n == 0) {
+			break;
+		}
+		ABORT_IF_ERRNO(n < 0, "failed to read from %s", src.path);
+		bytes_read += n;
+
+		// write() may accept fewer bytes than requested; keep going until the chunk is flushed.
+		ssize_t offset = 0;
+		while (offset < n) {
+			ssize_t m = write(dst->fd, buf + offset, n - offset);
+			ABORT_IF_ERRNO(m < 0, "failed to write to %s", dst->path);
+			offset += m;
+		}
+	}
+	// Account for what was really copied; the source may be shorter than src.sz claimed.
+	dst->sz += bytes_read;
+}
+
+// Scan the program headers of `elf`, recording its PT_INTERP entry (if any), the highest
+// end address of any PT_LOAD segment, and the largest PT_LOAD alignment.
+Analysis elf_analyze (Elf elf) {
+	Analysis analysis = {0};
+	Elf64_Phdr* itr = elf.phdr;
+	Elf64_Phdr* end = itr + elf.ehdr->e_phnum;
+	int i = 0;
+	for(; itr != end; itr++) {
+		if (itr->p_type == PT_LOAD) {
+			Elf64_Addr end_of_segment = itr->p_vaddr + itr->p_memsz;
+			TRACE("phdr[%d] vaddr:%lx memsz:%lx, end:%lx", i++, itr->p_vaddr, itr->p_memsz, end_of_segment);
+			if (end_of_segment > analysis.max_vaddr) {
+				analysis.max_vaddr = end_of_segment;
+			}
+			if
(itr->p_align > analysis.max_align) {
+				analysis.max_align = itr->p_align;
+			}
+		}
+		if (itr->p_type == PT_INTERP) {
+			ABORT_IF(analysis.pt_interp, "multiple interpreters found");
+			analysis.pt_interp = itr;
+		}
+	}
+	TRACE("analysis: pt_interp:%p, max_vaddr:%lx", analysis.pt_interp, analysis.max_vaddr);
+	return analysis;
+}
+
+
+// Bubble sort loadable segments
+// Orders the `num` consecutive program headers starting at `phdr` by ascending virtual
+// address, aborting if two neighbouring segments overlap.
+void elf_sort_segments (Elf64_Phdr* phdr, size_t num) {
+	TRACE("num segments = %zu", num);
+
+	// Nothing to order. This also keeps `num - 1` below from wrapping around when num is 0.
+	if (num < 2) {
+		return;
+	}
+
+	Elf64_Addr start_addr, end_addr;
+	for(;;) {
+		bool swapped = false;
+		for (int n = 0; n < (num - 1); n++) {
+			end_addr = phdr[n].p_vaddr + phdr[n].p_memsz;
+			start_addr = phdr[n + 1].p_vaddr;
+			TRACE("phdr[%d].start = %lx, phdr[%d].end = %lx, phdr[%d].start = %lx", n, phdr[n].p_vaddr, n, end_addr, n + 1, start_addr);
+			ABORT_IF(start_addr >= phdr[n].p_vaddr && start_addr < end_addr, "invalid program headers");
+			if (end_addr > start_addr) {
+				TRACE("swap phdr[%d], phdr[%d]", n, n+1);
+				Elf64_Phdr tmp = phdr[n];
+				phdr[n] = phdr[n+1];
+				phdr[n + 1] = tmp;
+				swapped = true;
+				TRACE("swapped %d and %d", n, n + 1);
+			} else {
+				TRACE("skipping %d", n);
+			}
+		}
+		if (!swapped) {
+			break;
+		}
+	}
+}
+
+// wrap <input> <output> <stub.elf> <stub.bin> <manifest>
+// Copies <input> to <output>, adds a loadable segment holding the stub, redirects the
+// entrypoint to it, and appends the stub, the manifest and a footer.
+int main (int argc, const char** argv) {
+	TRACING_ENABLED = getenv("TANGRAM_TRACING") != NULL;
+
+	// Check args. The format string consumes a %s, so the program name must be supplied.
+	ABORT_IF(
+		argc != 6,
+		"usage is %s <input> <output> <stub.elf> <stub.bin> <manifest>\n",
+		argc > 0 ? argv[0] : "wrap"
+	);
+
+	// Open input/output/stub/manifest.
+	File input = file_open(argv[1], O_RDONLY, 0);
+	// O_CREAT belongs in the flags; the third argument is the permission mode for a new file.
+	File output = file_open(argv[2], O_RDWR | O_CREAT, 0755);
+	File stub_elf = file_open(argv[3], O_RDONLY, 0);
+	File stub_bin = file_open(argv[4], O_RDONLY, 0);
+	File manifest = file_open(argv[5], O_RDONLY, 0);
+	TRACE( "input:%s, output:%s, stub.elf:%s, stub.bin:%s, manifest:%s",
+		input.path, output.path, stub_elf.path, stub_bin.path, manifest.path);
+
+	// Copy input to output.
+	file_concat(&output, input);
+	TRACE("copied %s to %s", input.path, output.path);
+
+	// Parse the elf files.
+	Elf output_exe = elf_read(output, false);
+	TRACE("parsed %s", output.path);
+	Elf stub_exe = elf_read(stub_elf, true);
+	TRACE("parsed %s", stub_elf.path);
+
+	// Scan the executable for its pt_interp and max vaddr
+	Analysis analysis = elf_analyze(output_exe);
+	TRACE("analyzed %s: pt_interp:%p, max_vaddr:%lx", output.path, analysis.pt_interp, analysis.max_vaddr);
+
+	// If there's a PT_INTERP we'll overwrite it with the stub's LOAD segment.
+	Elf64_Phdr* stub_segment = analysis.pt_interp;
+
+	// If there's no pt_interp, create new program headers.
+	ProgramHeaders headers = {0};
+	if (!stub_segment) {
+		headers.offs = ALIGN(output.sz, 64);
+		// Room for every existing header plus one new entry for the stub. The parentheses
+		// matter: without them only `e_phnum + sizeof(Elf64_Phdr)` bytes are allocated and
+		// the copies below overflow the buffer.
+		headers.sz = ((size_t)output_exe.ehdr->e_phnum + 1) * sizeof(Elf64_Phdr);
+		headers.phdr = (Elf64_Phdr*)malloc(headers.sz);
+		ABORT_IF(!headers.phdr, "failed to allocate program headers");
+
+		// Copy loadable segments first.
+		for (int i = 0; i < output_exe.ehdr->e_phnum; i++) {
+			Elf64_Phdr* phdr = &output_exe.phdr[i];
+			ABORT_IF(phdr->p_type == PT_PHDR, "unexpected PT_PHDR");
+			if (phdr->p_type != PT_LOAD) {
+				continue;
+			}
+			headers.phdr[headers.num++] = *phdr;
+		}
+
+		// Save the last loadable segment for the stub.
+		stub_segment = &headers.phdr[headers.num++];
+
+		// Then every non-loadable header, after the stub's slot.
+		for (int i = 0; i < output_exe.ehdr->e_phnum; i++) {
+			Elf64_Phdr* phdr = &output_exe.phdr[i];
+			if (phdr->p_type == PT_LOAD) {
+				continue;
+			}
+			headers.phdr[headers.num++] = *phdr;
+		}
+		TRACE("created new program headers");
+	}
+
+	// Compute the offset/size of the stub binary.
+	size_t stub_offs = headers.phdr
+		? ALIGN(headers.offs + headers.sz, analysis.max_align)
+		: ALIGN(output.sz, analysis.max_align);
+	size_t stub_sz = ALIGN(stub_bin.sz, analysis.max_align);
+
+	// Create segment for the stub.
+	stub_segment->p_type = PT_LOAD;
+	stub_segment->p_flags = PF_R | PF_X;
+	stub_segment->p_align = analysis.max_align;
+	stub_segment->p_offset = stub_offs;
+	stub_segment->p_paddr = ALIGN(analysis.max_vaddr, analysis.max_align);
+	stub_segment->p_vaddr = ALIGN(analysis.max_vaddr, analysis.max_align);
+	stub_segment->p_filesz = stub_sz;
+	stub_segment->p_memsz = stub_sz;
+
+	TRACE("new segment vaddr: %lx, memsz: %lx", stub_segment->p_vaddr, stub_segment->p_memsz);
+
+	// Create the footer.
+	Footer footer = {
+		.size = manifest.sz,
+		.version = 0
+	};
+	memcpy(footer.magic, "tangram", 8);
+
+	// Update the entrypoint.
+	TRACE("%s entrypoint:%lx", stub_elf.path, stub_exe.ehdr->e_entry);
+	output_exe.ehdr->e_entry = stub_segment->p_vaddr + stub_exe.ehdr->e_entry;
+
+	// Patch the program header table if necessary.
+	if (headers.phdr) {
+		output_exe.ehdr->e_phoff = headers.offs;
+		output_exe.ehdr->e_phnum = headers.num;
+	} else {
+		// Sort program headers.
+		Elf64_Phdr* start = NULL;
+		size_t num = 0;
+		for(int i = 0; i < output_exe.ehdr->e_phnum; i++) {
+			if (output_exe.phdr[i].p_type != PT_LOAD) {
+				continue;
+			}
+			if (!start) {
+				start = &output_exe.phdr[i];
+			}
+			num++;
+		}
+		elf_sort_segments(start, num);
+	}
+
+	// Close elf objects.
+	elf_close(output_exe);
+	elf_close(stub_exe);
+
+	// Resize the output.
+	ABORT_IF_ERRNO(ftruncate(output.fd, stub_offs) < 0, "failed to resize %s", output.path);
+	TRACE("resized output %ld", stub_offs);
+
+	// Write the new program header table if necessary.
+	// It must land at headers.offs, the offset recorded in e_phoff above. The resize has
+	// already reserved that range, so writing there leaves the file ending at stub_offs.
+	// The stub appended next then starts exactly at the p_offset recorded for its segment.
+	if (headers.phdr) {
+		ABORT_IF_ERRNO(lseek(output.fd, (off_t)headers.offs, SEEK_SET) < 0, "failed to seek %s", output.path);
+		ABORT_IF_ERRNO(
+			write(output.fd, (void*)headers.phdr, (size_t)headers.sz) != headers.sz,
+			"failed to write new program headers to %s", output.path
+		);
+		TRACE("appended new program header table");
+	}
+
+	// Append the stub and manifest.
+ file_concat(&output, stub_bin); + TRACE("appended stub to binary"); + + file_concat(&output, manifest); + TRACE("appended manifest to binary"); + + // Append teh footer. + ABORT_IF_ERRNO( + write(output.fd, (void*)&footer, sizeof(footer)) != sizeof(footer), + "failed to append footer to %s", output.path + ); + TRACE("appended footer to binary"); + + // Close files. + file_close(input); + file_close(output); + file_close(stub_elf); + file_close(stub_bin); + file_close(manifest); +} diff --git a/packages/std/packages/stub/src/x86_64/start.s b/packages/std/packages/stub/src/x86_64/start.s new file mode 100644 index 00000000..232f1e3d --- /dev/null +++ b/packages/std/packages/stub/src/x86_64/start.s @@ -0,0 +1,7 @@ +.section .text.start,"ax",@progbits +.global _start +.type _start, @function +_start: + xor %rbp, %rbp + mov %rsp, %rdi + call main diff --git a/packages/std/packages/tgld/src/main.rs b/packages/std/packages/tgld/src/main.rs index e7e1b247..c4e323ca 100644 --- a/packages/std/packages/tgld/src/main.rs +++ b/packages/std/packages/tgld/src/main.rs @@ -75,6 +75,9 @@ struct Options { /// If any NEEDED libraries are missing at the end, should we still produce a wrapper?. Will warn if false, error if true. Default: false. disallow_missing: bool, + /// If enabled, the wrapper will be embedded into the binary. + embed: bool, + /// The interpreter used by the output executable. interpreter_path: Option, @@ -122,6 +125,9 @@ fn read_options() -> tg::Result { // Get the interpreter path. let interpreter_path = std::env::var("TGLD_INTERPRETER_PATH").ok(); + // Get the wrap binary. + let mut embed = std::env::var("TGLD_EMBED_WRAPPER").is_ok(); + // Get additional interpreter args, if any. 
let interpreter_args = std::env::var("TGLD_INTERPRETER_ARGS") .ok() @@ -176,6 +182,8 @@ fn read_options() -> tg::Result { passthrough = true; } else if arg.starts_with("--tg-disallow-missing") { disallow_missing = true; + } else if arg.starts_with("--tg-embed-wrapper") { + embed = true; } else { command_args.push(arg.clone()); } @@ -223,6 +231,7 @@ fn read_options() -> tg::Result { command_path, command_args, disallow_missing, + embed, interpreter_path, interpreter_args, injection_path, @@ -250,11 +259,12 @@ async fn create_wrapper(options: &Options) -> tg::Result<()> { interpreter, name, needed_libraries: initial_needed_libraries, + entrypoint, } = analyze_output_file(&options.output_path).await?; tracing::debug!(?is_executable, ?interpreter, ?initial_needed_libraries); // If the file is executable but does not need an interpreter, it is static or static-PIE linked. Abort here. - if is_executable && matches!(interpreter, InterpreterRequirement::None) { + if !options.embed && is_executable && matches!(interpreter, InterpreterRequirement::None) { tracing::info!("No interpreter needed for static executable. Exiting without wrapping."); return Ok(()); } @@ -440,12 +450,22 @@ async fn create_wrapper(options: &Options) -> tg::Result<()> { let output_artifact_id = output_file.id().clone().into(); // Create the manifest. - let manifest = + let mut manifest = create_manifest(output_artifact_id, options, interpreter, library_paths).await?; tracing::trace!(?manifest); + // If requested, emebd the wrapper. + let new_wrapper = if options.embed { + if let Some(entrypoint) = entrypoint { + manifest.executable = tangram_std::manifest::Executable::Address(entrypoint); + } + manifest.embed(&tg, &output_file).await? + } else { + manifest.write(&tg).await? + }; + // Write the manifest to a wrapper. 
- let new_wrapper = manifest.write(&tg).await?; + let new_wrapper_id = new_wrapper.id(); tracing::trace!(?new_wrapper_id); @@ -726,6 +746,8 @@ struct AnalyzeOutputFileOutput { name: Option, /// Does the output file specify libraries required at runtime? needed_libraries: Vec, + /// The entrypoint of the executable. + entrypoint: Option, } /// The possible interpreter requirements of an output file. @@ -1255,6 +1277,7 @@ fn analyze_executable(bytes: &[u8]) -> tg::Result { interpreter: InterpreterRequirement::None, name: None, needed_libraries: vec![], + entrypoint: None, }, // Handle an ELF file. @@ -1275,12 +1298,16 @@ fn analyze_executable(bytes: &[u8]) -> tg::Result { .map(std::string::ToString::to_string) .collect_vec(); + let entrypoint = (elf.entry != 0).then_some(elf.entry); + // Check whether or not the object requires an interpreter: // - If the object has an interpreter field. // - If the object is a PIE and has 1 or more NEEDS. let interpreter = if elf.interpreter.is_some() || (is_pie && !needed_libraries.is_empty()) { - let interpreter = elf.interpreter.unwrap(); + let interpreter = elf + .interpreter + .ok_or_else(|| tg::error!("missing interpreter in ELF"))?; if interpreter.starts_with("/lib") { if interpreter.contains("musl") { InterpreterRequirement::Default(InterpreterFlavor::Musl) @@ -1300,6 +1327,7 @@ fn analyze_executable(bytes: &[u8]) -> tg::Result { interpreter, name, needed_libraries, + entrypoint, } }, @@ -1314,12 +1342,13 @@ fn analyze_executable(bytes: &[u8]) -> tg::Result { .map(extract_filename) .filter(|file_name| name.as_ref().is_none_or(|n| n != file_name)) .collect_vec(); - + let entrypoint = mach.entry; AnalyzeOutputFileOutput { is_executable, interpreter: InterpreterRequirement::Default(InterpreterFlavor::Dyld), name, needed_libraries, + entrypoint: Some(entrypoint), } }, goblin::mach::Mach::Fat(mach) => { @@ -1346,6 +1375,7 @@ fn analyze_executable(bytes: &[u8]) -> tg::Result { interpreter: 
InterpreterRequirement::Default(InterpreterFlavor::Dyld), name, needed_libraries, + entrypoint: None, } }, }, diff --git a/packages/std/packages/wrapper/src/main.rs b/packages/std/packages/wrapper/src/main.rs index a1fb7c50..f16c84b1 100644 --- a/packages/std/packages/wrapper/src/main.rs +++ b/packages/std/packages/wrapper/src/main.rs @@ -56,9 +56,11 @@ fn main_inner() -> std::io::Result<()> { // Render the interpreter. let interpreter = handle_interpreter(manifest.interpreter.as_ref(), arg0.as_os_str())?; - let interpreter_path = interpreter.as_ref().map(|(path, _)| path).cloned(); #[cfg(feature = "tracing")] - tracing::debug!(?interpreter_path); + { + let interpreter_path = interpreter.as_ref().map(|(path, _)| path).cloned(); + tracing::debug!(?interpreter_path); + } // Render the executable. let executable_path = match &manifest.executable { @@ -66,6 +68,7 @@ fn main_inner() -> std::io::Result<()> { manifest::Executable::Content(template) => { content_executable(&tangram_std::render_template_data(template)?)? }, + manifest::Executable::Address(_) => return Err(std::io::Error::other("invalid manifest")), }; // Create the command. diff --git a/packages/std/sdk.tg.ts b/packages/std/sdk.tg.ts index 08f955c7..ccfc3c43 100644 --- a/packages/std/sdk.tg.ts +++ b/packages/std/sdk.tg.ts @@ -21,6 +21,7 @@ export * as proxy from "./sdk/proxy.tg.ts"; /** An SDK combines a compiler, a linker, a libc, and a set of basic utilities. */ export async function sdk(...args: std.Args): Promise { let { + embedWrapper, host, proxyCompiler, proxyLinker, @@ -84,6 +85,7 @@ export async function sdk(...args: std.Args): Promise { // Proxy the host toolchain. let proxyArg: proxy.Arg = { compiler: proxyCompiler, + embedWrapper, linker: proxyLinker, strip: proxyStrip, toolchain: toolchain, @@ -101,6 +103,7 @@ export namespace sdk { export type Arg = undefined | ArgObject; export type ArgObject = { + embedWrapper?: boolean | undefined; /** The machine this SDK will compile on. 
*/ host?: string; /** An alternate linker to use. */ @@ -119,6 +122,7 @@ export namespace sdk { export const arg = async (...args: std.Args) => { let { + embedWrapper, host: host_, linker, proxyCompiler = false, @@ -165,6 +169,7 @@ export namespace sdk { } return { + embedWrapper, host, proxyCompiler, proxyLinker, @@ -899,13 +904,6 @@ export namespace sdk { } tg.assert(metadata.format === "elf"); - const expectedInterpreter = libc.interpreterName(expectedTarget); - const actualInterpreter = metadata.interpreter; - tg.assert(actualInterpreter, "File should have been dynamically linked."); - tg.assert( - actualInterpreter.includes(expectedInterpreter), - `Expected interpreter named ${expectedInterpreter} but got ${actualInterpreter}.`, - ); } else if (metadata.format === "mach-o") { tg.assert(metadata.arches.includes(expectedArch as string)); } else { diff --git a/packages/std/sdk/gnu/gcc.tg.ts b/packages/std/sdk/gnu/gcc.tg.ts index 2a594ff5..aceff572 100644 --- a/packages/std/sdk/gnu/gcc.tg.ts +++ b/packages/std/sdk/gnu/gcc.tg.ts @@ -197,6 +197,10 @@ export const build = async (arg: tg.Unresolved) => { preConfigureHook = tg`${preConfigureHook}\nexport LD_LIBRARY_PATH=${sysrootLibDir}\nexport WATERMARK=3`; } + if (variant === "stage2_full") { + preConfigureHook = tg`${preConfigureHook}\nunset LD_PRELOAD\n`; + } + // Set up phases. const configure = { pre: preConfigureHook, diff --git a/packages/std/sdk/gnu/toolchain.tg.ts b/packages/std/sdk/gnu/toolchain.tg.ts index d2e419a8..36eb0939 100644 --- a/packages/std/sdk/gnu/toolchain.tg.ts +++ b/packages/std/sdk/gnu/toolchain.tg.ts @@ -135,12 +135,12 @@ export const canadianCross = async (arg?: CanadianCrossArg) => { }); // Build a fully native GCC toolchain. - const nativeGcc = gcc.build({ + const nativeGcc = tg.build(gcc.build, { bootstrap: true, build: host, bundledSources: true, // Build gmp/isl/mpfr/mpc inline crossNative: true, // Include workaround for configuring target libraries with an unproxied compiler. 
- env: stage1HostSdk, + env: std.env.arg(stage1HostSdk), host, sysroot, target, @@ -229,7 +229,7 @@ export const crossToolchain = async (arg: tg.Unresolved) => { }); // Produce a toolchain containing the sysroot and a cross-compiler. - const crossGcc = await gcc.build({ + const crossGcc = await tg.build(gcc.build, { bootstrap: true, build: buildTriple, env: buildEnv, @@ -300,7 +300,7 @@ export const buildSysroot = async (arg: tg.Unresolved) => { }); // Produce the initial gcc required to build the standard C library. - const initialGccDir = await gcc.build({ + const initialGccDir = await tg.build(gcc.build, { bootstrap: true, build: buildTriple, env: buildEnv, diff --git a/packages/std/sdk/proxy.tg.ts b/packages/std/sdk/proxy.tg.ts index 64a366e6..e46c2de8 100644 --- a/packages/std/sdk/proxy.tg.ts +++ b/packages/std/sdk/proxy.tg.ts @@ -2,6 +2,7 @@ import * as bootstrap from "../bootstrap.tg.ts"; import * as std from "../tangram.ts"; import * as sdk from "../sdk.tg.ts"; import { injection } from "../wrap/injection.tg.ts"; +import * as stub from "../wrap/stub.tg.ts"; import * as workspace from "../wrap/workspace.tg.ts"; import * as gnu from "./gnu.tg.ts"; import * as llvmToolchain from "./llvm.tg.ts"; @@ -13,7 +14,8 @@ export type Arg = { build?: string; /** Should the compiler get proxied? Default: false. */ compiler?: boolean; - + /** Should the ld proxy embed wrappers? Default: false. */ + embedWrapper?: boolean | undefined; /** Should the linker get proxied? Default: true. */ linker?: boolean; /** Optional linker to use. If omitted, the linker provided by the toolchain matching the requested arguments will be used. */ @@ -85,6 +87,7 @@ export const env = async (arg?: Arg): Promise => { const ldProxyArtifact = await ldProxy({ buildToolchain: buildToolchainDir, build, + embedWrapper: arg?.embedWrapper, linker: arg.linkerExe === undefined ? 
os === "linux" && isLlvm @@ -234,7 +237,7 @@ type CcProxyArg = { host?: string; }; -export const ccProxy = async (arg: CcProxyArg) => { +const ccProxy = async (arg: CcProxyArg) => { const host = arg.host ?? (await std.triple.host()); const build = arg.build ?? host; const tgcc = workspace.ccProxy({ @@ -256,6 +259,7 @@ export const ccProxy = async (arg: CcProxyArg) => { type LdProxyArg = { buildToolchain: tg.Directory; build?: string; + embedWrapper?: boolean | undefined; interpreter?: tg.File | undefined; interpreterArgs?: Array; linker: tg.File | tg.Symlink | tg.Template; @@ -263,13 +267,24 @@ type LdProxyArg = { host?: string; }; -export const ldProxy = async (arg: LdProxyArg) => { +const ldProxy = async (arg: LdProxyArg) => { // Prepare the Tangram tools. const host = arg.host ?? (await std.triple.host()); const build = arg.build ?? host; const buildToolchain = arg.buildToolchain; - - // Obtain wrapper components. + const embedWrapper = arg.embedWrapper ?? std.triple.os(build) === "linux"; + + // Get the embedded wrapper artifacts. + let wrapBin = undefined; + let stubBin = undefined; + let stubElf = undefined; + + if (std.triple.os(build) === "linux") { + const stub_ = await stub.workspace(arg); + wrapBin = await stub_.get("wrap"); + stubBin = await stub_.get("stub.bin"); + stubElf = await stub_.get("stub.elf"); + } // The linker proxy is built for the build machine. const buildLinkerProxy = await workspace.ldProxy({ @@ -278,7 +293,7 @@ export const ldProxy = async (arg: LdProxyArg) => { }); // The injection library and wrapper are built for the host machine. - const hostInjectionLibrary = await injection({ + const hostInjectionLibrary = await tg.build(injection, { buildToolchain, build, host, @@ -302,6 +317,16 @@ export const ldProxy = async (arg: LdProxyArg) => { arg.interpreter ?? "none", ), TANGRAM_WRAPPER_ID: tg.Mutation.setIfUnset(hostWrapper.id), + TANGRAM_STUB_BIN_ID: stubBin + ? 
tg.Mutation.setIfUnset(stubBin.id) + : undefined, + TANGRAM_STUB_ELF_ID: stubElf + ? tg.Mutation.setIfUnset(stubElf.id) + : undefined, + TANGRAM_WRAP_ID: wrapBin ? tg.Mutation.setIfUnset(wrapBin.id) : undefined, + TGLD_EMBED_WRAPPER: embedWrapper + ? tg.Mutation.setIfUnset("true") + : undefined, }; // Create the linker proxy. @@ -404,11 +429,11 @@ export const testBasic = async (target?: string) => { const wrapperDeps = await output.dependencies(); const os = std.triple.os(await std.triple.host()); // This file should have dependencies for the preload and the underlying executable. On Linux, it should alos have a library path for libc and an interpreter. - const expectedLength = os === "darwin" ? 2 : 4; + const expectedLength = os === "darwin" ? 2 : 3; console.log("WRAPPER DEPS", wrapperDeps); tg.assert( Object.keys(wrapperDeps).length === expectedLength, - "expected exactly 4 dependencies", + `expected exactly ${expectedLength} dependencies, got ${Object.keys(wrapperDeps).length}`, ); if (target === undefined) { @@ -724,10 +749,10 @@ export const testTransitive = async (optLevel?: OptLevel, target?: string) => { ); const libraryPaths = interpreter.libraryPaths; tg.assert(libraryPaths !== undefined); - console.log("manifest library paths", libraryPaths); + // NOTE - the input has six paths: libc, greeta, constantsa, greetb, constantsb, empty. The output will differ based on the opt level and OS. const numLibraryPaths = libraryPaths.length; - console.log("numLibraryPaths", numLibraryPaths); + switch (opt) { case "none": { // All the paths are retained. 
@@ -895,7 +920,7 @@ export const testSamePrefix = async (target?: string) => { ) .then(tg.File.expect); await output.store(); - console.log("wrapped_exe", output.id); + await std.assert.stdoutIncludes(output, "Hello from the shared library!"); return output; }; diff --git a/packages/std/tangram.ts b/packages/std/tangram.ts index bdb3971e..2ab90959 100644 --- a/packages/std/tangram.ts +++ b/packages/std/tangram.ts @@ -16,10 +16,12 @@ export * as packages from "./packages.tg.ts"; export * as phases from "./phases.tg.ts"; export { $, run } from "./run.tg.ts"; export { sdk } from "./sdk.tg.ts"; +export * as sdkModule from "./sdk.tg.ts"; export * as triple from "./triple.tg.ts"; export * as utils from "./utils.tg.ts"; export { wrap } from "./wrap.tg.ts"; export { stripProxy } from "./sdk/proxy.tg.ts"; +export * as bootstrap from "./bootstrap.tg.ts"; import * as bootstrap from "./bootstrap.tg.ts"; import * as bootstrapSdk from "./bootstrap/sdk.tg.ts"; diff --git a/packages/std/utils/bash.tg.ts b/packages/std/utils/bash.tg.ts index 269cd035..babc4fa8 100644 --- a/packages/std/utils/bash.tg.ts +++ b/packages/std/utils/bash.tg.ts @@ -4,6 +4,9 @@ import { autotoolsInternal, prerequisites } from "../utils.tg.ts"; import guardedGettextPatch from "./bash-use-guarded-gettext-header.patch" with { type: "file", }; +import envRestorePatch from "./patch-bash-env-restore.patch" with { + type: "file", +}; export const metadata = { homepage: "https://www.gnu.org/software/bash/", @@ -28,7 +31,7 @@ export const source = async () => { const checksum = "sha256:9599b22ecd1d5787ad7d3b7bf0c59f312b3396d1e281175dd1f8a4014da621ff"; let source = await std.download.fromGnu({ name, version, checksum }); - source = await bootstrap.patch(source, guardedGettextPatch); + source = await bootstrap.patch(source, guardedGettextPatch, envRestorePatch); return source; }; diff --git a/packages/std/utils/patch-bash-env-restore.patch b/packages/std/utils/patch-bash-env-restore.patch new file mode 100644 index 
00000000..ca8670f5 --- /dev/null +++ b/packages/std/utils/patch-bash-env-restore.patch @@ -0,0 +1,54 @@ +diff --git a/shell.c b/shell.c +index ebd8965..74a66c8 100644 +--- a/shell.c ++++ b/shell.c +@@ -359,6 +359,39 @@ _cygwin32_check_tmp () + } + #endif /* __CYGWIN__ */ + ++static void tangram_restore () { ++ const char* k= NULL; ++ const char* v = NULL; ++ ++ k = "TANGRAM_CLEAR_LD_LIBRARY_PATH"; ++ v = getenv(k); ++ if (v) { ++ unsetenv("LD_LIBRARY_PATH"); ++ } ++ unsetenv(k); ++ ++ k = "TANGRAM_CLEAR_LD_PRELOAD"; ++ v = getenv(k); ++ if (v) { ++ unsetenv("LD_PRELOAD"); ++ } ++ unsetenv(k); ++ ++ k = "TANGRAM_RESTORE_LD_LIBRARY_PATH"; ++ v = getenv(k); ++ if (v) { ++ setenv("LD_LIBRARY_PATH", v, 1); /* write the saved value to the real variable, not back onto the marker */ ++ } ++ unsetenv(k); ++ ++ k = "TANGRAM_RESTORE_LD_PRELOAD"; ++ v = getenv(k); ++ if (v) { ++ setenv("LD_PRELOAD", v, 1); /* write the saved value to the real variable, not back onto the marker */ ++ } ++ unsetenv(k); ++} ++ + #if defined (NO_MAIN_ENV_ARG) + /* systems without third argument to main() */ + int +@@ -1980,6 +2013,9 @@ shell_initialize () + initialize_shell_variables (shell_environment, privileged_mode||running_setuid); + #endif + ++ /* We must do this after the shell has been initialized to use the overridden setenv/unsetenv */ ++ tangram_restore(); ++ + /* Initialize the data structures for storing and running jobs. */ + initialize_job_control (jobs_m_flag); + diff --git a/packages/std/wrap.tg.ts b/packages/std/wrap.tg.ts index 8076e186..a8639328 100644 --- a/packages/std/wrap.tg.ts +++ b/packages/std/wrap.tg.ts @@ -14,22 +14,22 @@ export { ccProxy, ldProxy, wrapper } from "./wrap/workspace.tg.ts"; /** Wrap an executable. 
*/ export async function wrap(...args: std.Args): Promise { const arg = await wrap.arg(...args); - tg.assert(arg.executable !== undefined, "No executable was provided."); // Check if the executable is already a wrapper and get its manifest - const existingManifest = await wrap.existingManifestFromExecutableArg( - arg.executable, - ); + const [binary, existingManifest] = await wrap + .splitManifestFromExecutableArg(arg.executable) + .then((r) => (r ? r : [undefined, undefined])); const executable = existingManifest?.executable ?? (await manifestExecutableFromArg(arg.executable)); - const host = arg.host ?? (await std.triple.host()); std.triple.assert(host); + const buildTriple = arg.build ?? host; std.triple.assert(buildTriple); + const buildToolchain = arg.buildToolchain ? arg.buildToolchain : std.triple.os(host) === "linux" @@ -39,18 +39,36 @@ export async function wrap(...args: std.Args): Promise { ) : await bootstrap.sdk.env(host); - // Construct the interpreter. When an explicit interpreter is provided, - // we should prioritize it over any interpreter that might be derived from the executable. - const manifestInterpreter = await manifestInterpreterFromWrapArgObject({ - buildToolchain, - build: buildTriple, - host, - interpreter: arg.interpreter, - executable: arg.interpreter ? undefined : arg.executable, - libraryPaths: arg.libraryPaths, - libraryPathStrategy: arg.libraryPathStrategy, - preloads: arg.preloads, - }); + // Construct the interpreter. + // Cases: + // - the user provided an interpreter argument. + // - the interpreter argument is incomplete, and we need to infer the interpreter. + // - there was an interpreter in the original manifest. + // - there is no interpreter arg and no original manifest. 
+ let manifestInterpreter = undefined; + if (arg.interpreter) { + manifestInterpreter = await manifestInterpreterFromWrapArgObject({ + buildToolchain, + build: buildTriple, + host, + interpreter: arg.interpreter, + executable: undefined, + libraryPaths: arg.libraryPaths, + libraryPathStrategy: arg.libraryPathStrategy, + }); + } else if (existingManifest?.interpreter) { + manifestInterpreter = existingManifest?.interpreter; + } else if (arg.executable && typeof arg.executable !== "number") { + manifestInterpreter = await manifestInterpreterFromWrapArgObject({ + buildToolchain, + build: buildTriple, + host, + interpreter: undefined, + executable: arg.executable, + libraryPaths: arg.libraryPaths, + libraryPathStrategy: arg.libraryPathStrategy, + }); + } // Use existing manifest values as defaults if we're wrapping a wrapper const manifestEnv = await wrap.manifestEnvFromEnvObject( @@ -81,13 +99,21 @@ export async function wrap(...args: std.Args): Promise { detectedOs === "linux" ? await bootstrap.toolchainTriple(buildTriple) : buildTriple; - const wrapper = await workspace.wrapper({ - build, - host, - }); - // Write the manifest to the wrapper and return. - return await wrap.Manifest.write(wrapper, manifest); + // If there's an existing binary, use it. + if (binary) { + return wrap.Manifest.write(binary, manifest); + } else { + // We can't wrap a non-existent binary with a manifest specifying an address. + if (manifest.executable.kind === "address") { + throw new Error("invalid manifest"); + } + let wrapper = await workspace.wrapper({ + build, + host, + }); + return wrap.Manifest.write(wrapper, manifest); + } } export default wrap; @@ -105,11 +131,14 @@ export namespace wrap { /** The build toolchain to use to produce components. Will use the default for the system if not provided. */ buildToolchain?: std.env.Arg | undefined; + /** Experimental: embed the manifest and wrapper logic into the binary. 
*/ + embed?: boolean; + /** Environment variables to bind to the wrapper. If the executable is wrapped, they will be merged. */ env?: std.env.Arg; /** The executable to wrap. */ - executable?: string | tg.Template | tg.File | tg.Symlink; + executable?: string | tg.Template | tg.File | tg.Symlink | number; /** The host system to produce a wrapper for. */ host?: string; @@ -345,9 +374,13 @@ export namespace wrap { interpreter = existingInterpreter; } - executable = await wrap.executableFromManifestExecutable( - existingManifest.executable, - ); + // TODO: figure this API out a little better. + if (existingManifest.executable.kind !== "address") { + executable = await wrap.executableFromManifestExecutable( + existingManifest.executable, + ); + } + args_ = (args_ ?? []).concat( await Promise.all( (existingManifest.args ?? []).map(templateFromManifestTemplate), @@ -582,9 +615,39 @@ export namespace wrap { } }; + /** Utility to split a wrapped binary into its original executable and manifest, if it exists. */ + export const splitManifestFromExecutableArg = async ( + executable: + | undefined + | number + | string + | tg.Template + | tg.File + | tg.Symlink, + ): Promise<[tg.File, wrap.Manifest] | undefined> => { + let ret = undefined; + + if (executable instanceof tg.File || executable instanceof tg.Symlink) { + const f = + executable instanceof tg.Symlink + ? await executable.resolve() + : executable; + if (f instanceof tg.File) { + ret = wrap.Manifest.split(f); + } + } + return ret; + }; + /** Utility to retrieve the existing manifest from an exectuable arg, if it's a wrapper. If not, returns `undefined`. 
*/ export const existingManifestFromExecutableArg = async ( - executable: undefined | string | tg.Template | tg.File | tg.Symlink, + executable: + | undefined + | number + | string + | tg.Template + | tg.File + | tg.Symlink, ): Promise => { let ret = undefined; if (executable instanceof tg.File || executable instanceof tg.Symlink) { @@ -715,11 +778,13 @@ export namespace wrap { export const executableFromManifestExecutable = async ( manifestExecutable: wrap.Manifest.Executable, - ): Promise => { + ): Promise => { if (manifestExecutable.kind === "content") { return templateFromManifestTemplate(manifestExecutable.value); - } else { + } else if (manifestExecutable.kind === "path") { return fileOrSymlinkFromManifestTemplate(manifestExecutable.value); + } else { + return manifestExecutable.value; } }; @@ -773,11 +838,15 @@ export namespace wrap { ); const wrappedExecutable = manifest.executable; tg.assert( - wrappedExecutable.kind !== "content", + wrappedExecutable.kind === "path", "cannot determine needed libraries for a content executable", ); + if (wrappedExecutable.kind !== "path") { + return []; + } + tg.assert(manifest.executable.kind !== "address"); const wrappedExecutableFile = await fileOrSymlinkFromManifestTemplate( - manifest.executable.value, + manifest.executable.value ); tg.assert( wrappedExecutableFile instanceof tg.File, @@ -801,14 +870,19 @@ export namespace wrap { export const unwrap = async ( file: tg.File, ): Promise => { - const manifest = await wrap.Manifest.read(file); - if (!manifest) { - throw new Error(`Cannot unwrap ${file.id}: not a Tangram wrapper.`); + const fileAndManifest = await wrap.Manifest.split(file); + if (!fileAndManifest) { + throw new Error(`Cannot unwrap ${file.id}: not wrapped executable.`); } + const [bin, manifest] = fileAndManifest; if (manifest.executable.kind === "content") { return templateFromManifestTemplate(manifest.executable.value); - } else { + } else if (manifest.executable.kind == "path") { return 
fileOrSymlinkFromManifestTemplate(manifest.executable.value); + } else if (manifest.executable.kind == "address") { + return bin; + } else { + throw new Error("could not extract original executable"); } }; @@ -848,6 +922,7 @@ export namespace wrap { }; export type Executable = + | { kind: "address"; value: number } | { kind: "path"; value: Manifest.Template } | { kind: "content"; value: Manifest.Template }; @@ -902,6 +977,68 @@ export namespace wrap { // The non-serializeable type of a normalized env. export type Env = tg.Mutation; + /** Split a manifest from the end of a file. */ + export const split = async ( + file: tg.File, + ): Promise<[tg.File, wrap.Manifest] | undefined> => { + // Read the magic number. + const magicNumberBytes = await file.read({ + position: `end.-8`, + length: 8, + }); + for (let i = 0; i < MANIFEST_MAGIC_NUMBER.length; i++) { + if (magicNumberBytes[i] !== MANIFEST_MAGIC_NUMBER[i]) { + return undefined; + } + } + + // Read the version. + const versionBytes = await file.read({ + position: `end.-16`, + length: 8, + }); + const version = Number( + new DataView(versionBytes.buffer).getBigUint64(0, true), + ); + + if (version === MANIFEST_VERSION_0) { + // Read the manifest length. + const lengthBytes = await file.read({ + position: `end.-24`, + length: 8, + }); + const length = Number( + new DataView(lengthBytes.buffer).getBigUint64(0, true), + ); + + // Read the manifest. + const manifestBytes = await file.read({ + position: `end.-${length + 24}`, + length, + }); + + // Deserialize the manifest. + const manifestString = tg.encoding.utf8.decode(manifestBytes); + const manifest = tg.encoding.json.decode( + manifestString, + ) as wrap.Manifest; + + // Reconstruct the original file. 
+ let bytes = (await file.bytes()).slice( + 0, + (await file.length()) - (length + 24), + ); + let new_file = await tg.file(bytes, { + executable: true, + dependencies: file.dependencies(), + }); + + return [new_file, manifest]; + } else { + return undefined; + } + }; + /** Read a manifest from the end of a file. */ export const read = async ( file: tg.File, @@ -931,27 +1068,30 @@ export namespace wrap { const version = Number( new DataView(headerBytes.buffer).getBigUint64(position, true), ); - if (version !== MANIFEST_VERSION) { - return undefined; - } - - // Read the manifest length. - position -= 8; - const manifestLength = Number( - new DataView(headerBytes.buffer).getBigUint64(position, true), - ); - - // Read the manifest. - const manifestBytes = await file.read({ - position: `end.-${headerLength + manifestLength}`, - length: manifestLength, - }); + if (version === MANIFEST_VERSION_0) { + // Read the manifest length. + position -= 8; + const manifestLength = Number( + new DataView(headerBytes.buffer).getBigUint64(position, true), + ); - // Deserialize the manifest. - const manifestString = tg.encoding.utf8.decode(manifestBytes); - const manifest = tg.encoding.json.decode(manifestString) as wrap.Manifest; + // Read the manifest. + const manifestBytes = await file.read({ + position: `end.-${headerLength + manifestLength}`, + length: manifestLength, + }); - return manifest; + // Deserialize the manifest. + const manifestString = tg.encoding.utf8.decode(manifestBytes); + const manifest = tg.encoding.json.decode( + manifestString, + ) as wrap.Manifest; + return manifest; + } else { + throw new Error( + `unknown manifest version number ${version}`, + ); + } }; /** Write a manifest to a file. */ @@ -985,7 +1125,7 @@ export namespace wrap { // Write the version. 
new DataView(newBytes.buffer).setBigUint64( newBytesPosition, - BigInt(MANIFEST_VERSION), + BigInt(MANIFEST_VERSION_0), littleEndian, ); newBytesPosition += 8; @@ -1043,12 +1183,23 @@ const MANIFEST_MAGIC_NUMBER: Uint8Array = new Uint8Array([ 116, 97, 110, 103, 114, 97, 109, 0, ]); -const MANIFEST_VERSION = 0; +const MANIFEST_VERSION_0 = 0; const manifestExecutableFromArg = async ( - arg: string | tg.Template | tg.File | tg.Symlink | wrap.Manifest.Executable, + arg: + | number + | string + | tg.Template + | tg.File + | tg.Symlink + | wrap.Manifest.Executable, ): Promise => { - if (isManifestExecutable(arg)) { + if (typeof arg === "number") { + return { + kind: "address", + value: arg, + }; + } else if (isManifestExecutable(arg)) { return arg; } else if (arg instanceof tg.File || arg instanceof tg.Symlink) { const value = await manifestTemplateFromArg(arg); @@ -1220,7 +1371,7 @@ const interpreterFromArg = async ( arg instanceof tg.Symlink || arg instanceof tg.Template ) { - const executable = await std.wrap({ + const executable = await tg.build(std.wrap, { buildToolchain: buildToolchainArg, build: buildTriple, host, @@ -1266,7 +1417,7 @@ const interpreterFromArg = async ( const buildToolchain = buildToolchainArg ? buildToolchainArg : await std.env.arg(await tg.build(gnu.toolchain, { host })); - const injectionLibrary = await injection.default({ + const injectionLibrary = await tg.build(injection.injection, { buildToolchain, build: buildArg ?? detectedBuild, host, @@ -1310,7 +1461,7 @@ const interpreterFromArg = async ( const host = `${arch}-linux-musl`; const buildToolchain = bootstrap.sdk.env(host); const detectedBuild = await std.triple.host(); - const injectionLibrary = await injection.default({ + const injectionLibrary = await tg.build(injection.injection, { buildToolchain, build: buildArg ?? detectedBuild, host, @@ -1336,7 +1487,7 @@ const interpreterFromArg = async ( const buildToolchain = buildToolchainArg ? 
buildToolchainArg : bootstrap.sdk.env(host); - const injectionLibrary = await injection.default({ + const injectionLibrary = await tg.build(injection.injection, { buildToolchain, build: buildArg, host, @@ -1399,7 +1550,7 @@ const interpreterFromExecutableArg = async ( const host = hostArg ?? std.triple.create({ os: "darwin", arch }); const buildTriple = buildArg ?? host; const buildToolchain = bootstrap.sdk.env(host); - const injectionDylib = await injection.default({ + const injectionDylib = await tg.build(injection.injection, { buildToolchain, build: buildTriple, host, @@ -1468,7 +1619,7 @@ const interpreterFromElf = async ( ); // Obtain injection library. - const injectionLib = await injection.default({ + const injectionLib = await tg.build(injection.injection, { buildToolchain, build: buildTriple, host, @@ -2375,6 +2526,9 @@ async function* manifestMutationDependencies( async function* manifestExecutableDependencies( executable: wrap.Manifest.Executable, ): AsyncGenerator { + if (executable.kind === "address") { + return; + } yield* manifestTemplateDependencies(executable.value); } @@ -2495,13 +2649,6 @@ export const testSingleArgObjectNoMutations = async () => { const origManifest = await wrap.Manifest.read(executable); tg.assert(origManifest); const origManifestExecutable = origManifest.executable; - tg.assert(origManifestExecutable.kind === "path"); - const origExecutable = await wrap - .executableFromManifestExecutable(origManifestExecutable) - .then(tg.File.expect); - await origExecutable.store(); - const origExecutableId = origExecutable.id; - console.log("origExecutable", origExecutableId); const buildToolchain = await bootstrap.sdk.env(await std.triple.host()); @@ -2541,6 +2688,13 @@ export const testSingleArgObjectNoMutations = async () => { "Expected argv[0] to be set to the wrapper that was invoked", ); } else if (os === "darwin") { + tg.assert(origManifestExecutable.kind === "path"); + const origExecutable = await wrap + 
.executableFromManifestExecutable(origManifestExecutable) + .then(tg.File.expect); + await origExecutable.store(); + const origExecutableId = origExecutable.id; + console.log("origExecutable", origExecutableId); tg.assert( text.match( new RegExp(`_NSGetExecutablePath: .*\\.tangram/artifacts/${wrapperID}`), diff --git a/packages/std/wrap/injection.tg.ts b/packages/std/wrap/injection.tg.ts index bcb60923..58226a36 100644 --- a/packages/std/wrap/injection.tg.ts +++ b/packages/std/wrap/injection.tg.ts @@ -13,6 +13,7 @@ type Arg = { export const injection = async (unresolved: tg.Unresolved) => { const arg = await tg.resolve(unresolved); + const host = arg.host ?? (await std.triple.host()); const build = arg.build ?? host; const os = std.triple.os(host); @@ -33,7 +34,7 @@ export const injection = async (unresolved: tg.Unresolved) => { if (std.triple.os(build) === "linux") { additionalArgs.push("-Wl,--no-as-needed", "-s"); } - const injection = dylib({ + const injection = tg.build(dylib, { build, buildToolchain, env, @@ -55,8 +56,6 @@ export const injection = async (unresolved: tg.Unresolved) => { } }; -export default injection; - type MacOsInjectionArg = { buildToolchain: std.env.Arg; env?: std.env.Arg; @@ -85,7 +84,7 @@ export const macOsInjection = async (arg: MacOsInjectionArg) => { // Compile arm64 dylib. const arm64Args = additionalArgs.concat(["--target=aarch64-apple-darwin"]); - const arm64injection = dylib({ + const arm64injection = await tg.build(dylib, { ...arg, source, additionalArgs: arm64Args, @@ -94,7 +93,7 @@ export const macOsInjection = async (arg: MacOsInjectionArg) => { // Compile amd64 dylib. const amd64Args = additionalArgs.concat(["--target=x86_64-apple-darwin"]); - const amd64injection = dylib({ + const amd64injection = await tg.build(dylib, { ...arg, source, additionalArgs: amd64Args, @@ -127,7 +126,6 @@ export const dylib = async (arg: DylibArg): Promise => { const build = arg.build ?? 
host; // On macOS builds, the compiler is clang, so no triple prefix. const useTriplePrefix = std.triple.os(build) === "linux" && build !== host; - let args: Array> = [ "-shared", "-fPIC", diff --git a/packages/std/wrap/injection/linux/lib.c b/packages/std/wrap/injection/linux/lib.c index 1b512a60..eb0f6d47 100644 --- a/packages/std/wrap/injection/linux/lib.c +++ b/packages/std/wrap/injection/linux/lib.c @@ -1,211 +1,70 @@ #define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include #include +#include +#include #include -#include - -static char* IDENTITY_PATH = NULL; - -__attribute__((constructor)) -void tangram_injection() { - // Set the identity path. - char* value = getenv("TANGRAM_INJECTION_IDENTITY_PATH"); - if (value == NULL) { - fprintf(stderr, "Error: TANGRAM_INJECTION_IDENTITY_PATH is not set.\n"); - exit(1); +#include +#define TRACE(...) if (tracing_enabled) { fprintf(stderr, "injection: "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); } + +static bool CALLED = false; +extern char** environ; + +static void unsetenv_internal (bool tracing_enabled, const char* name) { + TRACE("clearing %s", name); + char** itr = environ; + for (; *itr; itr++) { + char* e = *itr; + size_t len = strlen(name); // length of the variable name, so e[len] is the '=' separator of a matching entry + int starts_with = strncmp(e, name, len) == 0; + if (starts_with && e[len] == '=') { + e[len+1] = 0; // set this to the empty string. + } } - IDENTITY_PATH = (char*)malloc(strlen(value) + 1); - strcpy(IDENTITY_PATH, value); - unsetenv("TANGRAM_INJECTION_IDENTITY_PATH"); -} - -// Return true if `path` is "/proc/self/exe" or "/proc/$current_pid/exe". -static bool path_is_proc_self_exe(const char* path) { - // Immediately bail on relative paths. - if (path[0] != '/') { - return false; - } - - // Check if the path is "/proc/self/exe". 
- if (strcmp(path, "/proc/self/exe") == 0) { - return true; + if (unsetenv(name)) { + TRACE("warning: could not unset %s errno %d", name, errno); } - - // Check if the path is "/proc/$current_pid/exe". 
- char path_with_pid[PATH_MAX]; - snprintf(path_with_pid, sizeof(path_with_pid), "/proc/%d/exe", getpid()); - if (strcmp(path, path_with_pid) == 0) { - return true; - } - - // Otherwise, it's a different path. - return false; -} - -/** - * Write the value of `proc_self_exe_path()` into `buf` and return the number of bytes written. This function mimics the behavior of the `readlink` family of functions. - * - * Unlike `readlink`, the program will exit if an error is encountered. Any errors are assumed to be bugs in the implementation of this function. - */ -ssize_t proc_self_exe_readlink(char* buf, size_t bufsiz) { - char* path = IDENTITY_PATH; - size_t path_length = strlen(path); - size_t copy_length = MIN(path_length, bufsiz); - memcpy(buf, path, copy_length); - return copy_length; -} - -/*********** readlink **********/ - -typedef ssize_t (*_real_readlink_t)( - const char* pathname, - char* buf, - size_t bufsiz -); - -ssize_t _real_readlink(const char* pathname, char* buf, size_t bufsiz) { - return ((_real_readlink_t)dlsym(RTLD_NEXT, "readlink"))(pathname, buf, bufsiz); -} - -ssize_t readlink(const char* pathname, char* buf, size_t bufsiz) { - if (path_is_proc_self_exe(pathname)) { - return proc_self_exe_readlink(buf, bufsiz); - } - - return _real_readlink(pathname, buf, bufsiz); -} - -/*********** readlinkat **********/ - -typedef ssize_t (*_real_readlinkat_t)( - int dirfd, - const char* pathname, - char* buf, - size_t bufsiz -); - -ssize_t _real_readlinkat(int dirfd, const char* pathname, char* buf, size_t bufsiz) { - return ((_real_readlinkat_t)dlsym(RTLD_NEXT, "readlinkat"))( - dirfd, - pathname, - buf, - bufsiz - ); -} - -ssize_t readlinkat(int dirfd, const char* pathname, char* buf, size_t bufsiz) { - if (path_is_proc_self_exe(pathname)) { - // NOTE: Since `pathname` is absolute, `dirfd` is ignored. 
- return proc_self_exe_readlink(buf, bufsiz); + if (getenv(name)) { + TRACE("warning: could not unset %s, value still set", name); } - - return _real_readlinkat(dirfd, pathname, buf, bufsiz); } -/*********** glibc __readlink **********/ - -ssize_t __readlink(const char* pathname, char* buf, size_t bufsiz) { - if (path_is_proc_self_exe(pathname)) { - return proc_self_exe_readlink(buf, bufsiz); +__attribute__((constructor)) +static void restore () { + if (CALLED) { + return; } + CALLED = true; + bool tracing_enabled = getenv("TANGRAM_TRACING") != NULL; + TRACE("restoring environment"); - return _real_readlink(pathname, buf, bufsiz); -} - -/*********** open **********/ - -typedef ssize_t (*_real_open_t)( - const char* pathname, - int flags, - mode_t mode -); + const char* k= NULL; + const char* v = NULL; -int _real_open(const char* pathname, int flags, mode_t mode) { - return ((_real_open_t)dlsym(RTLD_NEXT, "open"))(pathname, flags, mode); -} - -int open(const char* pathname, int flags, mode_t mode) { - if (path_is_proc_self_exe(pathname)) { - return _real_open(IDENTITY_PATH, flags, mode); + k = "TANGRAM_CLEAR_LD_LIBRARY_PATH"; + v = getenv(k); + if (v) { + unsetenv_internal(tracing_enabled, "LD_LIBRARY_PATH"); } + unsetenv_internal(tracing_enabled, k); - return _real_open(pathname, flags, mode); -} - -/*********** open64 **********/ - -typedef ssize_t (*_real_open64_t)( - const char* pathname, - int flags, - mode_t mode -); - -int _real_open64(const char* pathname, int flags, mode_t mode) { - return ((_real_open64_t)dlsym(RTLD_NEXT, "open64"))(pathname, flags, mode); -} - -int open64(const char* pathname, int flags, mode_t mode) { - if (path_is_proc_self_exe(pathname)) { - return _real_open64(IDENTITY_PATH, flags, mode); + k = "TANGRAM_CLEAR_LD_PRELOAD"; + v = getenv(k); + if (v) { + unsetenv_internal(tracing_enabled, "LD_PRELOAD"); } + unsetenv_internal(tracing_enabled, k); - return _real_open64(pathname, flags, mode); -} - -/*********** openat **********/ - 
-typedef ssize_t (*_real_openat_t)( - int dirfd, - const char* pathname, - int flags, - mode_t mode -); - -int _real_openat(int dirfd, const char* pathname, int flags, mode_t mode) { - return ((_real_openat_t)dlsym(RTLD_NEXT, "openat"))( - dirfd, - pathname, - flags, - mode - ); -} - -int openat(int dirfd, const char* pathname, int flags, mode_t mode) { - if (path_is_proc_self_exe(pathname)) { - return _real_openat(dirfd, IDENTITY_PATH, flags, mode); + k = "TANGRAM_RESTORE_LD_LIBRARY_PATH"; + v = getenv(k); + if (v) { + setenv("LD_LIBRARY_PATH", v, 1); // write the saved value to the real variable, not back onto the marker } + unsetenv_internal(tracing_enabled, k); - return _real_openat(dirfd, pathname, flags, mode); -} - -/*********** openat64 **********/ - -typedef ssize_t (*_real_openat64_t)( - int dirfd, - const char* pathname, - int flags, - mode_t mode -); - -int _real_openat64(int dirfd, const char* pathname, int flags, mode_t mode) { - return ((_real_openat64_t)dlsym(RTLD_NEXT, "openat64"))( - dirfd, - pathname, - flags, - mode - ); -} - -int openat64(int dirfd, const char* pathname, int flags, mode_t mode) { - if (path_is_proc_self_exe(pathname)) { - return _real_openat64(dirfd, IDENTITY_PATH, flags, mode); + k = "TANGRAM_RESTORE_LD_PRELOAD"; + v = getenv(k); + if (v) { + setenv("LD_PRELOAD", v, 1); // write the saved value to the real variable, not back onto the marker } - - return _real_openat64(dirfd, pathname, flags, mode); + unsetenv_internal(tracing_enabled, k); } diff --git a/packages/std/wrap/stub.tg.ts b/packages/std/wrap/stub.tg.ts new file mode 100644 index 00000000..020c56a0 --- /dev/null +++ b/packages/std/wrap/stub.tg.ts @@ -0,0 +1,300 @@ +import * as bootstrap from "../bootstrap.tg.ts"; +import * as gnu from "../sdk/gnu.tg.ts"; +import * as llvm from "../sdk/llvm.tg.ts"; +import * as std from "../tangram.ts"; +import * as ogWorkspace from "./workspace.tg.ts"; +import packages from "../packages" with { type: "directory" }; + +type 
WorkspaceArg = { + host?: string; + target?: string; + release?: boolean; + source?: tg.Directory; + verbose?: boolean; +}; + +type BuildArg = { + host?: string; + release?: boolean; 
+ source: tg.Directory; + target?: string; + verbose?: boolean; +}; + +type WrapArg = { + workspace?: WorkspaceArg; + executable: tg.File; +}; + +export const embedWrapper = async (arg: WrapArg) => { + // Build the wrap workspace. + const workspace_ = workspace(arg.workspace ?? {}); + const unwrapped = std.wrap.unwrap(arg.executable); + const manifest = tg.file( + std.wrap.Manifest.read(arg.executable).then(JSON.stringify), + ); + const env: Array> = [{ utils: false }]; + let build = { + command: tg` + ${workspace_}/wrap ${unwrapped} $OUTPUT ${workspace_}/stub.elf ${workspace_}/stub.bin ${manifest} + `, + }; + return tg.build(std.phases.run, { + bootstrap: true, + env: std.env.arg(...env), + phases: { build }, + }); +}; + +export const workspace = async (arg: WorkspaceArg) => { + const { + target: target_, + host: host_, + release = true, + source: source_, + verbose = false, + } = await tg.resolve(arg); + const host = host_ ?? (await std.triple.host()); + + // Ensure we're only building for Linux. + const target = target_ ?? host; + + if (std.triple.os(target) !== "linux") { + throw new Error("embeded wrapper support is limited to linux targets"); + } + + // Get the source. + const source: tg.Directory = source_ ? source_ : packages; + return build({ + host, + verbose, + target, + source, + }).then(tg.Directory.expect); +}; + +export const bootstrapToolchain = async (host?: string) => { + let host_ = host ?? (await std.triple.host()); + return bootstrap.sdk.env(host_); +}; + +export const build = async (unresolved: tg.Unresolved) => { + const arg = await tg.resolve(unresolved); + const release = arg.release ?? true; + const source = arg.source; + let host_ = arg.host ?? (await std.triple.host()); + const host = standardizeTriple(host_); + let target_ = arg.target ?? 
host; + const target = standardizeTriple(target_); + const system = std.triple.archAndOs(host); + const hostOs = std.triple.os(system); + let verbose = arg.verbose; + + const isCross = + std.triple.arch(host_) !== std.triple.arch(target_) || + std.triple.os(host_) !== std.triple.os(target_); + let prefix = ``; + let suffix = tg``; + if (hostOs === "linux" && isCross) { + prefix = `${target}-`; + } + + // Get the appropriate toolchain directory. + // You need a build toolchian AND a host toolchain. These may be the same. + let buildToolchain = undefined; + let hostToolchain = undefined; + if (hostOs === "linux") { + if (!isCross) { + buildToolchain = await bootstrap.sdk.env(host_); + host_ = await bootstrap.toolchainTriple(host_); + target_ = host_; + } else { + buildToolchain = await bootstrap.sdk.env(host_); + hostToolchain = await tg.build(llvm.toolchain, { host: host_, target }); + } + } else { + if (isCross) { + buildToolchain = await bootstrap.sdk.env(host_); + hostToolchain = await tg + .build(llvm.toolchain, { host, target }) + .then(tg.Directory.expect); + const { directory: targetDirectory } = await std.sdk.toolchainComponents({ + env: await std.env.arg(hostToolchain, { utils: false }), + host: host_, + }); + suffix = tg.Template + .raw` -target ${target} --sysroot ${targetDirectory}/${target}/sysroot`; + } else { + buildToolchain = await bootstrap.sdk.env(host_); + } + } + const env: Array> = [ + { utils: false }, + buildToolchain, + hostToolchain, + { + [`AR_${tripleToEnvVar(target)}`]: `${prefix}ar`, + [`CC_${tripleToEnvVar(target)}`]: tg`${prefix}cc${suffix}`, + [`LD_${tripleToEnvVar(target)}`]: tg`${prefix}ld${suffix}`, + }, + ]; + + // Compile the stub binary. + const arch = std.triple.arch(target_); + const releaseArgs = release ? "-Os" : ""; + const verboseArgs = verbose ? "-v" : ""; + let buildPhase = { + command: tg` + # Create output directory. 
+ mkdir $OUTPUT + + # Compile our sources + $CC_${tripleToEnvVar(target)} \ + ${source}/stub/src/${arch}/start.s \ + ${source}/stub/src/stub.c \ + ${source}/stub/src/manifest.c \ + ${source}/stub/src/manifest/json.c \ + ${source}/stub/src/util.c \ + -I${source}/stub/include \ + -nostdlib \ + -nolibc \ + -ffreestanding \ + -fno-stack-protector \ + -static \ + -static-libgcc \ + -fno-asynchronous-unwind-tables \ + -fPIC \ + -Werror \ + -Os \ + -Wl,-T${source}/stub/link.ld \ + -o $OUTPUT/stub.elf + + # Compile the stub. + echo "compiled stub.elf" + + # Extract the binary. + objcopy -O binary $OUTPUT/stub.elf $OUTPUT/stub.bin + + # Compile the wrap binary. + $CC_${tripleToEnvVar(host)} \ + ${source}/stub/src/wrap.c \ + -I${source}/stub/include \ + -static \ + -o $OUTPUT/wrap ${releaseArgs} ${verboseArgs} + echo "compiled wrap" + `, + }; + return await tg.build(std.phases.run, { + bootstrap: true, + env: std.env.arg(...env), + phases: { prepare: undefined, build: buildPhase, install: undefined }, + command: { + host: system, + }, + network: false, + }); +}; + +/* Ensure the passed triples are what we expect, musl on linux and standard for macOS. */ +const standardizeTriple = (triple: string): string => { + const components = std.triple.components(triple); + const os = components.os; + + if (os === "darwin") { + return std.triple.create({ + ...components, + vendor: "apple", + }); + } else if (os === "linux") { + return std.triple.create({ + ...components, + vendor: "unknown", + environment: "musl", + }); + } else { + return tg.unreachable(); + } +}; + +const tripleToEnvVar = (triple: string, upcase?: boolean) => { + const allCaps = upcase ?? false; + let result = triple.replace(/-/g, "_"); + if (allCaps) { + result = result.toUpperCase(); + } + return result; +}; + +export const test = async () => { + // Detect the host triple. + const host = await std.triple.host(); + + // Determine the target triple with differing architecture from the host. 
+ const hostArch = std.triple.arch(host); + tg.assert(hostArch); + + // const buildToolchain = await bootstrap.sdk.env(host); + return workspace({ host }); +}; + +export const testCompile = async () => { + const toolchain = std.bootstrap.sdk(); + const source = tg.directory({ + "main.c": tg.file(` + #include + extern char** environ; + int main(int argc, const char** argv) { + for (int i = 0; i < 2; i++) { + const char* var = i ? "envp" : "argv"; + const char** s = i ? (const char**)environ : argv; + int j = 0; + for (; *s; s++, j++) { + printf("%s[%d] = %s\\n", var, j, *s); + } + } + return 0; + } + `), + }); + return std.run` + gcc ${source}/main.c -o $OUTPUT + ` + .bootstrap(true) + .env( + toolchain, + { utils: false }, + { + TANGRAM_TRACING: "true", + TANGRAM_LINKER_TRACING: "tangram_ld_proxy=trace", + }, + ); +}; + +export const testFull = async () => { + const toolchain = std.sdk(); + const source = tg.directory({ + "main.c": tg.file(` + #include + extern char** environ; + int main(int argc, const char** argv) { + for (int i = 0; i < 2; i++) { + const char* var = i ? "envp" : "argv"; + const char** s = i ? (const char**)environ : argv; + int j = 0; + for (; *s; s++, j++) { + printf("%s[%d] = %s\\n", var, j, *s); + } + } + return 0; + } + `), + }); + let file = std.$` + gcc ${source}/main.c -o $OUTPUT + ` + .env(toolchain, { utils: false }, { TANGRAM_TRACING: "true" }) + .then(tg.File.expect); + return std.wrap(file, { + env: { CUSTOM_ENV: "true", TANGRAM_SUPPRESS_ENV: "true" }, + }); +}; diff --git a/packages/std/wrap/workspace.tg.ts b/packages/std/wrap/workspace.tg.ts index 78be3277..1146e6d3 100644 --- a/packages/std/wrap/workspace.tg.ts +++ b/packages/std/wrap/workspace.tg.ts @@ -417,7 +417,7 @@ export const build = async (unresolved: tg.Unresolved) => { .then(tg.Directory.expect); }; -/* Ensure the passed triples are what we expect, musl on linxu and standard for macOS. 
*/ +/* Ensure the passed triples are what we expect, musl on linux and standard for macOS. */ const standardizeTriple = (triple: string): string => { const components = std.triple.components(triple); const os = components.os; @@ -489,7 +489,6 @@ export const testCross = async () => { os: "linux", environment: "gnu", }); - const crossWorkspace = await tg.build(workspace, { build: host, host: target,