From 1006d50798a17751d129298c3ebea05b1fcab3b2 Mon Sep 17 00:00:00 2001 From: Adrian Taylor Date: Sun, 16 Feb 2025 16:25:28 +0000 Subject: [PATCH] Distinguish char16_t. With a new command-line option, this ensures that char16_t is distinct from uint16_t in generated bindings. On some platforms these are distinct types, so it can be important for downstream post processors to spot the difference. See the documentation on the new command-line option for expected behavior and usage here. Part of https://github.com/google/autocxx/issues/124. --- .../tests/expectations/tests/char16_t.rs | 7 ++++++ bindgen-tests/tests/headers/char16_t.hpp | 4 ++++ bindgen/codegen/helpers.rs | 6 +++++ bindgen/ir/context.rs | 3 +++ bindgen/ir/int.rs | 9 +++++--- bindgen/options/cli.rs | 5 +++++ bindgen/options/mod.rs | 22 +++++++++++++++++++ 7 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 bindgen-tests/tests/expectations/tests/char16_t.rs create mode 100644 bindgen-tests/tests/headers/char16_t.hpp diff --git a/bindgen-tests/tests/expectations/tests/char16_t.rs b/bindgen-tests/tests/expectations/tests/char16_t.rs new file mode 100644 index 0000000000..82d30fe517 --- /dev/null +++ b/bindgen-tests/tests/expectations/tests/char16_t.rs @@ -0,0 +1,7 @@ +#![allow(dead_code, non_snake_case, non_camel_case_types, non_upper_case_globals)] +#[repr(transparent)] +pub struct bindgen_cchar16_t(u16); +unsafe extern "C" { + #[link_name = "\u{1}_Z16receive_char16_tDs"] + pub fn receive_char16_t(input: bindgen_cchar16_t); +} diff --git a/bindgen-tests/tests/headers/char16_t.hpp b/bindgen-tests/tests/headers/char16_t.hpp new file mode 100644 index 0000000000..35e1f16dd3 --- /dev/null +++ b/bindgen-tests/tests/headers/char16_t.hpp @@ -0,0 +1,4 @@ +// bindgen-flags: --use-distinct-char16-t --raw-line '#[repr(transparent)] pub struct bindgen_cchar16_t(u16);' -- -x c++ -std=c++14 + +void receive_char16_t(char16_t input) { +} diff --git a/bindgen/codegen/helpers.rs b/bindgen/codegen/helpers.rs 
index 7b09ed7cfb..70f0125931 100644 --- a/bindgen/codegen/helpers.rs +++ b/bindgen/codegen/helpers.rs @@ -187,6 +187,12 @@ pub(crate) mod ast_ty { match ik { IntKind::Bool => syn::parse_quote! { bool }, IntKind::Char { .. } => raw_type(ctx, "c_char"), + // The following is used only when an unusual command-line + // argument is used. bindgen_cchar16_t is not a real type; + // but this allows downstream postprocessors to distinguish + // this case and do something special for C++ bindings + // containing char16_t. + IntKind::Char16 => syn::parse_quote! { bindgen_cchar16_t }, IntKind::SChar => raw_type(ctx, "c_schar"), IntKind::UChar => raw_type(ctx, "c_uchar"), IntKind::Short => raw_type(ctx, "c_short"), diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index 78790d61c4..99c75d63d8 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -1980,6 +1980,9 @@ If you encounter an error missing from this list, please file an issue or a PR!" CXType_Short => TypeKind::Int(IntKind::Short), CXType_UShort => TypeKind::Int(IntKind::UShort), CXType_WChar => TypeKind::Int(IntKind::WChar), + CXType_Char16 if self.options().use_distinct_char16_t => { + TypeKind::Int(IntKind::Char16) + } CXType_Char16 => TypeKind::Int(IntKind::U16), CXType_Char32 => TypeKind::Int(IntKind::U32), CXType_Long => TypeKind::Int(IntKind::Long), diff --git a/bindgen/ir/int.rs b/bindgen/ir/int.rs index 4b49931ed8..4caa6b2d06 100644 --- a/bindgen/ir/int.rs +++ b/bindgen/ir/int.rs @@ -54,9 +54,12 @@ pub enum IntKind { /// A 16-bit signed integer. I16, - /// Either a `char16_t` or a `wchar_t`. + /// A 16-bit unsigned integer: enum sizes, and `char16_t` by default. U16, + /// A `char16_t`, used only when `use_distinct_char16_t` is enabled. + Char16, + /// A 32-bit signed integer. I32, @@ -94,7 +97,7 @@ impl IntKind { // to know whether it is or not right now (unlike char, there's no // WChar_S / WChar_U). 
Bool | UChar | UShort | UInt | ULong | ULongLong | U8 | U16 | - WChar | U32 | U64 | U128 => false, + Char16 | WChar | U32 | U64 | U128 => false, SChar | Short | Int | Long | LongLong | I8 | I16 | I32 | I64 | I128 => true, @@ -110,7 +113,7 @@ impl IntKind { use self::IntKind::*; Some(match *self { Bool | UChar | SChar | U8 | I8 | Char { .. } => 1, - U16 | I16 => 2, + U16 | I16 | Char16 => 2, U32 | I32 => 4, U64 | I64 => 8, I128 | U128 => 16, diff --git a/bindgen/options/cli.rs b/bindgen/options/cli.rs index 8c4c05bc84..1efddb02f3 100644 --- a/bindgen/options/cli.rs +++ b/bindgen/options/cli.rs @@ -441,6 +441,9 @@ struct BindgenCommand { /// Always output explicit padding fields. #[arg(long)] explicit_padding: bool, + /// Emit `bindgen_cchar16_t` rather than `u16` for `char16_t`. + #[arg(long)] + use_distinct_char16_t: bool, /// Enables generation of vtable functions. #[arg(long)] vtable_generation: bool, @@ -629,6 +632,7 @@ where translate_enum_integer_types, c_naming, explicit_padding, + use_distinct_char16_t, vtable_generation, sort_semantically, merge_extern_blocks, @@ -926,6 +930,7 @@ where translate_enum_integer_types, c_naming, explicit_padding, + use_distinct_char16_t, vtable_generation, sort_semantically, merge_extern_blocks, diff --git a/bindgen/options/mod.rs b/bindgen/options/mod.rs index 9d1d195980..1a675401a4 100644 --- a/bindgen/options/mod.rs +++ b/bindgen/options/mod.rs @@ -153,6 +153,28 @@ macro_rules! options { } options! { + /// Whether we should distinguish between 'char16_t' and 'u16'. + /// As standard, bindgen represents `char16_t` as `u16`. + /// Rust does not have a `std::os::raw::c_char16_t` type, and thus + /// we can't use a built-in Rust type in the generated bindings. + /// But for some uses of bindgen, especially when downstream + /// post-processing occurs, it's important to distinguish `char16_t` + /// from normal `uint16_t`. When this option is enabled, bindgen + /// generates a fake type called `bindgen_cchar16_t`. 
Downstream + /// code post-processors should arrange to replace this with a + /// real type. + use_distinct_char16_t: bool { + methods: { + /// If this is true, denote `char16_t` as a separate type from `u16`. + /// Disabled by default. + pub fn use_distinct_char16_t(mut self, doit: bool) -> Builder { + self.options.use_distinct_char16_t = doit; + self + } + }, + as_args: "--use-distinct-char16-t", + }, + /// Types that have been blocklisted and should not appear anywhere in the generated code. blocklisted_types: RegexSet { methods: {