Skip to content

Commit 4e859ff

Browse files
committed
Fix ns_string! duplication and inefficiency
1 parent e4c018e commit 4e859ff

File tree

9 files changed

+146
-1007
lines changed

9 files changed

+146
-1007
lines changed

objc2-foundation/src/__string_macro.rs

Lines changed: 74 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -257,94 +257,95 @@ const fn decode_utf8(s: &[u8], i: usize) -> (usize, u32) {
257257
#[macro_export]
258258
macro_rules! ns_string {
259259
($s:expr) => {{
260-
// Note that this always uses full paths to items from `$crate`. This
261-
// does not import any items because doing so could cause ambiguity if
262-
// the same names are exposed at the call site of this macro.
260+
// Note: We create both the ASCII + NUL and the UTF-16 + NUL versions
261+
// of the string, since we can't conditionally create a static.
263262
//
264-
// The only names directly used are expressions, whose names shadow any
265-
// other names outside of this macro.
263+
// Since we don't add the `#[used]` attribute, Rust can fairly easily
264+
// figure out that one of the variants is never used, and simply
265+
// exclude it.
266266

267267
const INPUT: &[u8] = $s.as_bytes();
268268

269-
if $crate::__string_macro::is_ascii_no_nul(INPUT) {
270-
// Convert the input slice to an array with known length so that
271-
// we can add a NUL byte to it.
272-
//
273-
// The section is the same as what clang sets, see:
274-
// https://github.com/llvm/llvm-project/blob/release/13.x/clang/lib/CodeGen/CodeGenModule.cpp#L5192
275-
#[link_section = "__TEXT,__cstring,cstring_literals"]
276-
static ASCII: [u8; INPUT.len() + 1] = {
277-
// Zero-fill with INPUT.len() + 1
278-
let mut res: [u8; INPUT.len() + 1] = [0; INPUT.len() + 1];
279-
let mut i = 0;
280-
// Fill with data from INPUT
281-
while i < INPUT.len() {
282-
res[i] = INPUT[i];
283-
i += 1;
284-
}
285-
// Now contains INPUT + '\0'
286-
res
287-
};
269+
// Convert the input slice to a C-style string with a NUL byte.
270+
//
271+
// The section is the same as what clang sets, see:
272+
// https://github.com/llvm/llvm-project/blob/release/13.x/clang/lib/CodeGen/CodeGenModule.cpp#L5192
273+
#[link_section = "__TEXT,__cstring,cstring_literals"]
274+
static ASCII: [u8; INPUT.len() + 1] = {
275+
// Zero-fill with INPUT.len() + 1
276+
let mut res: [u8; INPUT.len() + 1] = [0; INPUT.len() + 1];
277+
let mut i = 0;
278+
// Fill with data from INPUT
279+
while i < INPUT.len() {
280+
res[i] = INPUT[i];
281+
i += 1;
282+
}
283+
// Now contains INPUT + '\0'
284+
res
285+
};
288286

289-
#[link_section = "__DATA,__cfstring"]
290-
static CFSTRING: $crate::__string_macro::CFConstString = unsafe {
291-
$crate::__string_macro::CFConstString::new_ascii(
292-
&$crate::__string_macro::__CFConstantStringClassReference,
293-
&ASCII,
294-
)
295-
};
287+
// The full UTF-16 contents along with the written length.
288+
const UTF16_FULL: (&[u16; INPUT.len()], usize) = {
289+
let mut out = [0u16; INPUT.len()];
290+
let mut iter = $crate::__string_macro::EncodeUtf16Iter::new(INPUT);
291+
let mut written = 0;
296292

297-
CFSTRING.as_nsstring()
298-
} else {
299-
// The full UTF-16 contents along with the written length.
300-
const UTF16_FULL: (&[u16; INPUT.len()], usize) = {
301-
let mut out = [0u16; INPUT.len()];
302-
let mut iter = $crate::__string_macro::EncodeUtf16Iter::new(INPUT);
303-
let mut written = 0;
304-
305-
while let Some((state, chars)) = iter.next() {
306-
iter = state;
307-
out[written] = chars.repr[0];
308-
written += 1;
293+
while let Some((state, chars)) = iter.next() {
294+
iter = state;
295+
out[written] = chars.repr[0];
296+
written += 1;
309297

310-
if chars.len > 1 {
311-
out[written] = chars.repr[1];
312-
written += 1;
313-
}
298+
if chars.len > 1 {
299+
out[written] = chars.repr[1];
300+
written += 1;
314301
}
302+
}
315303

316-
(&{ out }, written)
317-
};
318-
319-
// Convert the slice to an array with known length so that we can
320-
// add a NUL byte to it.
321-
//
322-
// The section is the same as what clang sets, see:
323-
// https://github.com/llvm/llvm-project/blob/release/13.x/clang/lib/CodeGen/CodeGenModule.cpp#L5193
324-
#[link_section = "__TEXT,__ustring"]
325-
static UTF16: [u16; UTF16_FULL.1 + 1] = {
326-
// Zero-fill with UTF16_FULL.1 + 1
327-
let mut res: [u16; UTF16_FULL.1 + 1] = [0; UTF16_FULL.1 + 1];
328-
let mut i = 0;
329-
// Fill with data from UTF16_FULL.0 up until UTF16_FULL.1
330-
while i < UTF16_FULL.1 {
331-
res[i] = UTF16_FULL.0[i];
332-
i += 1;
333-
}
334-
// Now contains UTF16_FULL.1 + NUL
335-
res
336-
};
304+
(&{ out }, written)
305+
};
337306

338-
#[link_section = "__DATA,__cfstring"]
339-
static CFSTRING: $crate::__string_macro::CFConstString = unsafe {
307+
// Convert the slice to a UTF-16 array + a final NUL code unit.
308+
//
309+
// The section is the same as what clang sets, see:
310+
// https://github.com/llvm/llvm-project/blob/release/13.x/clang/lib/CodeGen/CodeGenModule.cpp#L5193
311+
#[link_section = "__TEXT,__ustring"]
312+
static UTF16: [u16; UTF16_FULL.1 + 1] = {
313+
// Zero-fill with UTF16_FULL.1 + 1
314+
let mut res: [u16; UTF16_FULL.1 + 1] = [0; UTF16_FULL.1 + 1];
315+
let mut i = 0;
316+
// Fill with data from UTF16_FULL.0 up until UTF16_FULL.1
317+
while i < UTF16_FULL.1 {
318+
res[i] = UTF16_FULL.0[i];
319+
i += 1;
320+
}
321+
// Now contains UTF16_FULL.1 + NUL
322+
res
323+
};
324+
325+
// Create the constant string structure, and store it in a static
326+
// within a special section.
327+
//
328+
// The section is the same as what clang sets, see:
329+
// https://github.com/llvm/llvm-project/blob/release/13.x/clang/lib/CodeGen/CodeGenModule.cpp#L5243
330+
#[link_section = "__DATA,__cfstring"]
331+
static CFSTRING: $crate::__string_macro::CFConstString = unsafe {
332+
if $crate::__string_macro::is_ascii_no_nul(INPUT) {
333+
// This is technically an optimization (UTF-16 strings are
334+
// always valid), but it's a fairly important one!
335+
$crate::__string_macro::CFConstString::new_ascii(
336+
&$crate::__string_macro::__CFConstantStringClassReference,
337+
&ASCII,
338+
)
339+
} else {
340340
$crate::__string_macro::CFConstString::new_utf16(
341341
&$crate::__string_macro::__CFConstantStringClassReference,
342342
&UTF16,
343343
)
344-
};
344+
}
345+
};
345346

346-
CFSTRING.as_nsstring()
347-
}
347+
// Return &'static NSString
348+
CFSTRING.as_nsstring()
348349
}};
349350
}
350351

test-assembly/crates/test_ns_string/expected/apple-aarch64.s

Lines changed: 13 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -2,83 +2,32 @@
22
.globl _get_ascii
33
.p2align 2
44
_get_ascii:
5-
stp x29, x30, [sp, #-16]!
6-
mov x29, sp
75
Lloh0:
8-
adrp x0, l___unnamed_1@PAGE
6+
adrp x0, SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 0)@PAGE
97
Lloh1:
10-
add x0, x0, l___unnamed_1@PAGEOFF
11-
mov w1, #3
12-
bl SYM(objc2_foundation::__string_macro::is_ascii_no_nul::GENERATED_ID, 0)
13-
Lloh2:
14-
adrp x8, SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 0)@PAGE
15-
Lloh3:
16-
add x8, x8, SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 0)@PAGEOFF
17-
Lloh4:
18-
adrp x9, SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 1)@PAGE
19-
Lloh5:
20-
add x9, x9, SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 1)@PAGEOFF
21-
cmp w0, #0
22-
csel x0, x9, x8, ne
23-
ldp x29, x30, [sp], #16
8+
add x0, x0, SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 0)@PAGEOFF
249
ret
25-
.loh AdrpAdd Lloh4, Lloh5
26-
.loh AdrpAdd Lloh2, Lloh3
2710
.loh AdrpAdd Lloh0, Lloh1
2811

2912
.globl _get_utf16
3013
.p2align 2
3114
_get_utf16:
32-
stp x29, x30, [sp, #-16]!
33-
mov x29, sp
34-
Lloh6:
35-
adrp x0, l___unnamed_2@PAGE
36-
Lloh7:
37-
add x0, x0, l___unnamed_2@PAGEOFF
38-
mov w1, #5
39-
bl SYM(objc2_foundation::__string_macro::is_ascii_no_nul::GENERATED_ID, 0)
40-
Lloh8:
41-
adrp x8, SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 0)@PAGE
42-
Lloh9:
43-
add x8, x8, SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 0)@PAGEOFF
44-
Lloh10:
45-
adrp x9, SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 1)@PAGE
46-
Lloh11:
47-
add x9, x9, SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 1)@PAGEOFF
48-
cmp w0, #0
49-
csel x0, x9, x8, ne
50-
ldp x29, x30, [sp], #16
15+
Lloh2:
16+
adrp x0, SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 0)@PAGE
17+
Lloh3:
18+
add x0, x0, SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 0)@PAGEOFF
5119
ret
52-
.loh AdrpAdd Lloh10, Lloh11
53-
.loh AdrpAdd Lloh8, Lloh9
54-
.loh AdrpAdd Lloh6, Lloh7
20+
.loh AdrpAdd Lloh2, Lloh3
5521

5622
.globl _get_with_nul
5723
.p2align 2
5824
_get_with_nul:
59-
stp x29, x30, [sp, #-16]!
60-
mov x29, sp
61-
Lloh12:
62-
adrp x0, l___unnamed_3@PAGE
63-
Lloh13:
64-
add x0, x0, l___unnamed_3@PAGEOFF
65-
mov w1, #6
66-
bl SYM(objc2_foundation::__string_macro::is_ascii_no_nul::GENERATED_ID, 0)
67-
Lloh14:
68-
adrp x8, SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 0)@PAGE
69-
Lloh15:
70-
add x8, x8, SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 0)@PAGEOFF
71-
Lloh16:
72-
adrp x9, SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 1)@PAGE
73-
Lloh17:
74-
add x9, x9, SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 1)@PAGEOFF
75-
cmp w0, #0
76-
csel x0, x9, x8, ne
77-
ldp x29, x30, [sp], #16
25+
Lloh4:
26+
adrp x0, SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 0)@PAGE
27+
Lloh5:
28+
add x0, x0, SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 0)@PAGEOFF
7829
ret
79-
.loh AdrpAdd Lloh16, Lloh17
80-
.loh AdrpAdd Lloh14, Lloh15
81-
.loh AdrpAdd Lloh12, Lloh13
30+
.loh AdrpAdd Lloh4, Lloh5
8231

8332
.section __DATA,__const
8433
.globl _EMPTY
@@ -91,16 +40,6 @@ _EMPTY:
9140
_XYZ:
9241
.quad SYM(test_ns_string[CRATE_ID]::XYZ::CFSTRING, 0)
9342

94-
.section __TEXT,__const
95-
l___unnamed_1:
96-
.ascii "abc"
97-
98-
l___unnamed_2:
99-
.ascii "\303\241b\304\207"
100-
101-
l___unnamed_3:
102-
.asciz "a\000b\000c"
103-
10443
.section __TEXT,__cstring,cstring_literals
10544
SYM(test_ns_string[CRATE_ID]::EMPTY::ASCII, 0):
10645
.space 1
@@ -114,20 +53,6 @@ SYM(test_ns_string[CRATE_ID]::EMPTY::CFSTRING, 0):
11453
.quad SYM(test_ns_string[CRATE_ID]::EMPTY::ASCII, 0)
11554
.space 8
11655

117-
.section __TEXT,__ustring
118-
.p2align 1
119-
SYM(test_ns_string[CRATE_ID]::EMPTY::UTF16, 0):
120-
.space 2
121-
122-
.section __DATA,__cfstring
123-
.globl SYM(test_ns_string[CRATE_ID]::EMPTY::CFSTRING, 1)
124-
.p2align 3
125-
SYM(test_ns_string[CRATE_ID]::EMPTY::CFSTRING, 1):
126-
.quad ___CFConstantStringClassReference
127-
.asciz "\320\007\000\000\000\000\000"
128-
.quad SYM(test_ns_string[CRATE_ID]::EMPTY::UTF16, 0)
129-
.space 8
130-
13156
.section __TEXT,__cstring,cstring_literals
13257
SYM(test_ns_string[CRATE_ID]::XYZ::ASCII, 0):
13358
.asciz "xyz"
@@ -141,57 +66,18 @@ SYM(test_ns_string[CRATE_ID]::XYZ::CFSTRING, 0):
14166
.quad SYM(test_ns_string[CRATE_ID]::XYZ::ASCII, 0)
14267
.asciz "\003\000\000\000\000\000\000"
14368

144-
.section __TEXT,__ustring
145-
.p2align 1
146-
SYM(test_ns_string[CRATE_ID]::XYZ::UTF16, 0):
147-
.asciz "x\000y\000z\000\000"
148-
149-
.section __DATA,__cfstring
150-
.globl SYM(test_ns_string[CRATE_ID]::XYZ::CFSTRING, 1)
151-
.p2align 3
152-
SYM(test_ns_string[CRATE_ID]::XYZ::CFSTRING, 1):
153-
.quad ___CFConstantStringClassReference
154-
.asciz "\320\007\000\000\000\000\000"
155-
.quad SYM(test_ns_string[CRATE_ID]::XYZ::UTF16, 0)
156-
.asciz "\003\000\000\000\000\000\000"
157-
15869
.section __TEXT,__cstring,cstring_literals
15970
SYM(test_ns_string[CRATE_ID]::get_ascii::ASCII, 0):
16071
.asciz "abc"
16172

16273
.section __DATA,__cfstring
16374
.p2align 3
164-
SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 1):
75+
SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 0):
16576
.quad ___CFConstantStringClassReference
16677
.asciz "\310\007\000\000\000\000\000"
16778
.quad SYM(test_ns_string[CRATE_ID]::get_ascii::ASCII, 0)
16879
.asciz "\003\000\000\000\000\000\000"
16980

170-
.section __TEXT,__ustring
171-
.p2align 1
172-
SYM(test_ns_string[CRATE_ID]::get_ascii::UTF16, 0):
173-
.asciz "a\000b\000c\000\000"
174-
175-
.section __DATA,__cfstring
176-
.p2align 3
177-
SYM(test_ns_string[CRATE_ID]::get_ascii::CFSTRING, 0):
178-
.quad ___CFConstantStringClassReference
179-
.asciz "\320\007\000\000\000\000\000"
180-
.quad SYM(test_ns_string[CRATE_ID]::get_ascii::UTF16, 0)
181-
.asciz "\003\000\000\000\000\000\000"
182-
183-
.section __TEXT,__cstring,cstring_literals
184-
SYM(test_ns_string[CRATE_ID]::get_utf16::ASCII, 0):
185-
.asciz "\303\241b\304\207"
186-
187-
.section __DATA,__cfstring
188-
.p2align 3
189-
SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 1):
190-
.quad ___CFConstantStringClassReference
191-
.asciz "\310\007\000\000\000\000\000"
192-
.quad SYM(test_ns_string[CRATE_ID]::get_utf16::ASCII, 0)
193-
.asciz "\005\000\000\000\000\000\000"
194-
19581
.section __TEXT,__ustring
19682
.p2align 1
19783
SYM(test_ns_string[CRATE_ID]::get_utf16::UTF16, 0):
@@ -205,18 +91,6 @@ SYM(test_ns_string[CRATE_ID]::get_utf16::CFSTRING, 0):
20591
.quad SYM(test_ns_string[CRATE_ID]::get_utf16::UTF16, 0)
20692
.asciz "\003\000\000\000\000\000\000"
20793

208-
.section __TEXT,__cstring,cstring_literals
209-
SYM(test_ns_string[CRATE_ID]::get_with_nul::ASCII, 0):
210-
.asciz "a\000b\000c\000"
211-
212-
.section __DATA,__cfstring
213-
.p2align 3
214-
SYM(test_ns_string[CRATE_ID]::get_with_nul::CFSTRING, 1):
215-
.quad ___CFConstantStringClassReference
216-
.asciz "\310\007\000\000\000\000\000"
217-
.quad SYM(test_ns_string[CRATE_ID]::get_with_nul::ASCII, 0)
218-
.asciz "\006\000\000\000\000\000\000"
219-
22094
.section __TEXT,__ustring
22195
.p2align 1
22296
SYM(test_ns_string[CRATE_ID]::get_with_nul::UTF16, 0):

0 commit comments

Comments
 (0)