Skip to content

Commit 8b20d0d

Browse files
committed
Allow larger string pieces in fmt::Arguments repr.
1 parent f92c5f1 commit 8b20d0d

File tree

5 files changed

+58
-23
lines changed

5 files changed

+58
-23
lines changed

compiler/rustc_ast_lowering/src/format.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,10 +349,15 @@ fn expand_format_args<'hir>(
349349
return hir::ExprKind::Call(from_str, args);
350350
}
351351

352-
// Encode the literal in chunks of up to 127 bytes, split at utf-8 boundaries.
352+
// Encode the literal in chunks of up to u16::MAX bytes, split at utf-8 boundaries.
353353
while !s.is_empty() {
354-
let len = s.floor_char_boundary(127);
355-
bytecode.push(len as u8);
354+
let len = s.floor_char_boundary(usize::from(u16::MAX));
355+
if len < 0x80 {
356+
bytecode.push(len as u8);
357+
} else {
358+
bytecode.push(0x80);
359+
bytecode.extend_from_slice(&(len as u16).to_le_bytes());
360+
}
356361
bytecode.extend(&s.as_bytes()[..len]);
357362
s = &s[len..];
358363
}
@@ -362,7 +367,7 @@ fn expand_format_args<'hir>(
362367
FormatArgsPiece::Placeholder(p) => {
363368
// Push the start byte and remember its index so we can set the option bits later.
364369
let i = bytecode.len();
365-
bytecode.push(0x80);
370+
bytecode.push(0xC0);
366371

367372
let position = argmap
368373
.insert_full(

library/core/src/fmt/mod.rs

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};
66
use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};
7+
use crate::hint::assert_unchecked;
78
use crate::marker::{PhantomData, PointeeSized};
89
use crate::num::fmt as numfmt;
910
use crate::ops::Deref;
@@ -640,22 +641,29 @@ impl<'a> Formatter<'a> {
640641
//
641642
// The template byte sequence is the concatenation of parts of the following types:
642643
//
643-
// - Literal string piece (1-127 bytes):
644+
// - Literal string piece:
644645
// Pieces that must be formatted verbatim (e.g. "hello " and "\n" in "hello {name}\n")
645-
// are represented as a single byte containing their length followed directly by the bytes
646-
// of the string:
646+
// appear literally in the template byte sequence, prefixed by their length.
647+
//
648+
// For pieces of up to 127 bytes, these are represented as a single byte containing the
649+
// length followed directly by the bytes of the string:
647650
// ┌───┬────────────────────────────┐
648651
// │len│ `len` bytes (utf-8) │ (e.g. b"\x06hello ")
649652
// └───┴────────────────────────────┘
650653
//
651-
// These strings can be 127 bytes at most, such that the `len` byte always has the highest
652-
// bit cleared. Longer pieces are split into multiple pieces (at utf-8 boundaries).
654+
// For larger pieces up to u16::MAX bytes, these are represented as a 0x80 byte followed by
655+
// their length as a 16-bit little-endian integer, followed by the bytes of the string:
656+
// ┌────┬─────────┬───────────────────────────┐
657+
// │0x80│ len │ `len` bytes (utf-8) │ (e.g. b"\x80\x00\x01hello … ")
658+
// └────┴─────────┴───────────────────────────┘
659+
//
660+
// Longer pieces are split into multiple pieces of max u16::MAX bytes (at utf-8 boundaries).
653661
//
654662
// - Placeholder:
655663
// Placeholders (e.g. `{name}` in "hello {name}") are represented as a byte with the highest
656-
// bit set, followed by zero or more fields depending on the flags set in the first byte:
664+
// two bits set, followed by zero or more fields depending on the flags in the first byte:
657665
// ┌──────────┬┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┐
658-
// │0b10______│ flags ┊ width ┊ precision ┊ arg_index ┊ (e.g. b"\x82\x05\0")
666+
// │0b11______│ flags ┊ width ┊ precision ┊ arg_index ┊ (e.g. b"\xC2\x05\0")
659667
// └────││││││┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┘
660668
// ││││││ 32 bit 16 bit 16 bit 16 bit
661669
// │││││└─ flags present
@@ -665,12 +673,12 @@ impl<'a> Formatter<'a> {
665673
// │└─ width indirect
666674
// └─ precision indirect
667675
//
668-
// All fields other than the first byte are optional and only present when
669-
// their corresponding flag is set in the first byte.
676+
// All fields other than the first byte are optional and only present when their
677+
// corresponding flag is set in the first byte.
670678
//
671679
// So, a fully default placeholder without any options is just a single byte:
672680
// ┌──────────┐
673-
// │0b10000000│ (b"\x80")
681+
// │0b11000000│ (b"\xC0")
674682
// └──────────┘
675683
//
676684
// The fields are stored as little endian.
@@ -766,14 +774,21 @@ impl<'a> Arguments<'a> {
766774
// SAFETY: We can assume the template is valid.
767775
unsafe {
768776
let n = template.read();
777+
template = template.add(1);
769778
if n == 0 {
770779
// End of template.
771780
break;
772781
} else if n < 128 {
773-
// Literal string piece.
782+
// Short literal string piece.
774783
length += n as usize;
775-
template = template.add(1 + n as usize);
784+
template = template.add(n as usize);
785+
} else if n == 128 {
786+
// Long literal string piece.
787+
let len = usize::from(u16::from_le_bytes(template.cast_array().read()));
788+
length += len;
789+
template = template.add(2 + len);
776790
} else {
791+
assert_unchecked(n >= 0xC0);
777792
// Placeholder piece.
778793
if length == 0 {
779794
starts_with_placeholder = true;
@@ -783,7 +798,7 @@ impl<'a> Arguments<'a> {
783798
+ (n & 2 != 0) as usize * 2 // width (16 bit)
784799
+ (n & 4 != 0) as usize * 2 // precision (16 bit)
785800
+ (n & 8 != 0) as usize * 2; // arg_index (16 bit)
786-
template = template.add(1 + skip as usize);
801+
template = template.add(skip as usize);
787802
}
788803
}
789804
}
@@ -1633,7 +1648,7 @@ pub fn write(output: &mut dyn Write, fmt: Arguments<'_>) -> Result {
16331648
if n == 0 {
16341649
// End of template.
16351650
return Ok(());
1636-
} else if n < 128 {
1651+
} else if n < 0x80 {
16371652
// Literal string piece of length `n`.
16381653

16391654
// SAFETY: We can assume the strings in the template are valid.
@@ -1643,7 +1658,19 @@ pub fn write(output: &mut dyn Write, fmt: Arguments<'_>) -> Result {
16431658
s
16441659
};
16451660
output.write_str(s)?;
1646-
} else if n == 128 {
1661+
} else if n == 0x80 {
1662+
// Literal string piece with a 16-bit length.
1663+
1664+
// SAFETY: We can assume the strings in the template are valid.
1665+
let s = unsafe {
1666+
let len = usize::from(u16::from_le_bytes(template.cast_array().read()));
1667+
template = template.add(2);
1668+
let s = crate::str::from_raw_parts(template.as_ptr(), len);
1669+
template = template.add(len);
1670+
s
1671+
};
1672+
output.write_str(s)?;
1673+
} else if n == 0xC0 {
16471674
// Placeholder for next argument with default options.
16481675
//
16491676
// Having this as a separate case improves performance for the common case.
@@ -1656,6 +1683,9 @@ pub fn write(output: &mut dyn Write, fmt: Arguments<'_>) -> Result {
16561683
}
16571684
arg_index += 1;
16581685
} else {
1686+
// SAFETY: We can assume the template is valid.
1687+
unsafe { assert_unchecked(n > 0xC0) };
1688+
16591689
// Placeholder with custom options.
16601690

16611691
let mut opt = FormattingOptions::new();

tests/mir-opt/sroa/lifetimes.foo.ScalarReplacementOfAggregates.diff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@
138138
StorageDead(_16);
139139
StorageLive(_20);
140140
StorageLive(_21);
141-
_21 = const b"\x80\x01 \x80\x01\n\x00";
141+
_21 = const b"\xc0\x01 \xc0\x01\n\x00";
142142
_20 = &(*_21);
143143
StorageLive(_22);
144144
StorageLive(_23);
@@ -201,6 +201,6 @@
201201
}
202202

203203
ALLOC0 (size: 7, align: 1) {
204-
80 01 20 80 01 0a 00 │ .. ....
204+
c0 01 20 c0 01 0a 00 │ .. ....
205205
}
206206

tests/ui/unpretty/exhaustive.hir.stdout

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ mod expressions {
394394
{
395395
super let args = (&expr,);
396396
super let args = [format_argument::new_display(args.0)];
397-
unsafe { format_arguments::new(b"\x80\x00", &args) }
397+
unsafe { format_arguments::new(b"\xc0\x00", &args) }
398398
};
399399
}
400400
}

tests/ui/unpretty/flattened-format-args.stdout

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ fn main() {
1414
super let args = (&x,);
1515
super let args = [format_argument::new_display(args.0)];
1616
unsafe {
17-
format_arguments::new(b"\x08a 123 b \x80\x05 xyz\n\x00",
17+
format_arguments::new(b"\x08a 123 b \xc0\x05 xyz\n\x00",
1818
&args)
1919
}
2020
});

0 commit comments

Comments
 (0)