|
/// Wraps one byte so that its `fmt::Debug` output is an escaped, readable
/// rendering of that byte.
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    /// Writes the byte using `core::ascii::escape_default`, with two tweaks:
    /// an ASCII space is shown quoted, and hex escapes use uppercase digits.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is nearly invisible in debug output, so it is shown
        // in quotes instead.
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // The longest escape produced below is four bytes (`\xNN`), so a
        // 10-byte scratch buffer always has room.
        let mut buf = [0u8; 10];
        let mut used = 0;
        for (pos, raw) in core::ascii::escape_default(self.0).enumerate() {
            // Positions 0 and 1 hold the backslash and the escape letter;
            // only the hex digits after them are upper-cased (\xab -> \xAB).
            buf[used] = match raw {
                b'a'..=b'f' if pos >= 2 => raw.to_ascii_uppercase(),
                _ => raw,
            };
            used += 1;
        }
        // The escaped form is pure ASCII, so this conversion cannot fail.
        f.write_str(core::str::from_utf8(&buf[..used]).unwrap())
    }
}
| 27 | + |
| 28 | +/// A type that provides a human readable debug impl for arbitrary bytes. |
| 29 | +/// |
| 30 | +/// This generally works best when the bytes are presumed to be mostly UTF-8, |
| 31 | +/// but will work for anything. |
| 32 | +/// |
| 33 | +/// N.B. This is copied nearly verbatim from regex-automata. Sigh. |
| 34 | +pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); |
| 35 | + |
| 36 | +impl<'a> core::fmt::Debug for Bytes<'a> { |
| 37 | + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| 38 | + write!(f, "\"")?; |
| 39 | + // This is a sad re-implementation of a similar impl found in bstr. |
| 40 | + let mut bytes = self.0; |
| 41 | + while let Some(result) = utf8_decode(bytes) { |
| 42 | + let ch = match result { |
| 43 | + Ok(ch) => ch, |
| 44 | + Err(byte) => { |
| 45 | + write!(f, r"\x{:02x}", byte)?; |
| 46 | + bytes = &bytes[1..]; |
| 47 | + continue; |
| 48 | + } |
| 49 | + }; |
| 50 | + bytes = &bytes[ch.len_utf8()..]; |
| 51 | + match ch { |
| 52 | + '\0' => write!(f, "\\0")?, |
| 53 | + // ASCII control characters except \0, \n, \r, \t |
| 54 | + '\x01'..='\x08' |
| 55 | + | '\x0b' |
| 56 | + | '\x0c' |
| 57 | + | '\x0e'..='\x19' |
| 58 | + | '\x7f' => { |
| 59 | + write!(f, "\\x{:02x}", u32::from(ch))?; |
| 60 | + } |
| 61 | + '\n' | '\r' | '\t' | _ => { |
| 62 | + write!(f, "{}", ch.escape_debug())?; |
| 63 | + } |
| 64 | + } |
| 65 | + } |
| 66 | + write!(f, "\"")?; |
| 67 | + Ok(()) |
| 68 | + } |
| 69 | +} |
| 70 | + |
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
///
/// If no valid encoding of a codepoint exists at the beginning of the given
/// byte slice, then the first byte is returned instead.
///
/// This returns `None` if and only if `bytes` is empty.
fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let first = *bytes.first()?;
    // A codepoint occupies at most four bytes, so only that much is examined.
    let window = &bytes[..core::cmp::min(4, bytes.len())];
    let valid = match core::str::from_utf8(window) {
        Ok(s) => s,
        // The window as a whole may be invalid even though it *starts* with
        // a complete codepoint (e.g. `b"a\xFF"`). Keep the valid prefix so
        // that codepoint is still decoded rather than reported as an error.
        // Re-validating the prefix cannot fail; the fallback is defensive.
        Err(err) => {
            core::str::from_utf8(&window[..err.valid_up_to()]).unwrap_or("")
        }
    };
    // An empty valid prefix means the very first byte is not decodable.
    Some(valid.chars().next().ok_or(first))
}
| 86 | + |
| 87 | +/* |
| 88 | +/// Given a UTF-8 leading byte, this returns the total number of code units |
| 89 | +/// in the following encoded codepoint. |
| 90 | +/// |
| 91 | +/// If the given byte is not a valid UTF-8 leading byte, then this returns |
| 92 | +/// `None`. |
| 93 | +fn len(byte: u8) -> Option<usize> { |
| 94 | + if byte <= 0x7F { |
| 95 | + return Some(1); |
| 96 | + } else if byte & 0b1100_0000 == 0b1000_0000 { |
| 97 | + return None; |
| 98 | + } else if byte <= 0b1101_1111 { |
| 99 | + Some(2) |
| 100 | + } else if byte <= 0b1110_1111 { |
| 101 | + Some(3) |
| 102 | + } else if byte <= 0b1111_0111 { |
| 103 | + Some(4) |
| 104 | + } else { |
| 105 | + None |
| 106 | + } |
| 107 | +} |
| 108 | +*/ |
0 commit comments