Skip to content

Commit c7b1b47

Browse files
authored
print: use hex for some consts (e.g. 0xff00 over 65280). (#14)
This adds a heuristic (based on the MSE of digit/nibble probabilities) to pick the "less random-looking" representation of a given number, out of decimal (base 10) and hexadecimal (base 16, with a `0x` prefix).

While this is a mostly aesthetic distinction, using decimal everywhere obfuscates otherwise-notable bit-level patterns (e.g. 2<sup>n</sup>, 2<sup>n</sup>-1, small-integer multiples and other combinations thereof, etc.).

A couple of quick examples (from the sample that inspired finally tackling this):

|Before|After|
|-|-|
|![image](https://github.com/user-attachments/assets/6547ea61-c225-4cfe-8f3f-53a0ade043d7)|![image](https://github.com/user-attachments/assets/ba5ac2cc-7ca0-41bf-b7c5-8f295712aac1)|
|![image](https://github.com/user-attachments/assets/8647266f-fe08-4929-9b58-34010758be78)|![image](https://github.com/user-attachments/assets/08bdd1c8-7cf0-4ab6-afae-22e4b7abef6b)|
2 parents 92afdee + 25d90e7 commit c7b1b47

File tree

1 file changed

+77
-6
lines changed

1 file changed

+77
-6
lines changed

src/print/mod.rs

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,9 +1130,64 @@ impl Printer<'_> {
11301130
}
11311131

11321132
impl Printer<'_> {
1133-
/// Pretty-print a string literal with escaping and styling.
1133+
/// Pretty-print a numeric (integer) literal, in either base 10 or 16.
1134+
///
1135+
/// Heuristics (e.g. statistics of the digits/nibbles) are used to pick
1136+
/// e.g. `0xff00` over `65280`, and `1000` over `0x3e8`.
11341137
//
1135-
// FIXME(eddyb) add methods like this for all styled text (e.g. numeric literals).
1138+
// FIXME(eddyb) handle signedness, maybe tune heuristics?
1139+
// FIXME(eddyb) add methods like this for other kinds of numeric literals.
1140+
fn pretty_numeric_literal_as_dec_or_hex(&self, n: u128) -> pretty::Fragment {
    /// Scores how non-uniform the digits of `s` are when read in base `BASE`.
    ///
    /// Each digit contributes `1 / s.len()` to its bucket; the result is the
    /// mean squared deviation of the `BASE` buckets from their average.
    /// A string dominated by a few repeated digits scores higher than one
    /// whose digits are spread evenly.
    ///
    /// Panics if `s` contains a character that is not a valid base-`BASE`
    /// digit (callers below only pass freshly formatted numbers).
    fn score<const BASE: usize>(s: &str) -> f64 {
        let radix = BASE as u32;
        let bucket_count = BASE as f64;
        let share = 1.0 / (s.len() as f64);

        let mut histogram = [0.0_f64; BASE];
        let mut total = 0.0_f64;
        s.chars().for_each(|c| {
            let bucket = c.to_digit(radix).unwrap() as usize;
            histogram[bucket] += share;
            total += share;
        });

        // NOTE: `total` is accumulated (rather than assumed to be `1.0`) so
        // the average carries whatever float rounding the per-digit shares
        // introduced; in exact arithmetic this would simply be `1 / BASE`.
        let mean = total / bucket_count;

        let squared_error: f64 = histogram.iter().map(|&p| (p - mean).powi(2)).sum();
        squared_error / bucket_count
    }

    // FIXME: allocation-free and float-free variants should be possible;
    // this version deliberately favors simplicity.
    let style = self.numeric_literal_style();
    let dec = n.to_string();

    // Numbers with at most two decimal digits always stay decimal: they have
    // too few nibbles for the statistics to be meaningful, and short values
    // tend to be more recognizable in decimal anyway (e.g. `64` vs `0x40`).
    let text = if dec.len() <= 2 {
        dec
    } else {
        let hex = format!("0x{n:x}");

        // Positive means the hex digits (sans `0x` prefix) look "less random"
        // than the decimal ones.
        let hex_advantage = score::<16>(&hex[2..]) - score::<10>(&dec);

        let prefer_hex = match hex_advantage {
            // Scores too close to call (arbitrary epsilon, chosen from
            // observed values): fall back to whichever spelling is not
            // longer, counting the `0x` prefix against hex.
            d if d.abs() < 1e-3 => hex.len() <= dec.len(),
            d => d > 0.0,
        };

        if prefer_hex { hex } else { dec }
    };

    style.apply(text).into()
}
1189+
1190+
/// Pretty-print a string literal with escaping and styling.
11361191
fn pretty_string_literal(&self, s: &str) -> pretty::Fragment {
11371192
// HACK(eddyb) this is somewhat inefficient, but we need to allocate a
11381193
// `String` for every piece anyway, so might as well make it convenient.
@@ -2570,13 +2625,29 @@ impl Print for ConstDef {
25702625
let (printed_value, ty) = if signed {
25712626
let sext_raw_bits =
25722627
(raw_bits as u128 as i128) << (128 - width) >> (128 - width);
2573-
(format!("{sext_raw_bits}"), format!("s{width}"))
2628+
// FIXME(eddyb) consider supporting negative hex.
2629+
(
2630+
if sext_raw_bits >= 0 {
2631+
printer.pretty_numeric_literal_as_dec_or_hex(
2632+
sext_raw_bits as u128,
2633+
)
2634+
} else {
2635+
printer
2636+
.numeric_literal_style()
2637+
.apply(format!("{sext_raw_bits}"))
2638+
.into()
2639+
},
2640+
format!("s{width}"),
2641+
)
25742642
} else {
2575-
(format!("{raw_bits}"), format!("u{width}"))
2643+
(
2644+
printer.pretty_numeric_literal_as_dec_or_hex(raw_bits.into()),
2645+
format!("u{width}"),
2646+
)
25762647
};
25772648
Some(pretty::Fragment::new([
2578-
printer.numeric_literal_style().apply(printed_value),
2579-
literal_ty_suffix(ty),
2649+
printed_value,
2650+
literal_ty_suffix(ty).into(),
25802651
]))
25812652
} else {
25822653
None

0 commit comments

Comments
 (0)