Skip to content

Commit a411769

Browse files
committed
Merge rust-bitcoin/rust-bitcoin#1023: Add consensus_decode_from_finite_reader optimization
082e185 Add `consensus_decode_from_finite_reader` optimization (Dawid Ciężarkiewicz) Pull request description: As things are right now, memory exhaustion protection in `Decodable` is based on checking input-decoded lengths against arbitrary limits, and ad-hoc wrapping collection deserialization in `Take`. The problem with that are two-fold: * Potential consensus bugs due to incorrect limits. * Performance degradation when decoding nested structured, due to recursive `Take<Take<..>>` readers. This change introduces a systematic approach to the problem. A concept of a "size-limited-reader" is introduced to rely on the input data to finish at enforced limit and fail deserialization. Memory exhaustion protection is now achived by capping allocations to reasonable values, yet allowing the underlying collections to grow to accomodate rare yet legitmately oversized data (with tiny performance cost), and reliance on input data size limit. A set of simple rules allow avoiding recursive `Take` wrappers. Fix #997 ACKs for top commit: apoelstra: ACK 082e185 tcharding: ACK 082e185 Tree-SHA512: fa04b62a4799c9a11c5f85ec78a18fa9c2cd4819c83a0d6148fbb203c6fa15c2689cb0847e612b35b8c285a756d81690b31a9bede4486b845f0c16b9fcc6d097
2 parents 8a997b1 + c4a0c6c commit a411769

File tree

5 files changed

+215
-86
lines changed

5 files changed

+215
-86
lines changed

src/blockdata/script.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
//! This module provides the structures and functions needed to support scripts.
2424
//!
2525
26+
use crate::consensus::encode::MAX_VEC_SIZE;
2627
use crate::prelude::*;
2728

2829
use crate::io;
@@ -1075,9 +1076,14 @@ impl Encodable for Script {
10751076
}
10761077

10771078
impl Decodable for Script {
1079+
#[inline]
1080+
fn consensus_decode_from_finite_reader<D: io::Read>(d: D) -> Result<Self, encode::Error> {
1081+
Ok(Script(Decodable::consensus_decode_from_finite_reader(d)?))
1082+
}
1083+
10781084
#[inline]
10791085
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
1080-
Ok(Script(Decodable::consensus_decode(d)?))
1086+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
10811087
}
10821088
}
10831089

src/blockdata/transaction.rs

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -639,14 +639,20 @@ impl Encodable for TxIn {
639639
}
640640
}
641641
impl Decodable for TxIn {
642-
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
642+
#[inline]
643+
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
643644
Ok(TxIn {
644-
previous_output: Decodable::consensus_decode(&mut d)?,
645-
script_sig: Decodable::consensus_decode(&mut d)?,
646-
sequence: Decodable::consensus_decode(d)?,
645+
previous_output: Decodable::consensus_decode_from_finite_reader(&mut d)?,
646+
script_sig: Decodable::consensus_decode_from_finite_reader(&mut d)?,
647+
sequence: Decodable::consensus_decode_from_finite_reader(d)?,
647648
witness: Witness::default(),
648649
})
649650
}
651+
652+
#[inline]
653+
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
654+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
655+
}
650656
}
651657

652658
impl Encodable for Transaction {
@@ -680,20 +686,19 @@ impl Encodable for Transaction {
680686
}
681687

682688
impl Decodable for Transaction {
683-
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
684-
let mut d = d.take(MAX_VEC_SIZE as u64);
685-
let version = i32::consensus_decode(&mut d)?;
686-
let input = Vec::<TxIn>::consensus_decode(&mut d)?;
689+
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, encode::Error> {
690+
let version = i32::consensus_decode_from_finite_reader(&mut d)?;
691+
let input = Vec::<TxIn>::consensus_decode_from_finite_reader(&mut d)?;
687692
// segwit
688693
if input.is_empty() {
689-
let segwit_flag = u8::consensus_decode(&mut d)?;
694+
let segwit_flag = u8::consensus_decode_from_finite_reader(&mut d)?;
690695
match segwit_flag {
691696
// BIP144 input witnesses
692697
1 => {
693-
let mut input = Vec::<TxIn>::consensus_decode(&mut d)?;
694-
let output = Vec::<TxOut>::consensus_decode(&mut d)?;
698+
let mut input = Vec::<TxIn>::consensus_decode_from_finite_reader(&mut d)?;
699+
let output = Vec::<TxOut>::consensus_decode_from_finite_reader(&mut d)?;
695700
for txin in input.iter_mut() {
696-
txin.witness = Decodable::consensus_decode(&mut d)?;
701+
txin.witness = Decodable::consensus_decode_from_finite_reader(&mut d)?;
697702
}
698703
if !input.is_empty() && input.iter().all(|input| input.witness.is_empty()) {
699704
Err(encode::Error::ParseFailed("witness flag set but no witnesses present"))
@@ -702,7 +707,7 @@ impl Decodable for Transaction {
702707
version,
703708
input,
704709
output,
705-
lock_time: Decodable::consensus_decode(d)?,
710+
lock_time: Decodable::consensus_decode_from_finite_reader(d)?,
706711
})
707712
}
708713
}
@@ -714,11 +719,15 @@ impl Decodable for Transaction {
714719
Ok(Transaction {
715720
version,
716721
input,
717-
output: Decodable::consensus_decode(&mut d)?,
718-
lock_time: Decodable::consensus_decode(d)?,
722+
output: Decodable::consensus_decode_from_finite_reader(&mut d)?,
723+
lock_time: Decodable::consensus_decode_from_finite_reader(d)?,
719724
})
720725
}
721726
}
727+
728+
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, encode::Error> {
729+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
730+
}
722731
}
723732

724733
/// This type is consensus valid but an input including it would prevent the transaction from

src/consensus/encode.rs

Lines changed: 125 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ pub fn deserialize<T: Decodable>(data: &[u8]) -> Result<T, Error> {
167167
/// doesn't consume the entire vector.
168168
pub fn deserialize_partial<T: Decodable>(data: &[u8]) -> Result<(T, usize), Error> {
169169
let mut decoder = Cursor::new(data);
170-
let rv = Decodable::consensus_decode(&mut decoder)?;
170+
let rv = Decodable::consensus_decode_from_finite_reader(&mut decoder)?;
171171
let consumed = decoder.position() as usize;
172172

173173
Ok((rv, consumed))
@@ -319,6 +319,47 @@ pub trait Encodable {
319319

320320
/// Data which can be encoded in a consensus-consistent way
321321
pub trait Decodable: Sized {
322+
/// Decode `Self` from a size-limited reader.
323+
///
324+
/// Like `consensus_decode` but relies on the reader being
325+
/// limited in the amount of data it returns, e.g. by
326+
/// being wrapped in [`std::io::Take`].
327+
///
328+
/// Failling to obide to this requirement might lead to
329+
/// memory exhaustion caused by malicious inputs.
330+
///
331+
/// Users should default to `consensus_decode`, but
332+
/// when data to be decoded is already in a byte vector
333+
/// of a limited size, calling this function directly
334+
/// might be marginally faster (due to avoiding
335+
/// extra checks).
336+
///
337+
/// ### Rules for trait implementations
338+
///
339+
/// * Simple types that that have a fixed size (own and member fields),
340+
/// don't have to overwrite this method, or be concern with it.
341+
/// * Types that deserialize using externally provided length
342+
/// should implement it:
343+
/// * Make `consensus_decode` forward to `consensus_decode_bytes_from_finite_reader`
344+
/// with the reader wrapped by `Take`. Failure to do so, without other
345+
/// forms of memory exhaustion protection might lead to resource exhaustion
346+
/// vulnerability.
347+
/// * Put a max cap on things like `Vec::with_capacity` to avoid oversized
348+
/// allocations, and rely on the reader running out of data, and collections
349+
/// reallocating on a legitimately oversized input data, instead of trying
350+
/// to enforce arbitrary length limits.
351+
/// * Types that contain other types that implement custom `consensus_decode_from_finite_reader`,
352+
/// should also implement it applying same rules, and in addition make sure to call
353+
/// `consensus_decode_from_finite_reader` on all members, to avoid creating redundant
354+
/// `Take` wrappers. Failure to do so might result only in a tiny performance hit.
355+
fn consensus_decode_from_finite_reader<D: io::Read>(d: D) -> Result<Self, Error> {
356+
// This method is always strictly less general than, `consensus_decode`,
357+
// so it's safe and make sense to default to just calling it.
358+
// This way most types, that don't care about protecting against
359+
// resource exhaustion due to malicious input, can just ignore it.
360+
Self::consensus_decode(d)
361+
}
362+
322363
/// Decode an object with a well-defined format
323364
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error>;
324365
}
@@ -555,23 +596,29 @@ macro_rules! impl_vec {
555596
Ok(len)
556597
}
557598
}
599+
558600
impl Decodable for Vec<$type> {
559601
#[inline]
560-
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, Error> {
561-
let len = VarInt::consensus_decode(&mut d)?.0;
562-
let byte_size = (len as usize)
563-
.checked_mul(mem::size_of::<$type>())
564-
.ok_or(self::Error::ParseFailed("Invalid length"))?;
565-
if byte_size > MAX_VEC_SIZE {
566-
return Err(self::Error::OversizedVectorAllocation { requested: byte_size, max: MAX_VEC_SIZE })
567-
}
568-
let mut ret = Vec::with_capacity(len as usize);
569-
let mut d = d.take(MAX_VEC_SIZE as u64);
602+
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, Error> {
603+
let len = VarInt::consensus_decode_from_finite_reader(&mut d)?.0;
604+
// Do not allocate upfront more items than if the sequnce of type
605+
// occupied roughly quarter a block. This should never be the case
606+
// for normal data, but even if that's not true - `push` will just
607+
// reallocate.
608+
// Note: OOM protection relies on reader eventually running out of
609+
// data to feed us.
610+
let max_capacity = MAX_VEC_SIZE / 4 / mem::size_of::<$type>();
611+
let mut ret = Vec::with_capacity(core::cmp::min(len as usize, max_capacity));
570612
for _ in 0..len {
571-
ret.push(Decodable::consensus_decode(&mut d)?);
613+
ret.push(Decodable::consensus_decode_from_finite_reader(&mut d)?);
572614
}
573615
Ok(ret)
574616
}
617+
618+
#[inline]
619+
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
620+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
621+
}
575622
}
576623
}
577624
}
@@ -597,6 +644,33 @@ pub(crate) fn consensus_encode_with_size<S: io::Write>(data: &[u8], mut s: S) ->
597644
}
598645

599646

647+
struct ReadBytesFromFiniteReaderOpts {
648+
len: usize,
649+
chunk_size: usize,
650+
}
651+
652+
/// Read `opts.len` bytes from reader, where `opts.len` could potentially be malicious
653+
///
654+
/// This function relies on reader being bound in amount of data
655+
/// it returns for OOM protection. See [`Decodable::consensus_decode_from_finite_reader`].
656+
#[inline]
657+
fn read_bytes_from_finite_reader<D: io::Read>(mut d: D, mut opts: ReadBytesFromFiniteReaderOpts) -> Result<Vec<u8>, Error> {
658+
let mut ret = vec![];
659+
660+
assert_ne!(opts.chunk_size, 0);
661+
662+
while opts.len > 0 {
663+
let chunk_start = ret.len();
664+
let chunk_size = core::cmp::min(opts.len, opts.chunk_size);
665+
let chunk_end = chunk_start + chunk_size;
666+
ret.resize(chunk_end, 0u8);
667+
d.read_slice(&mut ret[chunk_start..chunk_end])?;
668+
opts.len -= chunk_size;
669+
}
670+
671+
Ok(ret)
672+
}
673+
600674
impl Encodable for Vec<u8> {
601675
#[inline]
602676
fn consensus_encode<S: io::Write>(&self, s: S) -> Result<usize, io::Error> {
@@ -606,14 +680,15 @@ impl Encodable for Vec<u8> {
606680

607681
impl Decodable for Vec<u8> {
608682
#[inline]
609-
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, Error> {
683+
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, Error> {
610684
let len = VarInt::consensus_decode(&mut d)?.0 as usize;
611-
if len > MAX_VEC_SIZE {
612-
return Err(self::Error::OversizedVectorAllocation { requested: len, max: MAX_VEC_SIZE })
613-
}
614-
let mut ret = vec![0u8; len];
615-
d.read_slice(&mut ret)?;
616-
Ok(ret)
685+
// most real-world vec of bytes data, wouldn't be larger than 128KiB
686+
read_bytes_from_finite_reader(d, ReadBytesFromFiniteReaderOpts { len, chunk_size: 128 * 1024 })
687+
}
688+
689+
#[inline]
690+
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
691+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
617692
}
618693
}
619694

@@ -625,9 +700,14 @@ impl Encodable for Box<[u8]> {
625700
}
626701

627702
impl Decodable for Box<[u8]> {
703+
#[inline]
704+
fn consensus_decode_from_finite_reader<D: io::Read>(d: D) -> Result<Self, Error> {
705+
<Vec<u8>>::consensus_decode_from_finite_reader(d).map(From::from)
706+
}
707+
628708
#[inline]
629709
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
630-
<Vec<u8>>::consensus_decode(d).map(From::from)
710+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
631711
}
632712
}
633713

@@ -651,17 +731,11 @@ impl Encodable for CheckedData {
651731

652732
impl Decodable for CheckedData {
653733
#[inline]
654-
fn consensus_decode<D: io::Read>(mut d: D) -> Result<Self, Error> {
655-
let len = u32::consensus_decode(&mut d)?;
656-
if len > MAX_VEC_SIZE as u32 {
657-
return Err(self::Error::OversizedVectorAllocation {
658-
requested: len as usize,
659-
max: MAX_VEC_SIZE
660-
});
661-
}
662-
let checksum = <[u8; 4]>::consensus_decode(&mut d)?;
663-
let mut ret = vec![0u8; len as usize];
664-
d.read_slice(&mut ret)?;
734+
fn consensus_decode_from_finite_reader<D: io::Read>(mut d: D) -> Result<Self, Error> {
735+
let len = u32::consensus_decode_from_finite_reader(&mut d)? as usize;
736+
737+
let checksum = <[u8; 4]>::consensus_decode_from_finite_reader(&mut d)?;
738+
let ret = read_bytes_from_finite_reader(d, ReadBytesFromFiniteReaderOpts { len, chunk_size: MAX_VEC_SIZE })?;
665739
let expected_checksum = sha2_checksum(&ret);
666740
if expected_checksum != checksum {
667741
Err(self::Error::InvalidChecksum {
@@ -672,6 +746,10 @@ impl Decodable for CheckedData {
672746
Ok(CheckedData(ret))
673747
}
674748
}
749+
750+
fn consensus_decode<D: io::Read>(d: D) -> Result<Self, Error> {
751+
Self::consensus_decode_from_finite_reader(d.take(MAX_VEC_SIZE as u64))
752+
}
675753
}
676754

677755
// References
@@ -1064,5 +1142,21 @@ mod tests {
10641142

10651143
}
10661144
}
1145+
1146+
#[test]
1147+
fn test_read_bytes_from_finite_reader() {
1148+
let data : Vec<u8> = (0..10).collect();
1149+
1150+
for chunk_size in 1..20 {
1151+
assert_eq!(
1152+
read_bytes_from_finite_reader(
1153+
io::Cursor::new(&data),
1154+
ReadBytesFromFiniteReaderOpts { len: data.len(), chunk_size }
1155+
).unwrap(),
1156+
data
1157+
);
1158+
}
1159+
}
1160+
10671161
}
10681162

src/internal_macros.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ macro_rules! impl_consensus_encoding {
3232
}
3333

3434
impl $crate::consensus::Decodable for $thing {
35+
36+
#[inline]
37+
fn consensus_decode_from_finite_reader<D: $crate::io::Read>(
38+
mut d: D,
39+
) -> Result<$thing, $crate::consensus::encode::Error> {
40+
Ok($thing {
41+
$($field: $crate::consensus::Decodable::consensus_decode_from_finite_reader(&mut d)?),+
42+
})
43+
}
44+
3545
#[inline]
3646
fn consensus_decode<D: $crate::io::Read>(
3747
d: D,

0 commit comments

Comments
 (0)