Skip to content

Commit 05dd731

Browse files
NobodyXuDorianNiemiecSVRJS
authored andcommitted
Add Crc checksum validation for gzip::header::Parser (Nullus157#432)
Ensure the header is valid, [gzip wikipedia](https://en.wikipedia.org/wiki/Gzip) says that: > Two least significant bytes of the [CRC-32](https://en.wikipedia.org/wiki/CRC-32) (ISO 3309) of all bytes in the gzip file up to (not including) this field. --------- Signed-off-by: Jiahao XU <[email protected]>
1 parent f6aef50 commit 05dd731

File tree

2 files changed

+60
-29
lines changed

2 files changed

+60
-29
lines changed

crates/compression-codecs/src/gzip/decoder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ impl GzipDecoder {
6464
loop {
6565
match &mut self.state {
6666
State::Header(parser) => {
67-
if parser.input(input)?.is_some() {
67+
if parser.input(&mut self.crc, input)?.is_some() {
68+
self.crc.reset();
6869
self.state = State::Decoding;
6970
}
7071
}

crates/compression-codecs/src/gzip/header.rs

Lines changed: 58 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use compression_core::util::PartialBuffer;
2+
use flate2::Crc;
23
use std::io;
34

45
#[derive(Debug, Default)]
@@ -61,18 +62,39 @@ impl Header {
6162
}
6263
}
6364

65+
fn consume_input(crc: &mut Crc, n: usize, input: &mut PartialBuffer<&[u8]>) {
66+
crc.update(&input.unwritten()[..n]);
67+
input.advance(n);
68+
}
69+
70+
fn consume_cstr(crc: &mut Crc, input: &mut PartialBuffer<&[u8]>) -> Option<()> {
71+
if let Some(len) = memchr::memchr(0, input.unwritten()) {
72+
consume_input(crc, len + 1, input);
73+
Some(())
74+
} else {
75+
consume_input(crc, input.unwritten().len(), input);
76+
None
77+
}
78+
}
79+
6480
impl Parser {
65-
pub(super) fn input(&mut self, input: &mut PartialBuffer<&[u8]>) -> io::Result<Option<Header>> {
81+
pub(super) fn input(
82+
&mut self,
83+
crc: &mut Crc,
84+
input: &mut PartialBuffer<&[u8]>,
85+
) -> io::Result<Option<Header>> {
6686
loop {
6787
match &mut self.state {
6888
State::Fixed(data) => {
6989
data.copy_unwritten_from(input);
7090

7191
if data.unwritten().is_empty() {
72-
self.header = Header::parse(&data.take().into_inner())?;
92+
let data = data.get_mut();
93+
crc.update(data);
94+
self.header = Header::parse(data)?;
7395
self.state = State::ExtraLen(<_>::default());
7496
} else {
75-
return Ok(None);
97+
break Ok(None);
7698
}
7799
}
78100

@@ -85,22 +107,24 @@ impl Parser {
85107
data.copy_unwritten_from(input);
86108

87109
if data.unwritten().is_empty() {
88-
let len = u16::from_le_bytes(data.take().into_inner());
110+
let data = data.get_mut();
111+
crc.update(data);
112+
let len = u16::from_le_bytes(*data);
89113
self.state = State::Extra(len.into());
90114
} else {
91-
return Ok(None);
115+
break Ok(None);
92116
}
93117
}
94118

95119
State::Extra(bytes_to_consume) => {
96120
let n = input.unwritten().len().min(*bytes_to_consume);
97121
*bytes_to_consume -= n;
98-
input.advance(n);
122+
consume_input(crc, n, input);
99123

100124
if *bytes_to_consume == 0 {
101125
self.state = State::Filename;
102126
} else {
103-
return Ok(None);
127+
break Ok(None);
104128
}
105129
}
106130

@@ -110,13 +134,11 @@ impl Parser {
110134
continue;
111135
}
112136

113-
if let Some(len) = memchr::memchr(0, input.unwritten()) {
114-
input.advance(len + 1);
115-
self.state = State::Comment;
116-
} else {
117-
input.advance(input.unwritten().len());
118-
return Ok(None);
137+
if consume_cstr(crc, input).is_none() {
138+
break Ok(None);
119139
}
140+
141+
self.state = State::Comment;
120142
}
121143

122144
State::Comment => {
@@ -125,35 +147,43 @@ impl Parser {
125147
continue;
126148
}
127149

128-
if let Some(len) = memchr::memchr(0, input.unwritten()) {
129-
input.advance(len + 1);
130-
self.state = State::Crc(<_>::default());
131-
} else {
132-
input.advance(input.unwritten().len());
133-
return Ok(None);
150+
if consume_cstr(crc, input).is_none() {
151+
break Ok(None);
134152
}
153+
154+
self.state = State::Crc(<_>::default());
135155
}
136156

137157
State::Crc(data) => {
158+
let header = std::mem::take(&mut self.header);
159+
138160
if !self.header.flags.crc {
139161
self.state = State::Done;
140-
return Ok(Some(std::mem::take(&mut self.header)));
162+
break Ok(Some(header));
141163
}
142164

143165
data.copy_unwritten_from(input);
144166

145-
if data.unwritten().is_empty() {
167+
break if data.unwritten().is_empty() {
168+
let data = data.take().into_inner();
146169
self.state = State::Done;
147-
return Ok(Some(std::mem::take(&mut self.header)));
170+
let checksum = crc.sum().to_le_bytes();
171+
172+
if data == checksum[..2] {
173+
Ok(Some(header))
174+
} else {
175+
Err(io::Error::new(
176+
io::ErrorKind::InvalidData,
177+
"CRC computed for header does not match",
178+
))
179+
}
148180
} else {
149-
return Ok(None);
150-
}
181+
Ok(None)
182+
};
151183
}
152184

153-
State::Done => {
154-
return Err(io::Error::other("parser used after done"));
155-
}
156-
};
185+
State::Done => break Err(io::Error::other("parser used after done")),
186+
}
157187
}
158188
}
159189
}

0 commit comments

Comments
 (0)