Skip to content

Commit 1058100

Browse files
authored
Deduplicate how many times type-related opcodes are listed (#1822)
* Deduplicate how many times type-related opcodes are listed This commit refactors the parsing of core types in wasmparser to deduplicate the number of times that opcodes for particular types are listed. Previously each layer of parsing types would check for all valid prefixes and then delegate to a "lower" parser if applicable. This meant though that a type's opcode was listed at each layer it could be parsed, resulting in duplication. Instead now types only parse what is exclusive to that layer and then a lower layer parses everything else. The only gotcha with this is to preserve reasonable error messages (lest every error become "invalid abstract heap type"). For this a new `BinaryReaderErrorKind` enum (internal) was added which is used to track this and the message is updated as parsing fails and errors progress upwards. * Refactor `BinaryReaderError` some more
1 parent cc4c95c commit 1058100

File tree

2 files changed

+98
-69
lines changed

2 files changed

+98
-69
lines changed

crates/wasmparser/src/binary_reader.rs

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,17 @@ pub struct BinaryReaderError {
3434
#[derive(Debug, Clone)]
3535
pub(crate) struct BinaryReaderErrorInner {
3636
pub(crate) message: String,
37+
pub(crate) kind: BinaryReaderErrorKind,
3738
pub(crate) offset: usize,
3839
pub(crate) needed_hint: Option<usize>,
3940
}
4041

42+
#[derive(Debug, Clone, Copy)]
43+
pub(crate) enum BinaryReaderErrorKind {
44+
Custom,
45+
Invalid,
46+
}
47+
4148
/// The result for `BinaryReader` operations.
4249
pub type Result<T, E = BinaryReaderError> = core::result::Result<T, E>;
4350

@@ -56,31 +63,41 @@ impl fmt::Display for BinaryReaderError {
5663

5764
impl BinaryReaderError {
5865
#[cold]
59-
pub(crate) fn new(message: impl Into<String>, offset: usize) -> Self {
60-
let message = message.into();
66+
pub(crate) fn _new(kind: BinaryReaderErrorKind, message: String, offset: usize) -> Self {
6167
BinaryReaderError {
6268
inner: Box::new(BinaryReaderErrorInner {
69+
kind,
6370
message,
6471
offset,
6572
needed_hint: None,
6673
}),
6774
}
6875
}
6976

77+
#[cold]
78+
pub(crate) fn new(message: impl Into<String>, offset: usize) -> Self {
79+
Self::_new(BinaryReaderErrorKind::Custom, message.into(), offset)
80+
}
81+
82+
#[cold]
83+
pub(crate) fn invalid(msg: &'static str, offset: usize) -> Self {
84+
Self::_new(BinaryReaderErrorKind::Invalid, msg.into(), offset)
85+
}
86+
7087
#[cold]
7188
pub(crate) fn fmt(args: fmt::Arguments<'_>, offset: usize) -> Self {
7289
BinaryReaderError::new(args.to_string(), offset)
7390
}
7491

7592
#[cold]
7693
pub(crate) fn eof(offset: usize, needed_hint: usize) -> Self {
77-
BinaryReaderError {
78-
inner: Box::new(BinaryReaderErrorInner {
79-
message: "unexpected end-of-file".to_string(),
80-
offset,
81-
needed_hint: Some(needed_hint),
82-
}),
83-
}
94+
let mut err = BinaryReaderError::new("unexpected end-of-file", offset);
95+
err.inner.needed_hint = Some(needed_hint);
96+
err
97+
}
98+
99+
pub(crate) fn kind(&mut self) -> BinaryReaderErrorKind {
100+
self.inner.kind
84101
}
85102

86103
/// Get this error's message.
@@ -94,9 +111,12 @@ impl BinaryReaderError {
94111
}
95112

96113
#[cfg(feature = "validate")]
97-
pub(crate) fn add_context(&mut self, mut context: String) {
98-
context.push_str("\n");
99-
self.inner.message.insert_str(0, &context);
114+
pub(crate) fn add_context(&mut self, context: String) {
115+
self.inner.message = format!("{context}\n{}", self.inner.message);
116+
}
117+
118+
pub(crate) fn set_message(&mut self, message: &str) {
119+
self.inner.message = message.to_string();
100120
}
101121
}
102122

crates/wasmparser/src/readers/core/types.rs

Lines changed: 66 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
* limitations under the License.
1414
*/
1515

16+
use crate::binary_reader::BinaryReaderErrorKind;
1617
use crate::limits::{
1718
MAX_WASM_FUNCTION_PARAMS, MAX_WASM_FUNCTION_RETURNS, MAX_WASM_STRUCT_FIELDS,
1819
MAX_WASM_SUPERTYPES, MAX_WASM_TYPES,
@@ -1709,79 +1710,65 @@ impl<'a> FromReader<'a> for ValType {
17091710
reader.read_u8()?;
17101711
Ok(ValType::V128)
17111712
}
1712-
0x70 | 0x6F | 0x65 | 0x64 | 0x63 | 0x6E | 0x71 | 0x72 | 0x73 | 0x74 | 0x75 | 0x6D
1713-
| 0x6B | 0x6A | 0x6C | 0x69 | 0x68 => Ok(ValType::Ref(reader.read()?)),
1714-
_ => bail!(reader.original_position(), "invalid value type"),
1713+
_ => {
1714+
// Reclassify errors as invalid value types here because
1715+
// that's the "root" of what was being parsed rather than
1716+
// reference types.
1717+
let refty = reader.read().map_err(|mut e| {
1718+
if let BinaryReaderErrorKind::Invalid = e.kind() {
1719+
e.set_message("invalid value type");
1720+
}
1721+
e
1722+
})?;
1723+
Ok(ValType::Ref(refty))
1724+
}
17151725
}
17161726
}
17171727
}
17181728

17191729
impl<'a> FromReader<'a> for RefType {
17201730
fn from_reader(reader: &mut BinaryReader<'a>) -> Result<Self> {
1721-
let absheapty = |byte, pos| match byte {
1722-
0x70 => Ok(RefType::FUNC.nullable()),
1723-
0x6F => Ok(RefType::EXTERN.nullable()),
1724-
0x6E => Ok(RefType::ANY.nullable()),
1725-
0x71 => Ok(RefType::NONE.nullable()),
1726-
0x72 => Ok(RefType::NOEXTERN.nullable()),
1727-
0x73 => Ok(RefType::NOFUNC.nullable()),
1728-
0x6D => Ok(RefType::EQ.nullable()),
1729-
0x6B => Ok(RefType::STRUCT.nullable()),
1730-
0x6A => Ok(RefType::ARRAY.nullable()),
1731-
0x6C => Ok(RefType::I31.nullable()),
1732-
0x69 => Ok(RefType::EXN.nullable()),
1733-
0x74 => Ok(RefType::NOEXN.nullable()),
1734-
0x68 => Ok(RefType::CONT.nullable()),
1735-
0x75 => Ok(RefType::NOCONT.nullable()),
1736-
_ => bail!(pos, "invalid abstract heap type"),
1737-
};
1738-
17391731
// NB: See `FromReader<'a> for ValType` for a table of how this
17401732
// interacts with other value encodings.
1741-
match reader.read()? {
1742-
byte @ (0x70 | 0x6F | 0x6E | 0x71 | 0x72 | 0x73 | 0x6D | 0x6B | 0x6A | 0x6C | 0x69
1743-
| 0x74) => {
1744-
let pos = reader.original_position();
1745-
absheapty(byte, pos)
1746-
}
1747-
0x65 => {
1748-
let byte = reader.read()?;
1749-
let pos = reader.original_position();
1750-
Ok(absheapty(byte, pos)?.shared().expect("must be abstract"))
1751-
}
1752-
byte @ (0x63 | 0x64) => {
1753-
let nullable = byte == 0x63;
1754-
let pos = reader.original_position();
1733+
let pos = reader.original_position();
1734+
match reader.peek()? {
1735+
0x63 | 0x64 => {
1736+
let nullable = reader.read_u8()? == 0x63;
17551737
RefType::new(nullable, reader.read()?)
17561738
.ok_or_else(|| crate::BinaryReaderError::new("type index too large", pos))
17571739
}
1758-
_ => bail!(reader.original_position(), "malformed reference type"),
1740+
_ => {
1741+
// Reclassify errors as invalid reference types here because
1742+
// that's the "root" of what was being parsed rather than
1743+
// heap types.
1744+
let hty = reader.read().map_err(|mut e| {
1745+
if let BinaryReaderErrorKind::Invalid = e.kind() {
1746+
e.set_message("malformed reference type");
1747+
}
1748+
e
1749+
})?;
1750+
RefType::new(true, hty)
1751+
.ok_or_else(|| crate::BinaryReaderError::new("type index too large", pos))
1752+
}
17591753
}
17601754
}
17611755
}
17621756

17631757
impl<'a> FromReader<'a> for HeapType {
17641758
fn from_reader(reader: &mut BinaryReader<'a>) -> Result<Self> {
1765-
// NB: See `FromReader<'a> for ValType` for a table of how this
1766-
// interacts with other value encodings.
1767-
match reader.peek()? {
1768-
0x65 => {
1769-
reader.read_u8()?;
1770-
let ty = reader.read()?;
1771-
Ok(HeapType::Abstract { shared: true, ty })
1772-
}
1773-
0x70 | 0x6F | 0x6E | 0x71 | 0x72 | 0x73 | 0x6D | 0x6B | 0x6A | 0x6C | 0x68 | 0x69
1774-
| 0x74 | 0x75 => {
1775-
let ty = reader.read()?;
1776-
Ok(HeapType::Abstract { shared: false, ty })
1777-
}
1778-
_ => {
1779-
let idx = match u32::try_from(reader.read_var_s33()?) {
1780-
Ok(idx) => idx,
1781-
Err(_) => {
1782-
bail!(reader.original_position(), "invalid indexed ref heap type");
1783-
}
1784-
};
1759+
// Unconditionally read a `s33` value. If it's positive then that means
1760+
// it fits in a `u32` meaning that this is a valid concrete type index.
1761+
// If it's negative, however, then it must be an abstract heap type due
1762+
// to how non-concrete types are encoded (see `ValType` comments). In
1763+
// that situation "rewind" the reader and go back to the original bytes
1764+
// to parse them as an abstract heap type.
1765+
let mut clone = reader.clone();
1766+
let s33 = clone.read_var_s33()?;
1767+
match u32::try_from(s33) {
1768+
Ok(idx) => {
1769+
// Be sure to update `reader` with the state after the s33 was
1770+
// read.
1771+
*reader = clone;
17851772
let idx = PackedIndex::from_module_index(idx).ok_or_else(|| {
17861773
BinaryReaderError::new(
17871774
"type index greater than implementation limits",
@@ -1790,6 +1777,25 @@ impl<'a> FromReader<'a> for HeapType {
17901777
})?;
17911778
Ok(HeapType::Concrete(idx.unpack()))
17921779
}
1780+
Err(_) => match reader.peek()? {
1781+
0x65 => {
1782+
reader.read_u8()?;
1783+
let ty = reader.read()?;
1784+
Ok(HeapType::Abstract { shared: true, ty })
1785+
}
1786+
_ => {
1787+
// Reclassify errors as "invalid heap type" here because
1788+
// that's the "root" of what was being parsed rather than
1789+
// abstract heap types.
1790+
let ty = reader.read().map_err(|mut e| {
1791+
if let BinaryReaderErrorKind::Invalid = e.kind() {
1792+
e.set_message("invalid heap type");
1793+
}
1794+
e
1795+
})?;
1796+
Ok(HeapType::Abstract { shared: false, ty })
1797+
}
1798+
},
17931799
}
17941800
}
17951801
}
@@ -1813,7 +1819,10 @@ impl<'a> FromReader<'a> for AbstractHeapType {
18131819
0x68 => Ok(Cont),
18141820
0x75 => Ok(NoCont),
18151821
_ => {
1816-
bail!(reader.original_position(), "invalid abstract heap type");
1822+
return Err(BinaryReaderError::invalid(
1823+
"invalid abstract heap type",
1824+
reader.original_position() - 1,
1825+
))
18171826
}
18181827
}
18191828
}

0 commit comments

Comments
 (0)