-
Notifications
You must be signed in to change notification settings - Fork 119
Remove unsafe unwraps in hostcalls.rs #345
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -156,32 +156,26 @@ fn proxy_get_header_map_pairs( | |
| } | ||
|
|
||
| pub fn get_map(map_type: MapType) -> Result<Vec<(String, String)>, Status> { | ||
| unsafe { | ||
| let mut return_data: *mut u8 = null_mut(); | ||
| let mut return_size: usize = 0; | ||
| match proxy_get_header_map_pairs(map_type, &mut return_data, &mut return_size) { | ||
| Status::Ok => { | ||
| if !return_data.is_null() { | ||
| let serialized_map = Vec::from_raw_parts(return_data, return_size, return_size); | ||
| Ok(utils::deserialize_map(&serialized_map)) | ||
| } else { | ||
| Ok(Vec::new()) | ||
| } | ||
| } | ||
| status => panic!("unexpected status: {}", status as u32), | ||
| } | ||
| } | ||
| get_map_impl(map_type, |v| String::from_utf8_lossy(&v).into_owned()) | ||
| } | ||
|
|
||
| pub fn get_map_bytes(map_type: MapType) -> Result<Vec<(String, Bytes)>, Status> { | ||
| get_map_impl(map_type, |v| v) | ||
| } | ||
|
|
||
| #[inline] | ||
| fn get_map_impl<F, V>(map_type: MapType, value_mapper: F) -> Result<Vec<(String, V)>, Status> | ||
| where | ||
| F: Fn(Vec<u8>) -> V, | ||
| { | ||
| unsafe { | ||
| let mut return_data: *mut u8 = null_mut(); | ||
| let mut return_size: usize = 0; | ||
| match proxy_get_header_map_pairs(map_type, &mut return_data, &mut return_size) { | ||
| Status::Ok => { | ||
| if !return_data.is_null() { | ||
| let serialized_map = Vec::from_raw_parts(return_data, return_size, return_size); | ||
| Ok(utils::deserialize_map_bytes(&serialized_map)) | ||
| Ok(utils::deserialize_map_impl(&serialized_map, value_mapper).unwrap()) | ||
| } else { | ||
| Ok(Vec::new()) | ||
| } | ||
|
|
@@ -243,12 +237,12 @@ pub fn get_map_value(map_type: MapType, key: &str) -> Result<Option<String>, Sta | |
| Status::Ok => { | ||
| if !return_data.is_null() { | ||
| Ok(Some( | ||
| String::from_utf8(Vec::from_raw_parts( | ||
| String::from_utf8_lossy(&Vec::from_raw_parts( | ||
| return_data, | ||
| return_size, | ||
| return_size, | ||
| )) | ||
| .unwrap(), | ||
| .into_owned(), | ||
| )) | ||
| } else { | ||
| Ok(Some(String::new())) | ||
|
|
@@ -1206,7 +1200,23 @@ mod tests { | |
|
|
||
| mod utils { | ||
| use crate::types::Bytes; | ||
| use std::convert::TryFrom; | ||
| use std::convert::TryInto; | ||
| use std::fmt::Display; | ||
|
|
||
| #[derive(Debug)] | ||
| pub enum Error { | ||
| BufferTooShort, | ||
| BufferOverflow, | ||
| } | ||
|
|
||
| impl Display for Error { | ||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
| match self { | ||
| Error::BufferTooShort => write!(f, "buffer too short"), | ||
| Error::BufferOverflow => write!(f, "buffer overflow"), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| pub(super) fn serialize_property_path(path: Vec<&str>) -> Bytes { | ||
| if path.is_empty() { | ||
|
|
@@ -1265,49 +1275,64 @@ mod utils { | |
| bytes | ||
| } | ||
|
|
||
| pub(super) fn deserialize_map(bytes: &[u8]) -> Vec<(String, String)> { | ||
| #[inline] | ||
| pub(super) fn deserialize_map_impl<F, V>( | ||
| bytes: &[u8], | ||
| value_mapper: F, | ||
| ) -> Result<Vec<(String, V)>, Error> | ||
| where | ||
| F: Fn(Vec<u8>) -> V, | ||
| { | ||
| if bytes.is_empty() { | ||
| return Vec::new(); | ||
| return Ok(Vec::new()); | ||
| } | ||
| let size = u32::from_le_bytes(<[u8; 4]>::try_from(&bytes[0..4]).unwrap()) as usize; | ||
| let mut map = Vec::with_capacity(size); | ||
| let mut p = 4 + size * 8; | ||
| for n in 0..size { | ||
| let s = 4 + n * 8; | ||
| let size = u32::from_le_bytes(<[u8; 4]>::try_from(&bytes[s..s + 4]).unwrap()) as usize; | ||
| let key = bytes[p..p + size].to_vec(); | ||
| p += size + 1; | ||
| let size = | ||
| u32::from_le_bytes(<[u8; 4]>::try_from(&bytes[s + 4..s + 8]).unwrap()) as usize; | ||
| let value = bytes[p..p + size].to_vec(); | ||
| p += size + 1; | ||
| map.push(( | ||
| String::from_utf8(key).unwrap(), | ||
| String::from_utf8(value).unwrap(), | ||
| )); | ||
| } | ||
| map | ||
| } | ||
|
|
||
| pub(super) fn deserialize_map_bytes(bytes: &[u8]) -> Vec<(String, Bytes)> { | ||
| if bytes.is_empty() { | ||
| return Vec::new(); | ||
| if bytes.len() < 4 { | ||
| return Err(Error::BufferTooShort); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand the point in #282 but it doesn't negate that you should validate your inputs. |
||
| } | ||
| let size = u32::from_le_bytes(<[u8; 4]>::try_from(&bytes[0..4]).unwrap()) as usize; | ||
|
|
||
| let size = u32::from_le_bytes(bytes[0..4].try_into().map_err(|_| Error::BufferTooShort)?) as usize; | ||
| let mut map = Vec::with_capacity(size); | ||
| let mut p = 4 + size * 8; | ||
|
|
||
| // check if header is large enough | ||
| let header_size = 4 + size.checked_mul(8).ok_or(Error::BufferOverflow)?; | ||
| if bytes.len() < header_size { | ||
| return Err(Error::BufferTooShort); | ||
| } | ||
|
|
||
| let mut p = header_size; | ||
|
|
||
| for n in 0..size { | ||
| let s = 4 + n * 8; | ||
| let size = u32::from_le_bytes(<[u8; 4]>::try_from(&bytes[s..s + 4]).unwrap()) as usize; | ||
| let key = bytes[p..p + size].to_vec(); | ||
| p += size + 1; | ||
| let size = | ||
| u32::from_le_bytes(<[u8; 4]>::try_from(&bytes[s + 4..s + 8]).unwrap()) as usize; | ||
| let value = bytes[p..p + size].to_vec(); | ||
| p += size + 1; | ||
| map.push((String::from_utf8(key).unwrap(), value)); | ||
| } | ||
| map | ||
|
|
||
| // read key size | ||
| let key_size = u32::from_le_bytes(bytes[s..s + 4].try_into().unwrap()) as usize; | ||
| let key_end = p.checked_add(key_size).ok_or(Error::BufferOverflow)?; | ||
| if key_end > bytes.len() { | ||
| return Err(Error::BufferTooShort); | ||
| } | ||
| let key = String::from_utf8_lossy(&bytes[p..key_end].to_vec()).into_owned(); | ||
|
|
||
| p = key_end.checked_add(1).ok_or(Error::BufferOverflow)?; | ||
|
|
||
| // read value size | ||
| let value_size = u32::from_le_bytes( | ||
| bytes[s + 4..s + 8] | ||
| .try_into() | ||
| .map_err(|_| Error::BufferOverflow)?, | ||
| ) as usize; | ||
| let value_end = p.checked_add(value_size).ok_or(Error::BufferOverflow)?; | ||
| if value_end > bytes.len() { | ||
| return Err(Error::BufferTooShort); | ||
| } | ||
| let value = bytes[p..value_end].to_vec(); | ||
|
|
||
| p = value_end.checked_add(1).ok_or(Error::BufferOverflow)?; | ||
|
|
||
| map.push((key, value_mapper(value))); | ||
| } | ||
|
|
||
| Ok(map) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
|
|
@@ -1324,6 +1349,9 @@ mod utils { | |
| ("Powered-By", "proxy-wasm"), | ||
| ]; | ||
|
|
||
| static BYTES_MAPPER: fn(Bytes) -> Bytes = |v| v; | ||
| static STRING_MAPPER: fn(Bytes) -> String = |v| String::from_utf8_lossy(&v).into_owned(); | ||
|
|
||
| #[rustfmt::skip] | ||
| pub(in crate::hostcalls) static SERIALIZED_MAP: &[u8] = &[ | ||
| // num entries | ||
|
|
@@ -1354,6 +1382,14 @@ mod utils { | |
| 112, 114, 111, 120, 121, 45, 119, 97, 115, 109, 0, | ||
| ]; | ||
|
|
||
| fn deserialize_map_strings(bytes: &[u8]) -> Vec<(String, String)> { | ||
| deserialize_map_impl(bytes, STRING_MAPPER).expect("deserialize_map failed") | ||
| } | ||
|
|
||
| fn deserialize_map_bytes(bytes: &[u8]) -> Vec<(String, Bytes)> { | ||
| deserialize_map_impl(bytes, BYTES_MAPPER).expect("deserialize_map failed") | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_serialize_map_empty() { | ||
| let serialized_map = serialize_map(&[]); | ||
|
|
@@ -1368,9 +1404,9 @@ mod utils { | |
|
|
||
| #[test] | ||
| fn test_deserialize_map_empty() { | ||
| let map = deserialize_map(&[]); | ||
| let map = deserialize_map_strings(&[]); | ||
| assert_eq!(map, []); | ||
| let map = deserialize_map(&[0, 0, 0, 0]); | ||
| let map = deserialize_map_strings(&[0, 0, 0, 0]); | ||
| assert_eq!(map, []); | ||
| } | ||
|
|
||
|
|
@@ -1397,7 +1433,7 @@ mod utils { | |
|
|
||
| #[test] | ||
| fn test_deserialize_map() { | ||
| let map = deserialize_map(SERIALIZED_MAP); | ||
| let map = deserialize_map_strings(SERIALIZED_MAP); | ||
| assert_eq!(map.len(), MAP.len()); | ||
| for (got, expected) in map.into_iter().zip(MAP) { | ||
| assert_eq!(got.0, expected.0); | ||
|
|
@@ -1417,7 +1453,7 @@ mod utils { | |
|
|
||
| #[test] | ||
| fn test_deserialize_map_roundtrip() { | ||
| let map = deserialize_map(SERIALIZED_MAP); | ||
| let map = deserialize_map_strings(SERIALIZED_MAP); | ||
| // TODO(v0.3): fix arguments, so that maps can be reused without conversion. | ||
| let map_refs: Vec<(&str, &str)> = | ||
| map.iter().map(|x| (x.0.as_ref(), x.1.as_ref())).collect(); | ||
|
|
@@ -1440,21 +1476,24 @@ mod utils { | |
| // 0x00-0x7f are valid single-byte UTF-8 characters. | ||
| for i in 0..0x7f { | ||
| let serialized_src = [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 99, 0, i, 0]; | ||
| let map = deserialize_map(&serialized_src); | ||
| let map = deserialize_map_strings(&serialized_src); | ||
| // TODO(v0.3): fix arguments, so that maps can be reused without conversion. | ||
| let map_refs: Vec<(&str, &str)> = | ||
| map.iter().map(|x| (x.0.as_ref(), x.1.as_ref())).collect(); | ||
| let serialized_map = serialize_map(&map_refs); | ||
| assert_eq!(serialized_map, serialized_src); | ||
| assert_eq!(serialized_map, serialized_src, "Failed at i={}", i); | ||
| } | ||
| // 0x80-0xff are invalid single-byte UTF-8 characters. | ||
| for i in 0x80..0xff { | ||
| let serialized_src = [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 99, 0, i, 0]; | ||
| std::panic::set_hook(Box::new(|_| {})); | ||
| let result = std::panic::catch_unwind(|| { | ||
| deserialize_map(&serialized_src); | ||
| }); | ||
| assert!(result.is_err()); | ||
| let map = deserialize_map_strings(&serialized_src); | ||
|
|
||
| // Invalid UTF-8 bytes should be replaced with the replacement character U+FFFD. | ||
| assert!( | ||
| map[0].1.contains('�'), | ||
| "Expected replacement character for byte 0x{:02x}", | ||
| i | ||
| ); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1495,7 +1534,7 @@ mod utils { | |
| fn bench_deserialize_map(b: &mut Bencher) { | ||
| let serialized_map = SERIALIZED_MAP.to_vec(); | ||
| b.iter(|| { | ||
| deserialize_map(test::black_box(&serialized_map)); | ||
| deserialize_map_strings(test::black_box(&serialized_map)); | ||
| }); | ||
| } | ||
|
|
||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
String::from_utf8_lossy()is a wrong "solution" - it changespanicto a silently corrupted data (which I could argue is even worse), and it doesn't actually solve the underlying problem (UTF-8 strings are wrong the type to use for HTTP field values).If you want to use
String::from_utf8_lossy(), then_bytes()variants provide an escape hatch and allow converting opaque bytes to other types inside plugins and not in the SDK. Those variants have been available for years.However, the proper solution is to use the appropriate type for HTTP field values, see: #287 (reviews welcome!).
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@PiotrSikora I adamantly disagree that crashing the WASM runtime is better than parsing lossy. I wouldn't use a web server that crashed on bad inputs. Most web servers will return 4xx errors on invalid headers, but this project is for software embedded inside a proxy like Envoy, you cannot make assumptions. I actually only took this approach because you seemed reluctant to other input on changing function contracts.
The premise that panicking is acceptable on invalid user input opens software reliant on this library up to denial of service attacks. Quite frankly I was shocked to see panics on invalid user input in the library. You mention I could use
_bytes()but based on other discussions here I see others assuming the library's string parsing is safe when it's anything but. We don't all read library source code before using it.I will look at your solution in #287, but panicking on invalid user input is never acceptable in a library crate. I really think you should be open to changing that ASAP.
PS - my issue is not about this PR, but about the cavalier attitude by maintainer(s) ignoring Rust best practices. Libraries should never panic based on invalid user inputs. See #310.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@foo4u I'm not sure where you got the idea that I think that anything (servers, libraries, whatever) should crash on invalid user input. That's definitely not the case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's sounds like panicking is better than accepting malformed data. The correct approach on these functions would be to return
Result<String, Error>.