Skip to content

Commit f175365

Browse files
committed
fix: wrong line number when json has escaped chars
1 parent 2fd6e42 commit f175365

File tree

5 files changed

+44
-66
lines changed

5 files changed

+44
-66
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 0 additions & 49 deletions
This file was deleted.

src/error.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -501,10 +501,6 @@ fn starts_with_digit(slice: &str) -> bool {
501501
}
502502
}
503503

504-
pub(crate) fn invalid_utf8(json: &[u8], index: usize) -> Error {
505-
Error::syntax(ErrorCode::InvalidUTF8, json, index)
506-
}
507-
508504
#[cfg(test)]
509505
mod test {
510506
use crate::{from_slice, from_str, Deserialize};

src/parser.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use sonic_simd::{i8x32, m8x32, u8x32, u8x64, Mask, Simd};
1818
use crate::{
1919
config::DeserializeCfg,
2020
error::{
21-
invalid_utf8, Error,
21+
Error,
2222
ErrorCode::{self, *},
2323
Result,
2424
},
@@ -283,7 +283,7 @@ where
283283
reason = EofWhileParsing;
284284
index = len;
285285
}
286-
Error::syntax(reason, self.read.as_u8_slice(), index)
286+
Error::syntax(reason, self.read.origin_input(), index)
287287
}
288288

289289
// The error may have been generated in the visitor, so we need to fix the position.
@@ -802,7 +802,11 @@ where
802802
}
803803

804804
if !allowed {
805-
Err(invalid_utf8(self.read.as_u8_slice(), invalid))
805+
Err(Error::syntax(
806+
ErrorCode::InvalidUTF8,
807+
self.read.origin_input(),
808+
invalid,
809+
))
806810
} else {
807811
// this space is allowed, should update the next invalid utf8 position
808812
self.read.check_invalid_utf8();
@@ -852,10 +856,10 @@ where
852856
Ok((((point1 - 0xd800) << 10) | low_bit).wrapping_add(0x10000))
853857
} else if (0xDC00..0xE000).contains(&point1) {
854858
if self.cfg.utf8_lossy {
855-
return Ok(0xFFFD);
859+
Ok(0xFFFD)
856860
} else {
857861
// invalid surrogate
858-
return perr!(self, InvalidSurrogateUnicodeCodePoint);
862+
perr!(self, InvalidSurrogateUnicodeCodePoint)
859863
}
860864
} else {
861865
Ok(point1)

src/reader.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ use std::{marker::PhantomData, pin::Pin, ptr::NonNull};
33
use faststr::FastStr;
44

55
use crate::{
6-
error::invalid_utf8,
6+
error::ErrorCode,
77
input::JsonSlice,
88
parser::as_str,
99
util::{private::Sealed, utf8::from_utf8},
10-
JsonInput, Result,
10+
Error, JsonInput, Result,
1111
};
1212

1313
pub(crate) struct Position {
@@ -71,6 +71,10 @@ pub trait Reader<'de>: Sealed {
7171
fn check_invalid_utf8(&mut self);
7272

7373
fn slice_ref(&self, subset: &'de [u8]) -> JsonSlice<'de>;
74+
75+
fn origin_input(&self) -> &'de [u8] {
76+
self.as_u8_slice()
77+
}
7478
}
7579

7680
enum PinnedInput<'a> {
@@ -267,12 +271,16 @@ impl<'a> Reader<'a> for Read<'a> {
267271
if self.next_invalid_utf8 == usize::MAX {
268272
Ok(())
269273
} else {
270-
Err(invalid_utf8(self.slice(), self.next_invalid_utf8))
274+
Err(Error::syntax(
275+
ErrorCode::InvalidUTF8,
276+
self.origin_input(),
277+
self.next_invalid_utf8,
278+
))
271279
}
272280
}
273281

274282
fn check_invalid_utf8(&mut self) {
275-
self.next_invalid_utf8 = match from_utf8(&self.slice()[self.index..]) {
283+
self.next_invalid_utf8 = match from_utf8(&self.origin_input()[self.index..]) {
276284
Ok(_) => usize::MAX,
277285
Err(e) => self.index + e.offset(),
278286
};
@@ -287,17 +295,19 @@ pub(crate) struct PaddedSliceRead<'a> {
287295
base: NonNull<u8>,
288296
cur: NonNull<u8>,
289297
len: usize,
298+
origin: &'a [u8],
290299
_life: PhantomData<&'a mut [u8]>,
291300
}
292301

293302
impl<'a> PaddedSliceRead<'a> {
294303
const PADDING_SIZE: usize = 64;
295-
pub fn new(slice: &'a mut [u8]) -> Self {
296-
let base = unsafe { NonNull::new_unchecked(slice.as_mut_ptr()) };
304+
pub fn new(buffer: &'a mut [u8], json: &'a [u8]) -> Self {
305+
let base = unsafe { NonNull::new_unchecked(buffer.as_mut_ptr()) };
297306
Self {
298307
base,
299308
cur: base,
300-
len: slice.len() - Self::PADDING_SIZE,
309+
len: buffer.len() - Self::PADDING_SIZE,
310+
origin: json,
301311
_life: PhantomData,
302312
}
303313
}
@@ -400,6 +410,11 @@ impl<'a> Reader<'a> for PaddedSliceRead<'a> {
400410
fn check_utf8_final(&self) -> Result<()> {
401411
Ok(())
402412
}
413+
414+
#[inline(always)]
415+
fn origin_input(&self) -> &'a [u8] {
416+
self.origin
417+
}
403418
}
404419

405420
#[cfg(test)]

src/value/node.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1313,7 +1313,7 @@ impl Value {
13131313
buffer.extend_from_slice(&[0; 61]);
13141314

13151315
let smut = Arc::get_mut(&mut shared).unwrap();
1316-
let slice = PaddedSliceRead::new(buffer.as_mut_slice());
1316+
let slice = PaddedSliceRead::new(buffer.as_mut_slice(), json);
13171317
let mut parser = Parser::new(slice).with_config(cfg);
13181318
let mut vis = DocumentVisitor::new(json.len(), smut);
13191319
parser.parse_dom(&mut vis)?;
@@ -2068,4 +2068,16 @@ mod test {
20682068

20692069
assert_eq!(obj, obj2);
20702070
}
2071+
2072+
#[test]
2073+
fn test_issue_179_line_column() {
2074+
let json = r#"
2075+
{
2076+
"key\nwith\nnewlines": "value",
2077+
"another_key": [, 1, 2, 3]
2078+
}
2079+
"#;
2080+
let err = crate::from_str::<Value>(json).unwrap_err();
2081+
assert_eq!(err.line(), 4);
2082+
}
20712083
}

0 commit comments

Comments
 (0)