From 23031d057f30fd8bc771f967d00a6644d000ca51 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Fri, 31 Jul 2015 07:33:23 -0700 Subject: [PATCH 1/9] Add test for parsing json "0" and "0.0" --- serde_tests/tests/test_json.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/serde_tests/tests/test_json.rs b/serde_tests/tests/test_json.rs index 14345a28..10bc2f4c 100644 --- a/serde_tests/tests/test_json.rs +++ b/serde_tests/tests/test_json.rs @@ -724,6 +724,7 @@ fn test_parse_i64() { #[test] fn test_parse_u64() { test_parse_ok(vec![ + ("0", 0u64), ("3", 3u64), ("1234", 1234), ]); @@ -732,6 +733,7 @@ fn test_parse_u64() { #[test] fn test_parse_f64() { test_parse_ok(vec![ + ("0.0", 0.0f64), ("3.0", 3.0f64), ("3.1", 3.1), ("-1.2", -1.2), From c9d55362d6de86c44a829d3e56aee290fbf06341 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Sat, 1 Aug 2015 13:49:57 -0700 Subject: [PATCH 2/9] Add a serde_json::Result helper type --- serde_json/src/de.rs | 68 ++++++++++++++++++++--------------------- serde_json/src/error.rs | 4 +++ serde_json/src/lib.rs | 2 +- 3 files changed, 39 insertions(+), 35 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 31f406af..2f752287 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -6,7 +6,7 @@ use std::str; use serde::de; use serde::iter::LineColIterator; -use super::error::{Error, ErrorCode}; +use super::error::{Error, ErrorCode, Result}; pub struct Deserializer>> { rdr: LineColIterator, @@ -28,7 +28,7 @@ impl Deserializer { /// Creates the JSON parser from an `std::iter::Iterator`. #[inline] - pub fn new(rdr: Iter) -> Result, Error> { + pub fn new(rdr: Iter) -> Result> { let mut deserializer = Deserializer { rdr: LineColIterator::new(rdr), ch: None, @@ -41,7 +41,7 @@ impl Deserializer } #[inline] - pub fn end(&mut self) -> Result<(), Error> { + pub fn end(&mut self) -> Result<()> { try!(self.parse_whitespace()); if self.eof() { Ok(()) @@ -54,7 +54,7 @@ impl Deserializer fn ch_or_null(&self) -> u8 { self.ch.unwrap_or(b'\x00') } - fn bump(&mut self) -> Result<(), Error> { + fn bump(&mut self) -> Result<()> { self.ch = match self.rdr.next() { Some(Err(err)) => { return Err(Error::IoError(err)); } Some(Ok(ch)) => Some(ch), @@ -64,7 +64,7 @@ impl Deserializer Ok(()) } - fn next_char(&mut self) -> Result, Error> { + fn next_char(&mut self) -> Result> { try!(self.bump()); Ok(self.ch) } @@ -77,7 +77,7 @@ impl Deserializer Error::SyntaxError(reason, self.rdr.line(), self.rdr.col()) } - fn parse_whitespace(&mut self) -> Result<(), Error> { + fn parse_whitespace(&mut self) -> Result<()> { while self.ch_is(b' ') || self.ch_is(b'\n') || self.ch_is(b'\t') || @@ -86,7 +86,7 @@ impl Deserializer Ok(()) } - fn parse_value(&mut self, mut visitor: V) -> Result + fn parse_value(&mut self, mut visitor: V) -> Result where V: de::Visitor, { try!(self.parse_whitespace()); @@ -134,7 +134,7 @@ impl Deserializer } } - fn parse_ident(&mut self, ident: &[u8]) -> Result<(), Error> { + fn parse_ident(&mut self, ident: &[u8]) -> Result<()> { for c in ident { if Some(*c) != try!(self.next_char()) { return Err(self.error(ErrorCode::ExpectedSomeIdent)); @@ -145,7 +145,7 @@ impl Deserializer Ok(()) } - fn parse_number(&mut self, mut visitor: V) -> Result + fn parse_number(&mut self, mut visitor: V) -> Result where V: de::Visitor, { let mut neg = false; @@ -189,7 +189,7 @@ impl Deserializer } } - fn parse_integer(&mut self) -> Result { + fn parse_integer(&mut self) -> Result { let mut accum: u64 = 0; match self.ch_or_null() { @@ -223,7 +223,7 @@ impl Deserializer Ok(accum) } - fn parse_decimal(&mut self, res: f64) -> Result { + fn parse_decimal(&mut self, res: f64) -> Result { try!(self.bump()); // Make sure a digit follows the decimal place. @@ -248,7 +248,7 @@ impl Deserializer Ok(res) } - fn parse_exponent(&mut self, mut res: f64) -> Result { + fn parse_exponent(&mut self, mut res: f64) -> Result { try!(self.bump()); let mut exp: u64 = 0; @@ -293,7 +293,7 @@ impl Deserializer Ok(res) } - fn decode_hex_escape(&mut self) -> Result { + fn decode_hex_escape(&mut self) -> Result { let mut i = 0; let mut n = 0u16; while i < 4 && !self.eof() { @@ -320,7 +320,7 @@ impl Deserializer Ok(n) } - fn parse_string(&mut self) -> Result<(), Error> { + fn parse_string(&mut self) -> Result<()> { self.str_buf.clear(); loop { @@ -409,7 +409,7 @@ impl Deserializer } } - fn parse_object_colon(&mut self) -> Result<(), Error> { + fn parse_object_colon(&mut self) -> Result<()> { try!(self.parse_whitespace()); if self.ch_is(b':') { @@ -429,14 +429,14 @@ impl de::Deserializer for Deserializer type Error = Error; #[inline] - fn visit(&mut self, visitor: V) -> Result + fn visit(&mut self, visitor: V) -> Result where V: de::Visitor, { self.parse_value(visitor) } #[inline] - fn visit_option(&mut self, mut visitor: V) -> Result + fn visit_option(&mut self, mut visitor: V) -> Result where V: de::Visitor, { try!(self.parse_whitespace()); @@ -456,7 +456,7 @@ impl de::Deserializer for Deserializer #[inline] fn visit_newtype_struct(&mut self, _name: &str, - mut visitor: V) -> Result + mut visitor: V) -> Result where V: de::Visitor, { visitor.visit_newtype_struct(self) @@ -466,7 +466,7 @@ impl de::Deserializer for Deserializer fn visit_enum(&mut self, _name: &str, _variants: &'static [&'static str], - mut visitor: V) -> Result + mut visitor: V) -> Result where V: de::EnumVisitor, { try!(self.parse_whitespace()); @@ -517,7 +517,7 @@ impl<'a, Iter> de::SeqVisitor for SeqVisitor<'a, Iter> { type Error = Error; - fn visit(&mut self) -> Result, Error> + fn visit(&mut self) -> Result> where T: de::Deserialize, { try!(self.de.parse_whitespace()); @@ -542,7 +542,7 @@ impl<'a, Iter> de::SeqVisitor for SeqVisitor<'a, Iter> Ok(Some(value)) } - fn end(&mut self) -> Result<(), Error> { + fn end(&mut self) -> Result<()> { try!(self.de.parse_whitespace()); if self.de.ch_is(b']') { @@ -574,7 +574,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter> { type Error = Error; - fn visit_key(&mut self) -> Result, Error> + fn visit_key(&mut self) -> Result> where K: de::Deserialize, { try!(self.de.parse_whitespace()); @@ -607,7 +607,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter> Ok(Some(try!(de::Deserialize::deserialize(self.de)))) } - fn visit_value(&mut self) -> Result + fn visit_value(&mut self) -> Result where V: de::Deserialize, { try!(self.de.parse_object_colon()); @@ -615,7 +615,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter> Ok(try!(de::Deserialize::deserialize(self.de))) } - fn end(&mut self) -> Result<(), Error> { + fn end(&mut self) -> Result<()> { try!(self.de.parse_whitespace()); if self.de.ch_is(b'}') { @@ -628,7 +628,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter> } } - fn missing_field(&mut self, _field: &'static str) -> Result + fn missing_field(&mut self, _field: &'static str) -> Result where V: de::Deserialize, { let mut de = de::value::ValueDeserializer::into_deserializer(()); @@ -641,7 +641,7 @@ impl de::VariantVisitor for Deserializer { type Error = Error; - fn visit_variant(&mut self) -> Result + fn visit_variant(&mut self) -> Result where V: de::Deserialize { let val = try!(de::Deserialize::deserialize(self)); @@ -649,11 +649,11 @@ impl de::VariantVisitor for Deserializer Ok(val) } - fn visit_unit(&mut self) -> Result<(), Error> { + fn visit_unit(&mut self) -> Result<()> { de::Deserialize::deserialize(self) } - fn visit_newtype(&mut self) -> Result + fn visit_newtype(&mut self) -> Result where T: de::Deserialize, { de::Deserialize::deserialize(self) @@ -661,7 +661,7 @@ impl de::VariantVisitor for Deserializer fn visit_tuple(&mut self, _len: usize, - visitor: V) -> Result + visitor: V) -> Result where V: de::Visitor, { de::Deserializer::visit(self, visitor) @@ -669,7 +669,7 @@ impl de::VariantVisitor for Deserializer fn visit_struct(&mut self, _fields: &'static [&'static str], - visitor: V) -> Result + visitor: V) -> Result where V: de::Visitor, { de::Deserializer::visit(self, visitor) @@ -677,7 +677,7 @@ impl de::VariantVisitor for Deserializer } /// Decodes a json value from a `std::io::Read`. -pub fn from_iter(iter: I) -> Result +pub fn from_iter(iter: I) -> Result where I: Iterator>, T: de::Deserialize, { @@ -690,7 +690,7 @@ pub fn from_iter(iter: I) -> Result } /// Decodes a json value from a `std::io::Read`. -pub fn from_reader(rdr: R) -> Result +pub fn from_reader(rdr: R) -> Result where R: io::Read, T: de::Deserialize, { @@ -698,14 +698,14 @@ pub fn from_reader(rdr: R) -> Result } /// Decodes a json value from a `&str`. -pub fn from_slice(v: &[u8]) -> Result +pub fn from_slice(v: &[u8]) -> Result where T: de::Deserialize { from_iter(v.iter().map(|byte| Ok(*byte))) } /// Decodes a json value from a `&str`. -pub fn from_str(s: &str) -> Result +pub fn from_str(s: &str) -> Result where T: de::Deserialize { from_slice(s.as_bytes()) diff --git a/serde_json/src/error.rs b/serde_json/src/error.rs index 171e2459..beaa8a72 100644 --- a/serde_json/src/error.rs +++ b/serde_json/src/error.rs @@ -1,6 +1,7 @@ use std::error; use std::fmt; use std::io; +use std::result; use serde::de; @@ -183,3 +184,6 @@ impl de::Error for Error { Error::MissingFieldError(field) } } + +/// Helper alias for `Result` objects that return a JSON `Error`. +pub type Result = result::Result; diff --git a/serde_json/src/lib.rs b/serde_json/src/lib.rs index 3032db9c..e113127a 100644 --- a/serde_json/src/lib.rs +++ b/serde_json/src/lib.rs @@ -96,7 +96,7 @@ extern crate num; extern crate serde; pub use self::de::{Deserializer, from_str}; -pub use self::error::{Error, ErrorCode}; +pub use self::error::{Error, ErrorCode, Result}; pub use self::ser::{ Serializer, to_writer, From fa562d449d652ae48eb650c491d9a0d835ef37d7 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Sat, 1 Aug 2015 11:41:42 -0700 Subject: [PATCH 3/9] Minor optimization to not check if JSON number starts with '-' twice --- serde_json/src/de.rs | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 2f752287..4328fec5 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -108,7 +108,13 @@ impl Deserializer try!(self.parse_ident(b"alse")); visitor.visit_bool(false) } - b'0' ... b'9' | b'-' => self.parse_number(visitor), + b'-' => { + try!(self.bump()); + self.parse_number(false, visitor) + } + b'0' ... b'9' => { + self.parse_number(true, visitor) + } b'"' => { try!(self.parse_string()); let s = str::from_utf8(&self.str_buf).unwrap(); @@ -145,16 +151,9 @@ impl Deserializer Ok(()) } - fn parse_number(&mut self, mut visitor: V) -> Result + fn parse_number(&mut self, pos: bool, mut visitor: V) -> Result where V: de::Visitor, { - let mut neg = false; - - if self.ch_is(b'-') { - try!(self.bump()); - neg = true; - } - let res = try!(self.parse_integer()); if self.ch_is(b'.') || self.ch_is(b'e') || self.ch_is(b'E') { @@ -168,13 +167,15 @@ impl Deserializer res = try!(self.parse_exponent(res)); } - if neg { - visitor.visit_f64(-res) - } else { + if pos { visitor.visit_f64(res) + } else { + visitor.visit_f64(-res) } } else { - if neg { + if pos { + visitor.visit_u64(res) + } else { let res = -(res as i64); // Make sure we didn't underflow. @@ -183,8 +184,6 @@ impl Deserializer } else { visitor.visit_i64(res) } - } else { - visitor.visit_u64(res) } } } From 8eff38b6f6b0a0c76794812df6a5077bfa8e6e19 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Sat, 1 Aug 2015 13:40:28 -0700 Subject: [PATCH 4/9] Eliminate some code duplication parsing an exponent as an integer --- serde_json/src/de.rs | 49 ++++++++++++++++++---------------- serde_tests/tests/test_json.rs | 14 +++++++--- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 4328fec5..4d2ab5c3 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -222,17 +222,20 @@ impl Deserializer Ok(accum) } - fn parse_decimal(&mut self, res: f64) -> Result { + fn parse_decimal(&mut self, mut res: f64) -> Result { try!(self.bump()); + let mut dec = 0.1; + // Make sure a digit follows the decimal place. match self.ch_or_null() { - b'0' ... b'9' => (), + c @ b'0' ... b'9' => { + try!(self.bump()); + res += (((c as u64) - (b'0' as u64)) as f64) * dec; + } _ => { return Err(self.error(ErrorCode::InvalidNumber)); } } - let mut res = res; - let mut dec = 1.0; while !self.eof() { match self.ch_or_null() { c @ b'0' ... b'9' => { @@ -250,30 +253,30 @@ impl Deserializer fn parse_exponent(&mut self, mut res: f64) -> Result { try!(self.bump()); - let mut exp: u64 = 0; - let mut neg_exp = false; - - if self.ch_is(b'+') { - try!(self.bump()); - } else if self.ch_is(b'-') { - try!(self.bump()); - neg_exp = true; - } + let pos = match self.ch_or_null() { + b'+' => { try!(self.bump()); true } + b'-' => { try!(self.bump()); false } + _ => { true } + }; // Make sure a digit follows the exponent place. - match self.ch_or_null() { - b'0' ... b'9' => (), + let mut exp = match self.ch_or_null() { + c @ b'0' ... b'9' => { + try!(self.bump()); + (c as u64) - (b'0' as u64) + } _ => { return Err(self.error(ErrorCode::InvalidNumber)); } - } - while !self.eof() { + }; + + loop { match self.ch_or_null() { c @ b'0' ... b'9' => { + try!(self.bump()); + exp = try_or_invalid!(self, exp.checked_mul(10)); exp = try_or_invalid!(self, exp.checked_add((c as u64) - (b'0' as u64))); - - try!(self.bump()); } - _ => break + _ => { break; } } } @@ -283,10 +286,10 @@ impl Deserializer return Err(self.error(ErrorCode::InvalidNumber)); }; - if neg_exp { - res /= exp; - } else { + if pos { res *= exp; + } else { + res /= exp; } Ok(res) diff --git a/serde_tests/tests/test_json.rs b/serde_tests/tests/test_json.rs index 10bc2f4c..3b6bfb61 100644 --- a/serde_tests/tests/test_json.rs +++ b/serde_tests/tests/test_json.rs @@ -708,7 +708,7 @@ fn test_parse_number_errors() { ("1e+", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 3)), ("1a", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 2)), ("777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 20)), - ("1e777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 22)), + ("1e777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 23)), ]); } @@ -735,13 +735,21 @@ fn test_parse_f64() { test_parse_ok(vec![ ("0.0", 0.0f64), ("3.0", 3.0f64), + ("3.00", 3.0f64), ("3.1", 3.1), ("-1.2", -1.2), ("0.4", 0.4), ("0.4e5", 0.4e5), + ("0.4e+5", 0.4e5), ("0.4e15", 0.4e15), - ("0.4e-01", 0.4e-01), - (" 0.4e-01 ", 0.4e-01), + ("0.4e+15", 0.4e15), + ("0.4e-01", 0.4e-1), + (" 0.4e-01 ", 0.4e-1), + ("0.4e-001", 0.4e-1), + ("0.4e-0", 0.4e0), + ("0.00e00", 0.0), + ("0.00e+00", 0.0), + ("0.00e-00", 0.0), ]); } From 22024a2b71c8dc7712b2d7e4b8c1566490ad0f94 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Sat, 1 Aug 2015 14:26:53 -0700 Subject: [PATCH 5/9] Simplify parsing floating point decimals and exponents --- serde_json/src/de.rs | 78 +++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 4d2ab5c3..4fbdd33d 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -156,33 +156,25 @@ impl Deserializer { let res = try!(self.parse_integer()); - if self.ch_is(b'.') || self.ch_is(b'e') || self.ch_is(b'E') { - let mut res = res as f64; - - if self.ch_is(b'.') { - res = try!(self.parse_decimal(res)); + match self.ch_or_null() { + b'.' => { + self.parse_decimal(pos, res as f64, visitor) } - - if self.ch_is(b'e') || self.ch_is(b'E') { - res = try!(self.parse_exponent(res)); + b'e' | b'E' => { + self.parse_exponent(pos, res as f64, visitor) } - - if pos { - visitor.visit_f64(res) - } else { - visitor.visit_f64(-res) - } - } else { - if pos { - visitor.visit_u64(res) - } else { - let res = -(res as i64); - - // Make sure we didn't underflow. - if res > 0 { - Err(self.error(ErrorCode::InvalidNumber)) + _ => { + if pos { + visitor.visit_u64(res) } else { - visitor.visit_i64(res) + let res = -(res as i64); + + // Make sure we didn't underflow. + if res > 0 { + Err(self.error(ErrorCode::InvalidNumber)) + } else { + visitor.visit_i64(res) + } } } } @@ -222,7 +214,12 @@ impl Deserializer Ok(accum) } - fn parse_decimal(&mut self, mut res: f64) -> Result { + fn parse_decimal(&mut self, + pos: bool, + mut res: f64, + mut visitor: V) -> Result + where V: de::Visitor, + { try!(self.bump()); let mut dec = 0.1; @@ -247,13 +244,30 @@ impl Deserializer } } - Ok(res) + match self.ch_or_null() { + b'e' | b'E' => { + self.parse_exponent(pos, res, visitor) + } + _ => { + if pos { + visitor.visit_f64(res) + } else { + visitor.visit_f64(-res) + } + } + } + } - fn parse_exponent(&mut self, mut res: f64) -> Result { + fn parse_exponent(&mut self, + pos: bool, + mut res: f64, + mut visitor: V) -> Result + where V: de::Visitor, + { try!(self.bump()); - let pos = match self.ch_or_null() { + let pos_exp = match self.ch_or_null() { b'+' => { try!(self.bump()); true } b'-' => { try!(self.bump()); false } _ => { true } @@ -286,13 +300,17 @@ impl Deserializer return Err(self.error(ErrorCode::InvalidNumber)); }; - if pos { + if pos_exp { res *= exp; } else { res /= exp; } - Ok(res) + if pos { + visitor.visit_f64(res) + } else { + visitor.visit_f64(-res) + } } fn decode_hex_escape(&mut self) -> Result { From ed6777e59f780a27cb00f7213fc70c05289325b2 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Mon, 3 Aug 2015 09:09:44 -0700 Subject: [PATCH 6/9] Fix json parsing i64::MIN, add tests for min and max i64 and u64 values --- serde_json/src/de.rs | 2 +- serde_tests/tests/test_json.rs | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 4fbdd33d..26ab0703 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -167,7 +167,7 @@ impl Deserializer if pos { visitor.visit_u64(res) } else { - let res = -(res as i64); + let res = (res as i64).wrapping_neg(); // Make sure we didn't underflow. if res > 0 { diff --git a/serde_tests/tests/test_json.rs b/serde_tests/tests/test_json.rs index 3b6bfb61..69b1b7f8 100644 --- a/serde_tests/tests/test_json.rs +++ b/serde_tests/tests/test_json.rs @@ -1,6 +1,8 @@ use std::collections::BTreeMap; use std::fmt::Debug; +use std::i64; use std::marker::PhantomData; +use std::u64; use serde::de; use serde::ser; @@ -82,12 +84,23 @@ fn test_write_null() { test_pretty_encode_ok(tests); } +#[test] +fn test_write_u64() { + let tests = &[ + (3u64, "3"), + (u64::MAX, &u64::MAX.to_string()), + ]; + test_encode_ok(tests); + test_pretty_encode_ok(tests); +} + #[test] fn test_write_i64() { let tests = &[ (3i64, "3"), (-2i64, "-2"), (-1234i64, "-1234"), + (i64::MIN, &i64::MIN.to_string()), ]; test_encode_ok(tests); test_pretty_encode_ok(tests); @@ -95,11 +108,18 @@ fn test_write_i64() { #[test] fn test_write_f64() { + let min_string = f64::MIN.to_string(); + let max_string = f64::MAX.to_string(); + let epsilon_string = f64::EPSILON.to_string(); + let tests = &[ (3.0, "3.0"), (3.1, "3.1"), (-1.5, "-1.5"), (0.5, "0.5"), + (f64::MIN, &min_string), + (f64::MAX, &max_string), + (f64::EPSILON, &epsilon_string), ]; test_encode_ok(tests); test_pretty_encode_ok(tests); @@ -621,7 +641,7 @@ fn test_write_option() { ]); } -fn test_parse_ok(errors: Vec<(&'static str, T)>) +fn test_parse_ok(errors: Vec<(&str, T)>) where T: Clone + Debug + PartialEq + ser::Serialize + de::Deserialize, { for (s, value) in errors { @@ -718,6 +738,8 @@ fn test_parse_i64() { ("-2", -2), ("-1234", -1234), (" -1234 ", -1234), + (&i64::MIN.to_string(), i64::MIN), + (&i64::MAX.to_string(), i64::MAX), ]); } @@ -727,6 +749,7 @@ fn test_parse_u64() { ("0", 0u64), ("3", 3u64), ("1234", 1234), + (&u64::MAX.to_string(), u64::MAX), ]); } From fd84aec4853428000530bafbeef49442a8468582 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Tue, 4 Aug 2015 18:00:24 -0700 Subject: [PATCH 7/9] Fix parsing min, max, and epsilon f64 values --- serde_json/src/de.rs | 155 ++++++++++++++++++++++++--------- serde_json/src/ser.rs | 14 +-- serde_tests/tests/test_json.rs | 13 +-- 3 files changed, 122 insertions(+), 60 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 26ab0703..4a6e139e 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -110,10 +110,10 @@ impl Deserializer } b'-' => { try!(self.bump()); - self.parse_number(false, visitor) + self.parse_integer(false, visitor) } b'0' ... b'9' => { - self.parse_number(true, visitor) + self.parse_integer(true, visitor) } b'"' => { try!(self.parse_string()); @@ -151,11 +151,114 @@ impl Deserializer Ok(()) } - fn parse_number(&mut self, pos: bool, mut visitor: V) -> Result + fn parse_integer(&mut self, pos: bool, visitor: V) -> Result where V: de::Visitor, { - let res = try!(self.parse_integer()); + match self.ch_or_null() { + b'0' => { + try!(self.bump()); + // There can be only one leading '0'. + match self.ch_or_null() { + b'0' ... b'9' => { + Err(self.error(ErrorCode::InvalidNumber)) + } + _ => { + self.parse_number(pos, 0, visitor) + } + } + }, + c @ b'1' ... b'9' => { + try!(self.bump()); + + let mut res: u64 = (c as u64) - ('0' as u64); + + loop { + match self.ch_or_null() { + c @ b'0' ... b'9' => { + try!(self.bump()); + + let digit = (c as u64) - ('0' as u64); + + // We need to be careful with overflow. If we can, try to keep the + // number as a `u64` until we grow too large. At that point, switch to + // parsing the value as a `f64`. + match res.checked_mul(10) { + Some(res_) => { + res = res_; + match res.checked_add(digit) { + Some(res_) => { res = res_; } + None => { + return self.parse_float( + pos, + (res as f64) + (digit as f64), + visitor); + } + } + } + None => { + return self.parse_float( + pos, + (res as f64) * 10.0 + (digit as f64), + visitor); + } + } + } + _ => { + return self.parse_number(pos, res, visitor); + } + } + } + } + _ => { + Err(self.error(ErrorCode::InvalidNumber)) + } + } + } + + fn parse_float(&mut self, + pos: bool, + mut res: f64, + mut visitor: V) -> Result + where V: de::Visitor, + { + loop { + match self.ch_or_null() { + c @ b'0' ... b'9' => { + try!(self.bump()); + + let digit = (c as u64) - ('0' as u64); + + res *= 10.0; + res += digit as f64; + } + _ => { + match self.ch_or_null() { + b'.' => { + return self.parse_decimal(pos, res, visitor); + } + b'e' | b'E' => { + return self.parse_exponent(pos, res, visitor); + } + _ => { + if !pos { + res = -res; + } + + return visitor.visit_f64(res); + } + } + } + } + } + } + + fn parse_number(&mut self, + pos: bool, + res: u64, + mut visitor: V) -> Result + where V: de::Visitor, + { match self.ch_or_null() { b'.' => { self.parse_decimal(pos, res as f64, visitor) @@ -167,53 +270,19 @@ impl Deserializer if pos { visitor.visit_u64(res) } else { - let res = (res as i64).wrapping_neg(); + let res_i64 = (res as i64).wrapping_neg(); - // Make sure we didn't underflow. - if res > 0 { - Err(self.error(ErrorCode::InvalidNumber)) + // Convert into a float if we underflow. + if res_i64 > 0 { + visitor.visit_f64(-(res as f64)) } else { - visitor.visit_i64(res) + visitor.visit_i64(res_i64) } } } } } - fn parse_integer(&mut self) -> Result { - let mut accum: u64 = 0; - - match self.ch_or_null() { - b'0' => { - try!(self.bump()); - - // There can be only one leading '0'. - match self.ch_or_null() { - b'0' ... b'9' => { - return Err(self.error(ErrorCode::InvalidNumber)); - } - _ => () - } - }, - b'1' ... b'9' => { - while !self.eof() { - match self.ch_or_null() { - c @ b'0' ... b'9' => { - accum = try_or_invalid!(self, accum.checked_mul(10)); - accum = try_or_invalid!(self, accum.checked_add((c as u64) - ('0' as u64))); - - try!(self.bump()); - } - _ => break, - } - } - } - _ => { return Err(self.error(ErrorCode::InvalidNumber)); } - } - - Ok(accum) - } - fn parse_decimal(&mut self, pos: bool, mut res: f64, diff --git a/serde_json/src/ser.rs b/serde_json/src/ser.rs index 67674aa5..b03ce38b 100644 --- a/serde_json/src/ser.rs +++ b/serde_json/src/ser.rs @@ -465,12 +465,7 @@ fn fmt_f32_or_null(wr: &mut W, value: f32) -> io::Result<()> match value.classify() { FpCategory::Nan | FpCategory::Infinite => wr.write_all(b"null"), _ => { - let s = format!("{:?}", value); - try!(wr.write_all(s.as_bytes())); - if !s.contains('.') { - try!(wr.write_all(b".0")) - } - Ok(()) + write!(wr, "{:?}", value) } } } @@ -481,12 +476,7 @@ fn fmt_f64_or_null(wr: &mut W, value: f64) -> io::Result<()> match value.classify() { FpCategory::Nan | FpCategory::Infinite => wr.write_all(b"null"), _ => { - let s = format!("{:?}", value); - try!(wr.write_all(s.as_bytes())); - if !s.contains('.') { - try!(wr.write_all(b".0")) - } - Ok(()) + write!(wr, "{:?}", value) } } } diff --git a/serde_tests/tests/test_json.rs b/serde_tests/tests/test_json.rs index 69b1b7f8..d8e177b7 100644 --- a/serde_tests/tests/test_json.rs +++ b/serde_tests/tests/test_json.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::f64; use std::fmt::Debug; use std::i64; use std::marker::PhantomData; @@ -108,12 +109,12 @@ fn test_write_i64() { #[test] fn test_write_f64() { - let min_string = f64::MIN.to_string(); - let max_string = f64::MAX.to_string(); - let epsilon_string = f64::EPSILON.to_string(); + let min_string = format!("{:?}", f64::MIN); + let max_string = format!("{:?}", f64::MAX); + let epsilon_string = format!("{:?}", f64::EPSILON); let tests = &[ - (3.0, "3.0"), + (3.0, "3"), (3.1, "3.1"), (-1.5, "-1.5"), (0.5, "0.5"), @@ -727,7 +728,6 @@ fn test_parse_number_errors() { ("1e", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 2)), ("1e+", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 3)), ("1a", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 2)), - ("777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 20)), ("1e777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 23)), ]); } @@ -773,6 +773,9 @@ fn test_parse_f64() { ("0.00e00", 0.0), ("0.00e+00", 0.0), ("0.00e-00", 0.0), + (&format!("{:?}", (i64::MIN as f64) - 1.0), (i64::MIN as f64) - 1.0), + (&format!("{:?}", (u64::MAX as f64) + 1.0), (u64::MAX as f64) + 1.0), + (&format!("{:?}", f64::EPSILON), f64::EPSILON), ]); } From b6371f045f62367f3c593929dde85df3acabf1d2 Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Thu, 6 Aug 2015 07:12:00 -0700 Subject: [PATCH 8/9] Simplify parsing a number --- serde_json/src/de.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 4a6e139e..8dea69e1 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -183,19 +183,8 @@ impl Deserializer // We need to be careful with overflow. If we can, try to keep the // number as a `u64` until we grow too large. At that point, switch to // parsing the value as a `f64`. - match res.checked_mul(10) { - Some(res_) => { - res = res_; - match res.checked_add(digit) { - Some(res_) => { res = res_; } - None => { - return self.parse_float( - pos, - (res as f64) + (digit as f64), - visitor); - } - } - } + match res.checked_mul(10).and_then(|val| val.checked_add(digit)) { + Some(res_) => { res = res_; } None => { return self.parse_float( pos, From 199a02cb68b956fbfb1891164541ae51c02a24de Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Thu, 6 Aug 2015 09:31:37 -0700 Subject: [PATCH 9/9] i64::wrapping_neg is not stable yet --- serde_json/src/de.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/serde_json/src/de.rs b/serde_json/src/de.rs index 8dea69e1..04491899 100644 --- a/serde_json/src/de.rs +++ b/serde_json/src/de.rs @@ -259,7 +259,9 @@ impl Deserializer if pos { visitor.visit_u64(res) } else { - let res_i64 = (res as i64).wrapping_neg(); + // FIXME: `wrapping_neg` will be stable in Rust 1.2 + //let res_i64 = (res as i64).wrapping_neg(); + let res_i64 = (!res + 1) as i64; // Convert into a float if we underflow. if res_i64 > 0 {