diff --git a/benches/bench_log.rs b/benches/bench_log.rs index e6086895..2fa17e32 100644 --- a/benches/bench_log.rs +++ b/benches/bench_log.rs @@ -1060,3 +1060,16 @@ fn bench_deserializer(b: &mut Bencher) { let _log: Log = json::from_str(s).unwrap(); }); } + +#[bench] +fn bench_deserializers(b: &mut Bencher) { + let s = r#"{"timestamp":25469139677502,"zone_id":123456,"zone_plan":1,"http":{"protocol":2,"status":200,"host_status":503,"up_status":520,"method":1,"content_type":"text/html","user_agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36","referer":"https://www.cloudflare.com/","request_uri":"/cdn-cgi/trace"},"origin":{"ip":"1.2.3.4","port":8000,"hostname":"www.example.com","protocol":2},"country":238,"cache_status":3,"server_ip":"192.168.1.1","server_name":"metal.cloudflare.com","remote_ip":"10.1.2.3","bytes_dlv":123456,"ray_id":"10c73629cce30078-LAX"}"#; + + b.bytes = s.len() as u64; + + //b.iter(|| { + for _ in range(0i, 10000) { + let _log: Log = json::from_str(s).unwrap(); + } + //}); +} diff --git a/src/json/mod.rs b/src/json/mod.rs index 82a8b615..dbe3a6bb 100644 --- a/src/json/mod.rs +++ b/src/json/mod.rs @@ -159,7 +159,7 @@ pub struct MyStruct { fn main() { let json_str_to_deserialize = "{ \"attr1\": 1, \"attr2\": \"toto\" }"; - let mut parser = json::Parser::new(json_str_to_deserialize.chars()); + let mut parser = json::Parser::new(json_str_to_deserialize.bytes()); let deserialized_object: MyStruct = match Deserializable::deserialize(&mut parser) { Ok(v) => v, Err(e) => fail!("Decoding error: {}", e) @@ -256,7 +256,7 @@ fn main() { // Deserialize like before. - let mut parser = json::Parser::new(json_str.as_slice().chars()); + let mut parser = json::Parser::new(json_str.as_slice().bytes()); let deserialized: TestStruct1 = Deserializable::deserialize(&mut parser).unwrap(); } ``` @@ -1647,15 +1647,15 @@ impl Stack { /// an iterator of char. pub struct Parser { rdr: Iter, - ch: Option, + ch: Option, line: uint, col: uint, // A state machine is kept to make it possible to interupt and resume parsing. state_stack: Vec, - buf: string::String, + buf: Vec, } -impl> Iterator> for Parser { +impl> Iterator> for Parser { #[inline] fn next(&mut self) -> Option> { let state = match self.state_stack.pop() { @@ -1698,28 +1698,33 @@ impl> Iterator> for Parser> Parser { +impl> Parser { /// Creates the JSON parser. #[inline] pub fn new(rdr: Iter) -> Parser { let mut p = Parser { rdr: rdr, - ch: Some('\x00'), + ch: Some(b'\x00'), line: 1, col: 0, state_stack: vec!(ParseValue), - buf: string::String::with_capacity(100), + buf: Vec::with_capacity(100), }; p.bump(); return p; } + #[inline(always)] fn eof(&self) -> bool { self.ch.is_none() } - fn ch_or_null(&self) -> char { self.ch.unwrap_or('\x00') } + + #[inline] + fn ch_or_null(&self) -> u8 { self.ch.unwrap_or(b'\x00') } + + #[inline(always)] fn bump(&mut self) { self.ch = self.rdr.next(); - if self.ch_is('\n') { + if self.ch_is(b'\n') { self.line += 1; self.col = 1; } else { @@ -1727,44 +1732,50 @@ impl> Parser { } } - fn next_char(&mut self) -> Option { + #[inline] + fn next_char(&mut self) -> Option { self.bump(); self.ch } - fn ch_is(&self, c: char) -> bool { + + #[inline(always)] + fn ch_is(&self, c: u8) -> bool { self.ch == Some(c) } + #[inline] fn error(&self, reason: ErrorCode) -> Result { Err(SyntaxError(reason, self.line, self.col)) } + #[inline] fn parse_whitespace(&mut self) { - while self.ch_is(' ') || - self.ch_is('\n') || - self.ch_is('\t') || - self.ch_is('\r') { self.bump(); } + while self.ch_is(b' ') || + self.ch_is(b'\n') || + self.ch_is(b'\t') || + self.ch_is(b'\r') { self.bump(); } } + #[inline] fn parse_number(&mut self) -> Result { let mut neg = 1; - if self.ch_is('-') { + if self.ch_is(b'-') { self.bump(); neg = -1; } let res = try!(self.parse_integer()); - if self.ch_is('.') || self.ch_is('e') || self.ch_is('E') { + if self.ch_is(b'.') || self.ch_is(b'e') || self.ch_is(b'E') { let neg = neg as f64; let mut res = res as f64; - if self.ch_is('.') { + if self.ch_is(b'.') { res = try!(self.parse_decimal(res)); } - if self.ch_is('e') || self.ch_is('E') { + if self.ch_is(b'e') || self.ch_is(b'E') { res = try!(self.parse_exponent(res)); } @@ -1774,23 +1785,24 @@ impl> Parser { } } + #[inline] fn parse_integer(&mut self) -> Result { let mut res = 0; match self.ch_or_null() { - '0' => { + b'0' => { self.bump(); // There can be only one leading '0'. match self.ch_or_null() { - '0' ... '9' => return self.error(InvalidNumber), + b'0' ... b'9' => return self.error(InvalidNumber), _ => () } }, - '1' ... '9' => { + b'1' ... b'9' => { while !self.eof() { match self.ch_or_null() { - c @ '0' ... '9' => { + c @ b'0' ... b'9' => { res *= 10; res += (c as i64) - ('0' as i64); self.bump(); @@ -1811,7 +1823,7 @@ impl> Parser { // Make sure a digit follows the decimal place. match self.ch_or_null() { - '0' ... '9' => (), + b'0' ... b'9' => (), _ => return self.error(InvalidNumber) } @@ -1819,9 +1831,9 @@ impl> Parser { let mut dec = 1.0; while !self.eof() { match self.ch_or_null() { - c @ '0' ... '9' => { + c @ b'0' ... b'9' => { dec /= 10.0; - res += (((c as int) - ('0' as int)) as f64) * dec; + res += (((c as int) - (b'0' as int)) as f64) * dec; self.bump(); } _ => break, @@ -1831,29 +1843,30 @@ impl> Parser { Ok(res) } + #[inline] fn parse_exponent(&mut self, mut res: f64) -> Result { self.bump(); let mut exp = 0u; let mut neg_exp = false; - if self.ch_is('+') { + if self.ch_is(b'+') { self.bump(); - } else if self.ch_is('-') { + } else if self.ch_is(b'-') { self.bump(); neg_exp = true; } // Make sure a digit follows the exponent place. match self.ch_or_null() { - '0' ... '9' => (), + b'0' ... b'9' => (), _ => return self.error(InvalidNumber) } while !self.eof() { match self.ch_or_null() { - c @ '0' ... '9' => { + c @ b'0' ... b'9' => { exp *= 10; - exp += (c as uint) - ('0' as uint); + exp += (c as uint) - (b'0' as uint); self.bump(); } @@ -1871,19 +1884,20 @@ impl> Parser { Ok(res) } + #[inline] fn decode_hex_escape(&mut self) -> Result { let mut i = 0u; let mut n = 0u16; while i < 4u && !self.eof() { self.bump(); n = match self.ch_or_null() { - c @ '0' ... '9' => n * 16_u16 + ((c as u16) - ('0' as u16)), - 'a' | 'A' => n * 16_u16 + 10_u16, - 'b' | 'B' => n * 16_u16 + 11_u16, - 'c' | 'C' => n * 16_u16 + 12_u16, - 'd' | 'D' => n * 16_u16 + 13_u16, - 'e' | 'E' => n * 16_u16 + 14_u16, - 'f' | 'F' => n * 16_u16 + 15_u16, + c @ b'0' ... b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)), + b'a' | b'A' => n * 16_u16 + 10_u16, + b'b' | b'B' => n * 16_u16 + 11_u16, + b'c' | b'C' => n * 16_u16 + 12_u16, + b'd' | b'D' => n * 16_u16 + 13_u16, + b'e' | b'E' => n * 16_u16 + 14_u16, + b'f' | b'F' => n * 16_u16 + 15_u16, _ => return self.error(InvalidEscape) }; @@ -1898,6 +1912,7 @@ impl> Parser { Ok(n) } + #[inline] fn parse_string(&mut self) -> Result<&str, ParserError> { self.buf.clear(); @@ -1912,15 +1927,15 @@ impl> Parser { if escape { match ch { - '"' => self.buf.push('"'), - '\\' => self.buf.push('\\'), - '/' => self.buf.push('/'), - 'b' => self.buf.push('\x08'), - 'f' => self.buf.push('\x0c'), - 'n' => self.buf.push('\n'), - 'r' => self.buf.push('\r'), - 't' => self.buf.push('\t'), - 'u' => { + b'"' => self.buf.push(b'"'), + b'\\' => self.buf.push(b'\\'), + b'/' => self.buf.push(b'/'), + b'b' => self.buf.push(b'\x08'), + b'f' => self.buf.push(b'\x0c'), + b'n' => self.buf.push(b'\n'), + b'r' => self.buf.push(b'\r'), + b't' => self.buf.push(b'\t'), + b'u' => { let c = match try!(self.decode_hex_escape()) { 0xDC00 ... 0xDFFF => return self.error(LoneLeadingSurrogateInHexEscape), @@ -1930,7 +1945,7 @@ impl> Parser { let c1 = self.next_char(); let c2 = self.next_char(); match (c1, c2) { - (Some('\\'), Some('u')) => (), + (Some(b'\\'), Some(b'u')) => (), _ => return self.error(UnexpectedEndOfHexEscape), } @@ -1947,18 +1962,20 @@ impl> Parser { } }; - self.buf.push(c); + let mut buf = [0u8, .. 4]; + let len = c.encode_utf8(buf).unwrap_or(0); + self.buf.extend(buf.slice_to(len).iter().map(|b| *b)); } _ => return self.error(InvalidEscape), } escape = false; } else { match ch { - '"' => { + b'"' => { self.bump(); - return Ok(self.buf.as_slice()); + return Ok(str::from_utf8(self.buf.as_slice()).unwrap()); } - '\\' => { + b'\\' => { escape = true; } ch => { @@ -1969,10 +1986,11 @@ impl> Parser { } } + #[inline] fn parse_list_start(&mut self) -> Result { self.parse_whitespace(); - if self.ch_is(']') { + if self.ch_is(b']') { self.bump(); Ok(de::End) } else { @@ -1981,14 +1999,15 @@ impl> Parser { } } + #[inline] fn parse_list_comma_or_end(&mut self) -> Result { self.parse_whitespace(); - if self.ch_is(',') { + if self.ch_is(b',') { self.bump(); self.state_stack.push(ParseListCommaOrEnd); self.parse_value() - } else if self.ch_is(']') { + } else if self.ch_is(b']') { self.bump(); Ok(de::End) } else if self.eof() { @@ -1998,10 +2017,11 @@ impl> Parser { } } + #[inline] fn parse_object_start(&mut self) -> Result, ParserError> { self.parse_whitespace(); - if self.ch_is('}') { + if self.ch_is(b'}') { self.bump(); Ok(None) } else { @@ -2009,13 +2029,14 @@ impl> Parser { } } + #[inline] fn parse_object_comma_or_end(&mut self) -> Result, ParserError> { self.parse_whitespace(); - if self.ch_is(',') { + if self.ch_is(b',') { self.bump(); Ok(Some(try!(self.parse_object_key()))) - } else if self.ch_is('}') { + } else if self.ch_is(b'}') { self.bump(); Ok(None) } else if self.eof() { @@ -2025,6 +2046,7 @@ impl> Parser { } } + #[inline] fn parse_object_key(&mut self) -> Result<&str, ParserError> { self.parse_whitespace(); @@ -2033,7 +2055,7 @@ impl> Parser { } match self.ch_or_null() { - '"' => { + b'"' => { self.state_stack.push(ParseObjectValue); Ok(try!(self.parse_string())) @@ -2042,10 +2064,11 @@ impl> Parser { } } + #[inline] fn parse_object_value(&mut self) -> Result { self.parse_whitespace(); - if self.ch_is(':') { + if self.ch_is(b':') { self.bump(); self.state_stack.push(ParseObjectCommaOrEnd); self.parse_value() @@ -2056,6 +2079,7 @@ impl> Parser { } } + #[inline] fn parse_value(&mut self) -> Result { self.parse_whitespace(); @@ -2064,19 +2088,19 @@ impl> Parser { } match self.ch_or_null() { - 'n' => self.parse_ident("ull", de::Null), - 't' => self.parse_ident("rue", de::Bool(true)), - 'f' => self.parse_ident("alse", de::Bool(false)), - '0' ... '9' | '-' => self.parse_number(), - '"' => { + b'n' => self.parse_ident(b"ull", de::Null), + b't' => self.parse_ident(b"rue", de::Bool(true)), + b'f' => self.parse_ident(b"alse", de::Bool(false)), + b'0' ... b'9' | b'-' => self.parse_number(), + b'"' => { Ok(de::String(try!(self.parse_string()).to_string())) } - '[' => { + b'[' => { self.bump(); self.state_stack.push(ParseListStart); Ok(de::SeqStart(0)) } - '{' => { + b'{' => { self.bump(); self.state_stack.push(ParseObjectStart); Ok(de::MapStart(0)) @@ -2087,8 +2111,9 @@ impl> Parser { } } - fn parse_ident(&mut self, ident: &str, token: de::Token) -> Result { - if ident.chars().all(|c| Some(c) == self.next_char()) { + #[inline] + fn parse_ident(&mut self, ident: &[u8], token: de::Token) -> Result { + if ident.iter().all(|c| Some(*c) == self.next_char()) { self.bump(); Ok(token) } else { @@ -2096,13 +2121,14 @@ impl> Parser { } } + #[inline] fn error_event(&mut self, reason: ErrorCode) -> Result { self.state_stack.clear(); Err(SyntaxError(reason, self.line, self.col)) } } -impl> de::Deserializer for Parser { +impl> de::Deserializer for Parser { fn end_of_stream_error(&mut self) -> ParserError { SyntaxError(EOFWhileParsingValue, self.line, self.col) } @@ -2215,9 +2241,9 @@ impl> de::Deserializer for Parser { } } -/// Decodes a json value from an `Iterator`. +/// Decodes a json value from an `Iterator`. pub fn from_iter< - Iter: Iterator, + Iter: Iterator, T: de::Deserializable, ParserError> >(iter: Iter) -> Result { let mut parser = Parser::new(iter); @@ -2234,9 +2260,9 @@ pub fn from_iter< /// Decodes a json value from a string pub fn from_str< 'a, - T: de::Deserializable>, ParserError> + T: de::Deserializable>, ParserError> >(s: &'a str) -> Result { - from_iter(s.chars()) + from_iter(s.bytes()) } /// Decodes a json value from a `Json`. @@ -2416,7 +2442,7 @@ mod tests { use std::collections::TreeMap; use super::{Json, Null, Boolean, Floating, String, List, Object}; - use super::{Parser, ParserError, from_iter, from_str}; + use super::{Parser, ParserError, from_str}; use super::{JsonDeserializer, ToJson, from_json}; use super::{ EOFWhileParsingList, @@ -2865,23 +2891,23 @@ mod tests { // FIXME (#5527): these could be merged once UFCS is finished. fn test_parse_err< 'a, - T: Show + de::Deserializable>, ParserError> + T: Show + de::Deserializable>, ParserError> >(errors: &[(&'a str, ParserError)]) { for &(s, ref err) in errors.iter() { - let v: Result = from_iter(s.chars()); + let v: Result = from_str(s); assert_eq!(v.unwrap_err(), *err); } } fn test_parse_ok< 'a, - T: PartialEq + Show + ToJson + de::Deserializable>, ParserError> + T: PartialEq + Show + ToJson + de::Deserializable>, ParserError> >(errors: &[(&'a str, T)]) { for &(s, ref value) in errors.iter() { - let v: T = from_iter(s.chars()).unwrap(); + let v: T = from_str(s).unwrap(); assert_eq!(v, *value); - let v: Json = from_iter(s.chars()).unwrap(); + let v: Json = from_str(s).unwrap(); assert_eq!(v, value.to_json()); } } @@ -3061,7 +3087,7 @@ mod tests { ("[[3], [1, 2]]", vec!(vec!(3i), vec!(1, 2))), ]); - let v: () = from_iter("[]".chars()).unwrap(); + let v: () = from_str("[]").unwrap(); assert_eq!(v, ()); test_parse_ok([ @@ -3973,7 +3999,7 @@ mod bench { let src = json_str(count); b.iter( || { - let mut parser = Parser::new(src.as_slice().chars()); + let mut parser = Parser::new(src.as_slice().bytes()); assert_eq!(parser.next(), Some(Ok(de::SeqStart(0)))); for _ in range(0, count) {