diff --git a/serde2/benches/bench_log.rs b/serde2/benches/bench_log.rs index 67649d18..3a905073 100644 --- a/serde2/benches/bench_log.rs +++ b/serde2/benches/bench_log.rs @@ -16,6 +16,8 @@ use serde2::json::ser::escape_str; use serde2::json; use serde2::ser::{Serialize, Serializer}; use serde2::ser; +use serde2::de::{Deserialize, Deserializer}; +use serde2::de; use serialize::Encodable; @@ -34,6 +36,69 @@ struct Http { request_uri: String, } +impl< + S: Deserializer, + E, +> Deserialize for Http { + fn deserialize(state: &mut S) -> Result { + struct Visitor; + + impl< + S: Deserializer, + E, + > de::Visitor for Visitor { + fn visit_map< + Visitor: de::MapVisitor, + >(&mut self, state: &mut S, mut visitor: Visitor) -> Result { + let mut protocol = None; + let mut status = None; + let mut host_status = None; + let mut up_status = None; + let mut method = None; + let mut content_type = None; + let mut user_agent = None; + let mut referer = None; + let mut request_uri = None; + + loop { + match try!(visitor.visit_key(state)) { + Some(s) => { + let s: String = s; + match s.as_slice() { + "protocol" => { protocol = Some(try!(visitor.visit_value(state))); } + "status" => { status = Some(try!(visitor.visit_value(state))); } + "host_status" => { host_status = Some(try!(visitor.visit_value(state))); } + "up_status" => { up_status = Some(try!(visitor.visit_value(state))); } + "method" => { method = Some(try!(visitor.visit_value(state))); } + "content_type" => { content_type = Some(try!(visitor.visit_value(state))); } + "user_agent" => { user_agent = Some(try!(visitor.visit_value(state))); } + "referer" => { referer = Some(try!(visitor.visit_value(state))); } + "request_uri" => { request_uri = Some(try!(visitor.visit_value(state))); } + _ => panic!(), + } + } + None => { break; } + } + } + + Ok(Http { + protocol: protocol.unwrap(), + status: status.unwrap(), + host_status: host_status.unwrap(), + up_status: up_status.unwrap(), + method: method.unwrap(), + content_type: content_type.unwrap(), + user_agent: user_agent.unwrap(), + referer: referer.unwrap(), + request_uri: request_uri.unwrap(), + }) + } + } + + state.visit(&mut Visitor) + } +} + #[deriving(Show, PartialEq, FromPrimitive)] enum HttpProtocol { HTTP_PROTOCOL_UNKNOWN, @@ -68,14 +133,12 @@ impl ser::Serialize for HttpProtocol { } } -/* -impl, E> de::Deserialize for HttpProtocol { +impl, E> de::Deserialize for HttpProtocol { #[inline] - fn deserialize_token(d: &mut D, token: de::Token) -> Result { - d.expect_from_primitive(token) + fn deserialize(state: &mut S) -> Result { + de::deserialize_from_primitive(state) } } -*/ #[deriving(Show, PartialEq, FromPrimitive)] enum HttpMethod { @@ -119,14 +182,12 @@ impl ser::Serialize for HttpMethod { } } -/* -impl, E> de::Deserialize for HttpMethod { +impl, E> de::Deserialize for HttpMethod { #[inline] - fn deserialize_token(d: &mut D, token: de::Token) -> Result { - d.expect_from_primitive(token) + fn deserialize(state: &mut S) -> Result { + de::deserialize_from_primitive(state) } } -*/ #[deriving(Show, PartialEq, FromPrimitive)] enum CacheStatus { @@ -163,14 +224,12 @@ impl ser::Serialize for CacheStatus { } } -/* -impl, E> de::Deserialize for CacheStatus { +impl, E> de::Deserialize for CacheStatus { #[inline] - fn deserialize_token(d: &mut D, token: de::Token) -> Result { - d.expect_from_primitive(token) + fn deserialize(state: &mut S) -> Result { + de::deserialize_from_primitive(state) } } -*/ #[deriving(Show, PartialEq, Encodable, Decodable)] #[deriving_serialize] @@ -182,6 +241,54 @@ struct Origin { protocol: OriginProtocol, } +impl< + S: Deserializer, + E, +> Deserialize for Origin { + fn deserialize(state: &mut S) -> Result { + struct Visitor; + + impl< + S: Deserializer, + E, + > de::Visitor for Visitor { + fn visit_map< + Visitor: de::MapVisitor, + >(&mut self, state: &mut S, mut visitor: Visitor) -> Result { + let mut ip = None; + let mut port = None; + let mut hostname = None; + let mut protocol = None; + + loop { + match try!(visitor.visit_key(state)) { + Some(s) => { + let s: String = s; + match s.as_slice() { + "ip" => { ip = Some(try!(visitor.visit_value(state))); } + "port" => { port = Some(try!(visitor.visit_value(state))); } + "hostname" => { hostname = Some(try!(visitor.visit_value(state))); } + "protocol" => { protocol = Some(try!(visitor.visit_value(state))); } + _ => panic!(), + } + } + None => { break; } + } + } + + Ok(Origin { + ip: ip.unwrap(), + port: port.unwrap(), + hostname: hostname.unwrap(), + protocol: protocol.unwrap(), + }) + } + } + + state.visit(&mut Visitor) + } +} + #[deriving(Show, PartialEq, FromPrimitive)] enum OriginProtocol { ORIGIN_PROTOCOL_UNKNOWN, @@ -216,14 +323,12 @@ impl ser::Serialize for OriginProtocol { } } -/* -impl, E> de::Deserialize for OriginProtocol { +impl, E> de::Deserialize for OriginProtocol { #[inline] - fn deserialize_token(d: &mut D, token: de::Token) -> Result { - d.expect_from_primitive(token) + fn deserialize(state: &mut S) -> Result { + de::deserialize_from_primitive(state) } } -*/ #[deriving(Show, PartialEq, FromPrimitive)] enum ZonePlan { @@ -261,14 +366,12 @@ impl ser::Serialize for ZonePlan { } } -/* -impl, E> de::Deserialize for ZonePlan { +impl, E> de::Deserialize for ZonePlan { #[inline] - fn deserialize_token(d: &mut D, token: de::Token) -> Result { - d.expect_from_primitive(token) + fn deserialize(state: &mut S) -> Result { + de::deserialize_from_primitive(state) } } -*/ #[deriving(Show, PartialEq, FromPrimitive)] enum Country { @@ -557,14 +660,12 @@ impl ser::Serialize for Country { } } -/* -impl, E> de::Deserialize for Country { +impl, E> de::Deserialize for Country { #[inline] - fn deserialize_token(d: &mut D, token: de::Token) -> Result { - d.expect_from_primitive(token) + fn deserialize(state: &mut S) -> Result { + de::deserialize_from_primitive(state) } } -*/ #[deriving(Show, PartialEq, Encodable, Decodable)] #[deriving_serialize] @@ -584,6 +685,78 @@ struct Log { ray_id: String, } +impl< + S: Deserializer, + E, +> Deserialize for Log { + fn deserialize(state: &mut S) -> Result { + struct Visitor; + + impl< + S: Deserializer, + E, + > de::Visitor for Visitor { + fn visit_map< + Visitor: de::MapVisitor, + >(&mut self, state: &mut S, mut visitor: Visitor) -> Result { + let mut timestamp = None; + let mut zone_id = None; + let mut zone_plan = None; + let mut http = None; + let mut origin = None; + let mut country = None; + let mut cache_status = None; + let mut server_ip = None; + let mut server_name = None; + let mut remote_ip = None; + let mut bytes_dlv = None; + let mut ray_id = None; + + loop { + match try!(visitor.visit_key(state)) { + Some(s) => { + let s: String = s; + match s.as_slice() { + "timestamp" => { timestamp = Some(try!(visitor.visit_value(state))); } + "zone_id" => { zone_id = Some(try!(visitor.visit_value(state))); } + "zone_plan" => { zone_plan = Some(try!(visitor.visit_value(state))); } + "http" => { http = Some(try!(visitor.visit_value(state))); } + "origin" => { origin = Some(try!(visitor.visit_value(state))); } + "country" => { country = Some(try!(visitor.visit_value(state))); } + "cache_status" => { cache_status = Some(try!(visitor.visit_value(state))); } + "server_ip" => { server_ip = Some(try!(visitor.visit_value(state))); } + "server_name" => { server_name = Some(try!(visitor.visit_value(state))); } + "remote_ip" => { remote_ip = Some(try!(visitor.visit_value(state))); } + "bytes_dlv" => { bytes_dlv = Some(try!(visitor.visit_value(state))); } + "ray_id" => { ray_id = Some(try!(visitor.visit_value(state))); } + _ => panic!(), + } + } + None => { break; } + } + } + + Ok(Log { + timestamp: timestamp.unwrap(), + zone_id: zone_id.unwrap(), + zone_plan: zone_plan.unwrap(), + http: http.unwrap(), + origin: origin.unwrap(), + country: country.unwrap(), + cache_status: cache_status.unwrap(), + server_ip: server_ip.unwrap(), + server_name: server_name.unwrap(), + remote_ip: remote_ip.unwrap(), + bytes_dlv: bytes_dlv.unwrap(), + ray_id: ray_id.unwrap(), + }) + } + } + + state.visit(&mut Visitor) + } +} + impl Log { fn new() -> Log { Log { @@ -769,7 +942,7 @@ fn test_serializer_vec() { let mut serializer = json::Writer::new(wr); serializer.visit(&log).unwrap(); - let json = serializer.unwrap(); + let json = serializer.into_inner(); assert_eq!(json.as_slice(), JSON_STR.as_bytes()); } @@ -786,7 +959,7 @@ fn bench_serializer_vec(b: &mut Bencher) { let mut serializer = json::Writer::new(wr.by_ref()); serializer.visit(&log).unwrap(); - let _json = serializer.unwrap(); + let _json = serializer.into_inner(); }); } @@ -1117,3 +1290,12 @@ fn bench_manual_my_mem_writer1_escape(b: &mut Bencher) { manual_escape(&mut wr, &log); }); } + +#[bench] +fn bench_deserializer(b: &mut Bencher) { + b.bytes = JSON_STR.len() as u64; + + b.iter(|| { + let _log: Log = json::from_str(JSON_STR).unwrap(); + }); +} diff --git a/serde2/src/bin.rs b/serde2/src/bin.rs index 1feadae4..ee809ef1 100644 --- a/serde2/src/bin.rs +++ b/serde2/src/bin.rs @@ -8,7 +8,7 @@ use serde2::de; use serde2::de::{Deserialize, Deserializer}; #[deriving(Show)] -enum Token { +pub enum Token { Null, Int(int), String(string::String), @@ -32,7 +32,7 @@ struct MyDeserializer { } impl> MyDeserializer { - fn new(tokens: Iter) -> MyDeserializer { + pub fn new(tokens: Iter) -> MyDeserializer { MyDeserializer { tokens: tokens, peeked: None, @@ -193,10 +193,9 @@ struct MyMapVisitor { impl< Iter: Iterator, > de::MapVisitor, Error> for MyMapVisitor { - fn visit< + fn visit_key< K: Deserialize, Error>, - V: Deserialize, Error>, - >(&mut self, d: &mut MyDeserializer) -> Result, Error> { + >(&mut self, d: &mut MyDeserializer) -> Result, Error> { match d.peek() { Some(&Token::End) => { d.next(); @@ -205,10 +204,7 @@ impl< Some(_) => { self.len -= 1; - let key = try!(Deserialize::deserialize(d)); - let value = try!(Deserialize::deserialize(d)); - - Ok(Some((key, value))) + Ok(Some(try!(Deserialize::deserialize(d)))) } None => { Err(d.syntax_error()) @@ -216,6 +212,12 @@ impl< } } + fn visit_value< + V: Deserialize, Error>, + >(&mut self, d: &mut MyDeserializer) -> Result { + Ok(try!(Deserialize::deserialize(d))) + } + fn end(&mut self, d: &mut MyDeserializer) -> Result<(), Error> { match d.next() { Some(Token::End) => Ok(()), @@ -326,7 +328,7 @@ mod json { /////////////////////////////////////////////////////////////////////////////// -fn main() { +pub fn main() { let tokens = vec!( Token::SeqStart(2), Token::Int(1), diff --git a/serde2/src/de.rs b/serde2/src/de.rs index 9ec177e5..d79e17b1 100644 --- a/serde2/src/de.rs +++ b/serde2/src/de.rs @@ -1,6 +1,5 @@ use std::collections::{HashMap, TreeMap}; use std::hash::Hash; -use std::num; /////////////////////////////////////////////////////////////////////////////// @@ -39,6 +38,18 @@ pub trait Visitor, R, E> { self.visit_i64(state, v as i64) } + fn visit_i8(&mut self, state: &mut S, v: i8) -> Result { + self.visit_i64(state, v as i64) + } + + fn visit_i16(&mut self, state: &mut S, v: i16) -> Result { + self.visit_i64(state, v as i64) + } + + fn visit_i32(&mut self, state: &mut S, v: i32) -> Result { + self.visit_i64(state, v as i64) + } + fn visit_i64(&mut self, state: &mut S, _v: i64) -> Result { Err(state.syntax_error()) } @@ -47,6 +58,18 @@ pub trait Visitor, R, E> { self.visit_u64(state, v as u64) } + fn visit_u8(&mut self, state: &mut S, v: u8) -> Result { + self.visit_u64(state, v as u64) + } + + fn visit_u16(&mut self, state: &mut S, v: u16) -> Result { + self.visit_u64(state, v as u64) + } + + fn visit_u32(&mut self, state: &mut S, v: u32) -> Result { + self.visit_u64(state, v as u64) + } + fn visit_u64(&mut self, state: &mut S, _v: u64) -> Result { Err(state.syntax_error()) } @@ -109,7 +132,23 @@ pub trait MapVisitor { fn visit< K: Deserialize, V: Deserialize, - >(&mut self, state: &mut S) -> Result, E>; + >(&mut self, state: &mut S) -> Result, E> { + match try!(self.visit_key(state)) { + Some(key) => { + let value = try!(self.visit_value(state)); + Ok(Some((key, value))) + } + None => Ok(None) + } + } + + fn visit_key< + K: Deserialize, + >(&mut self, state: &mut S) -> Result, E>; + + fn visit_value< + V: Deserialize, + >(&mut self, state: &mut S) -> Result; fn end(&mut self, state: &mut S) -> Result<(), E>; @@ -167,9 +206,9 @@ impl< /////////////////////////////////////////////////////////////////////////////// macro_rules! impl_deserialize_num_method { - ($dst_ty:ty, $src_ty:ty, $method:ident) => { - fn $method(&mut self, state: &mut S, v: $src_ty) -> Result<$dst_ty, E> { - match num::cast(v) { + ($src_ty:ty, $method:ident, $from_method:ident) => { + fn $method(&mut self, state: &mut S, v: $src_ty) -> Result { + match FromPrimitive::$from_method(v) { Some(v) => Ok(v), None => Err(state.syntax_error()), } @@ -177,31 +216,56 @@ macro_rules! impl_deserialize_num_method { } } +#[inline] +pub fn deserialize_from_primitive< + S: Deserializer, + E, + T: Deserialize + FromPrimitive +>(state: &mut S) -> Result { + struct Visitor; + + impl< + S: Deserializer, + E, + T: Deserialize + FromPrimitive + > self::Visitor for Visitor { + impl_deserialize_num_method!(int, visit_int, from_int) + impl_deserialize_num_method!(i8, visit_i8, from_i8) + impl_deserialize_num_method!(i16, visit_i16, from_i16) + impl_deserialize_num_method!(i32, visit_i32, from_i32) + impl_deserialize_num_method!(i64, visit_i64, from_i64) + impl_deserialize_num_method!(uint, visit_uint, from_uint) + impl_deserialize_num_method!(u8, visit_u8, from_u8) + impl_deserialize_num_method!(u16, visit_u16, from_u16) + impl_deserialize_num_method!(u32, visit_u32, from_u32) + impl_deserialize_num_method!(u64, visit_u64, from_u64) + impl_deserialize_num_method!(f32, visit_f32, from_f32) + impl_deserialize_num_method!(f64, visit_f64, from_f64) + } + + state.visit(&mut Visitor) +} + macro_rules! impl_deserialize_num { ($ty:ty) => { impl, E> Deserialize for $ty { #[inline] fn deserialize(state: &mut S) -> Result<$ty, E> { - struct Visitor; - - impl, E> self::Visitor for Visitor { - impl_deserialize_num_method!($ty, int, visit_int) - impl_deserialize_num_method!($ty, i64, visit_i64) - impl_deserialize_num_method!($ty, uint, visit_uint) - impl_deserialize_num_method!($ty, u64, visit_u64) - impl_deserialize_num_method!($ty, f32, visit_f32) - impl_deserialize_num_method!($ty, f64, visit_f64) - } - - state.visit(&mut Visitor) + deserialize_from_primitive(state) } } } } impl_deserialize_num!(int) +impl_deserialize_num!(i8) +impl_deserialize_num!(i16) +impl_deserialize_num!(i32) impl_deserialize_num!(i64) impl_deserialize_num!(uint) +impl_deserialize_num!(u8) +impl_deserialize_num!(u16) +impl_deserialize_num!(u32) impl_deserialize_num!(u64) impl_deserialize_num!(f32) impl_deserialize_num!(f64) diff --git a/serde2/src/json/de.rs b/serde2/src/json/de.rs index 69a01a8f..56841b05 100644 --- a/serde2/src/json/de.rs +++ b/serde2/src/json/de.rs @@ -1,104 +1,36 @@ use std::char; -use std::fmt; use std::num::Float; use std::str::ScalarValue; use std::str; use de; use de::Deserializer; - -#[deriving(Clone, PartialEq, Eq)] -pub enum ErrorCode { - EOFWhileParsingList, - EOFWhileParsingObject, - EOFWhileParsingString, - EOFWhileParsingValue, - ExpectedColon, - InvalidEscape, - InvalidNumber, - InvalidSyntax(SyntaxExpectation), - InvalidUnicodeCodePoint, - KeyMustBeAString, - LoneLeadingSurrogateInHexEscape, - MissingField(&'static str), - NotFourDigit, - NotUtf8, - TrailingCharacters, - UnexpectedEndOfHexEscape, - UnknownVariant, - UnrecognizedHex, -} - -/// The failed expectation of InvalidSyntax -#[deriving(Clone, PartialEq, Eq, Show)] -pub enum SyntaxExpectation { - ListCommaOrEnd, - ObjectCommaOrEnd, - SomeValue, - SomeIdent, - EnumMapStart, - EnumVariantString, - EnumToken, - EnumEndToken, - EnumEnd, -} - -impl fmt::Show for ErrorCode { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - ErrorCode::EOFWhileParsingList => "EOF While parsing list".fmt(f), - ErrorCode::EOFWhileParsingObject => "EOF While parsing object".fmt(f), - ErrorCode::EOFWhileParsingString => "EOF While parsing string".fmt(f), - ErrorCode::EOFWhileParsingValue => "EOF While parsing value".fmt(f), - ErrorCode::ExpectedColon => "expected `:`".fmt(f), - ErrorCode::InvalidEscape => "invalid escape".fmt(f), - ErrorCode::InvalidNumber => "invalid number".fmt(f), - ErrorCode::InvalidSyntax(expect) => { - write!(f, "invalid syntax, expected: {}", expect) - } - ErrorCode::InvalidUnicodeCodePoint => "invalid unicode code point".fmt(f), - ErrorCode::KeyMustBeAString => "key must be a string".fmt(f), - ErrorCode::LoneLeadingSurrogateInHexEscape => "lone leading surrogate in hex escape".fmt(f), - ErrorCode::MissingField(field) => { - write!(f, "missing field \"{}\"", field) - } - ErrorCode::NotFourDigit => "invalid \\u escape (not four digits)".fmt(f), - ErrorCode::NotUtf8 => "contents not utf-8".fmt(f), - ErrorCode::TrailingCharacters => "trailing characters".fmt(f), - ErrorCode::UnexpectedEndOfHexEscape => "unexpected end of hex escape".fmt(f), - ErrorCode::UnknownVariant => "unknown variant".fmt(f), - ErrorCode::UnrecognizedHex => "invalid \\u escape (unrecognized hex)".fmt(f), - } - } -} - -#[deriving(PartialEq, Eq, Show)] -pub enum Error { - SyntaxError(ErrorCode, uint, uint), -} +use super::error::{Error, ErrorCode}; pub struct Parser { rdr: Iter, - ch: Option, + ch: Option, line: uint, col: uint, + buf: Vec, } -impl< - Iter: Iterator, -> Parser { +impl> Parser { /// Creates the JSON parser. + #[inline] pub fn new(rdr: Iter) -> Parser { let mut p = Parser { rdr: rdr, - ch: Some('\x00'), + ch: Some(b'\x00'), line: 1, col: 0, + buf: Vec::with_capacity(128), }; p.bump(); return p; } + #[inline] pub fn end(&mut self) -> Result<(), Error> { if self.eof() { Ok(()) @@ -109,12 +41,14 @@ impl< fn eof(&self) -> bool { self.ch.is_none() } - fn ch_or_null(&self) -> char { self.ch.unwrap_or('\x00') } + #[inline] + fn ch_or_null(&self) -> u8 { self.ch.unwrap_or(b'\x00') } + #[inline] fn bump(&mut self) { self.ch = self.rdr.next(); - if self.ch_is('\n') { + if self.ch_is(b'\n') { self.line += 1; self.col = 1; } else { @@ -122,27 +56,31 @@ impl< } } - fn next_char(&mut self) -> Option { + #[inline] + fn next_char(&mut self) -> Option { self.bump(); self.ch } - fn ch_is(&self, c: char) -> bool { + #[inline] + fn ch_is(&self, c: u8) -> bool { self.ch == Some(c) } - fn parse_whitespace(&mut self) { - while self.ch_is(' ') || - self.ch_is('\n') || - self.ch_is('\t') || - self.ch_is('\r') { self.bump(); } - } - + #[inline] fn error(&mut self, reason: ErrorCode) -> Error { - //self.state_stack.clear(); Error::SyntaxError(reason, self.line, self.col) } + #[inline] + fn parse_whitespace(&mut self) { + while self.ch_is(b' ') || + self.ch_is(b'\n') || + self.ch_is(b'\t') || + self.ch_is(b'\r') { self.bump(); } + } + + #[inline] fn parse_value< R, V: de::Visitor, R, Error>, @@ -154,68 +92,71 @@ impl< } match self.ch_or_null() { - 'n' => { - try!(self.parse_ident("ull")); + b'n' => { + try!(self.parse_ident(b"ull")); visitor.visit_null(self) } - 't' => { - try!(self.parse_ident("rue")); + b't' => { + try!(self.parse_ident(b"rue")); visitor.visit_bool(self, true) } - 'f' => { - try!(self.parse_ident("alse")); + b'f' => { + try!(self.parse_ident(b"alse")); visitor.visit_bool(self, false) } - '0' ... '9' | '-' => self.parse_number(visitor), - '"' => { - let s = try!(self.parse_string()); + b'0' ... b'9' | b'-' => self.parse_number(visitor), + b'"' => { + try!(self.parse_string()); + let s = String::from_utf8(self.buf.clone()).unwrap(); visitor.visit_string(self, s) } - '[' => { + b'[' => { self.bump(); visitor.visit_seq(self, SeqVisitor { first: true }) } - '{' => { + b'{' => { self.bump(); visitor.visit_map(self, MapVisitor { first: true }) } _ => { - Err(self.error(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeValue))) + Err(self.error(ErrorCode::ExpectedSomeValue)) } } } - fn parse_ident(&mut self, ident: &str) -> Result<(), Error> { - if ident.chars().all(|c| Some(c) == self.next_char()) { + #[inline] + fn parse_ident(&mut self, ident: &[u8]) -> Result<(), Error> { + if ident.iter().all(|c| Some(*c) == self.next_char()) { self.bump(); Ok(()) } else { - Err(self.error(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeIdent))) + Err(self.error(ErrorCode::ExpectedSomeIdent)) } } + #[inline] fn parse_number< R, V: de::Visitor, R, Error>, >(&mut self, visitor: &mut V) -> Result { let mut neg = 1; - if self.ch_is('-') { + if self.ch_is(b'-') { self.bump(); neg = -1; } let res = try!(self.parse_integer()); - if self.ch_is('.') || self.ch_is('e') || self.ch_is('E') { + if self.ch_is(b'.') || self.ch_is(b'e') || self.ch_is(b'E') { let neg = neg as f64; let mut res = res as f64; - if self.ch_is('.') { + if self.ch_is(b'.') { res = try!(self.parse_decimal(res)); } - if self.ch_is('e') || self.ch_is('E') { + if self.ch_is(b'e') || self.ch_is(b'E') { res = try!(self.parse_exponent(res)); } @@ -225,59 +166,57 @@ impl< } } + #[inline] fn parse_integer(&mut self) -> Result { let mut res = 0; match self.ch_or_null() { - '0' => { + b'0' => { self.bump(); // There can be only one leading '0'. match self.ch_or_null() { - '0' ... '9' => { + b'0' ... b'9' => { return Err(self.error(ErrorCode::InvalidNumber)); } _ => () } }, - '1' ... '9' => { + b'1' ... b'9' => { while !self.eof() { match self.ch_or_null() { - c @ '0' ... '9' => { + c @ b'0' ... b'9' => { res *= 10; - res += (c as i64) - ('0' as i64); + res += (c as i64) - (b'0' as i64); self.bump(); } _ => break, } } } - _ => { - return Err(self.error(ErrorCode::InvalidNumber)); - } + _ => { return Err(self.error(ErrorCode::InvalidNumber)); } } Ok(res) } + #[inline] fn parse_decimal(&mut self, res: f64) -> Result { self.bump(); // Make sure a digit follows the decimal place. match self.ch_or_null() { - '0' ... '9' => (), - _ => { - return Err(self.error(ErrorCode::InvalidNumber)); - } + b'0' ... b'9' => (), + _ => { return Err(self.error(ErrorCode::InvalidNumber)); } } let mut res = res; let mut dec = 1.0; while !self.eof() { match self.ch_or_null() { - c @ '0' ... '9' => { + c @ b'0' ... b'9' => { dec /= 10.0; - res += (((c as int) - ('0' as int)) as f64) * dec; + res += (((c as int) - (b'0' as int)) as f64) * dec; self.bump(); } _ => break, @@ -287,31 +226,30 @@ impl< Ok(res) } + #[inline] fn parse_exponent(&mut self, mut res: f64) -> Result { self.bump(); let mut exp = 0u; let mut neg_exp = false; - if self.ch_is('+') { + if self.ch_is(b'+') { self.bump(); - } else if self.ch_is('-') { + } else if self.ch_is(b'-') { self.bump(); neg_exp = true; } // Make sure a digit follows the exponent place. match self.ch_or_null() { - '0' ... '9' => (), - _ => { - return Err(self.error(ErrorCode::InvalidNumber)); - } + b'0' ... b'9' => (), + _ => { return Err(self.error(ErrorCode::InvalidNumber)); } } while !self.eof() { match self.ch_or_null() { - c @ '0' ... '9' => { + c @ b'0' ... b'9' => { exp *= 10; - exp += (c as uint) - ('0' as uint); + exp += (c as uint) - (b'0' as uint); self.bump(); } @@ -329,22 +267,21 @@ impl< Ok(res) } + #[inline] fn decode_hex_escape(&mut self) -> Result { let mut i = 0u; let mut n = 0u16; while i < 4u && !self.eof() { self.bump(); n = match self.ch_or_null() { - c @ '0' ... '9' => n * 16_u16 + ((c as u16) - ('0' as u16)), - 'a' | 'A' => n * 16_u16 + 10_u16, - 'b' | 'B' => n * 16_u16 + 11_u16, - 'c' | 'C' => n * 16_u16 + 12_u16, - 'd' | 'D' => n * 16_u16 + 13_u16, - 'e' | 'E' => n * 16_u16 + 14_u16, - 'f' | 'F' => n * 16_u16 + 15_u16, - _ => { - return Err(self.error(ErrorCode::InvalidEscape)); - } + c @ b'0' ... b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)), + b'a' | b'A' => n * 16_u16 + 10_u16, + b'b' | b'B' => n * 16_u16 + 11_u16, + b'c' | b'C' => n * 16_u16 + 12_u16, + b'd' | b'D' => n * 16_u16 + 13_u16, + b'e' | b'E' => n * 16_u16 + 14_u16, + b'f' | b'F' => n * 16_u16 + 15_u16, + _ => { return Err(self.error(ErrorCode::InvalidEscape)); } }; i += 1u; @@ -358,81 +295,91 @@ impl< Ok(n) } - fn parse_string(&mut self) -> Result { + #[inline] + fn parse_string(&mut self) -> Result<(), Error> { + self.buf.clear(); + let mut escape = false; - let mut res = String::new(); loop { - self.bump(); - if self.eof() { - return Err(self.error(ErrorCode::EOFWhileParsingString)); - } + let ch = match self.next_char() { + Some(ch) => ch, + None => { return Err(self.error(ErrorCode::EOFWhileParsingString)); } + }; if escape { - match self.ch_or_null() { - '"' => res.push('"'), - '\\' => res.push('\\'), - '/' => res.push('/'), - 'b' => res.push('\x08'), - 'f' => res.push('\x0c'), - 'n' => res.push('\n'), - 'r' => res.push('\r'), - 't' => res.push('\t'), - 'u' => match try!(self.decode_hex_escape()) { - 0xDC00 ... 0xDFFF => { - return Err(self.error(ErrorCode::LoneLeadingSurrogateInHexEscape)); - } + match ch { + b'"' => self.buf.push(b'"'), + b'\\' => self.buf.push(b'\\'), + b'/' => self.buf.push(b'/'), + b'b' => self.buf.push(b'\x08'), + b'f' => self.buf.push(b'\x0c'), + b'n' => self.buf.push(b'\n'), + b'r' => self.buf.push(b'\r'), + b't' => self.buf.push(b'\t'), + b'u' => { + let c = match try!(self.decode_hex_escape()) { + 0xDC00 ... 0xDFFF => { + return Err(self.error(ErrorCode::LoneLeadingSurrogateInHexEscape)); + } - // Non-BMP characters are encoded as a sequence of - // two hex escapes, representing UTF-16 surrogates. - n1 @ 0xD800 ... 0xDBFF => { - let c1 = self.next_char(); - let c2 = self.next_char(); - match (c1, c2) { - (Some('\\'), Some('u')) => (), - _ => { - return Err(self.error(ErrorCode::UnexpectedEndOfHexEscape)); + // Non-BMP characters are encoded as a sequence of + // two hex escapes, representing UTF-16 surrogates. + n1 @ 0xD800 ... 0xDBFF => { + let c1 = self.next_char(); + let c2 = self.next_char(); + match (c1, c2) { + (Some(b'\\'), Some(b'u')) => (), + _ => { + return Err(self.error(ErrorCode::UnexpectedEndOfHexEscape)); + } + } + + let buf = &[n1, try!(self.decode_hex_escape())]; + match str::utf16_items(buf.as_slice()).next() { + Some(ScalarValue(c)) => c, + _ => { + return Err(self.error(ErrorCode::LoneLeadingSurrogateInHexEscape)); + } } } - let buf = [n1, try!(self.decode_hex_escape())]; - match str::utf16_items(buf.as_slice()).next() { - Some(ScalarValue(c)) => res.push(c), - _ => { - return Err(self.error(ErrorCode::LoneLeadingSurrogateInHexEscape)); + n => match char::from_u32(n as u32) { + Some(c) => c, + None => { + return Err(self.error(ErrorCode::InvalidUnicodeCodePoint)); } } - } + }; - n => match char::from_u32(n as u32) { - Some(c) => res.push(c), - None => { - return Err(self.error(ErrorCode::InvalidUnicodeCodePoint)); - } - }, - }, + let buf = &mut [0u8, .. 4]; + let len = c.encode_utf8(buf).unwrap_or(0); + self.buf.extend(buf.slice_to(len).iter().map(|b| *b)); + } _ => { return Err(self.error(ErrorCode::InvalidEscape)); } } escape = false; - } else if self.ch_is('\\') { - escape = true; } else { - match self.ch { - Some('"') => { + match ch { + b'"' => { self.bump(); - return Ok(res); - }, - Some(c) => res.push(c), - None => unreachable!() + return Ok(()); + } + b'\\' => { + escape = true; + } + ch => { + self.buf.push(ch); + } } } } } } -impl> Deserializer for Parser { +impl> Deserializer for Parser { #[inline] fn visit< R, @@ -442,7 +389,7 @@ impl> Deserializer for Parser { } fn syntax_error(&mut self) -> Error { - Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeValue), self.line, self.col) + Error::SyntaxError(ErrorCode::ExpectedSomeValue, self.line, self.col) } fn end_of_stream_error(&mut self) -> Error { @@ -454,13 +401,13 @@ struct SeqVisitor { first: bool, } -impl> de::SeqVisitor, Error> for SeqVisitor { +impl> de::SeqVisitor, Error> for SeqVisitor { fn visit< T: de::Deserialize, Error>, >(&mut self, d: &mut Parser) -> Result, Error> { d.parse_whitespace(); - if d.ch_is(']') { + if d.ch_is(b']') { d.bump(); return Ok(None); } @@ -468,12 +415,12 @@ impl> de::SeqVisitor, Error> for SeqVisitor { if self.first { self.first = false; } else { - if d.ch_is(',') { + if d.ch_is(b',') { d.bump(); } else if d.eof() { return Err(d.error(ErrorCode::EOFWhileParsingList)); } else { - return Err(d.error(ErrorCode::InvalidSyntax(SyntaxExpectation::ListCommaOrEnd))); + return Err(d.error(ErrorCode::ExpectedListCommaOrEnd)); } } @@ -482,7 +429,7 @@ impl> de::SeqVisitor, Error> for SeqVisitor { } fn end(&mut self, d: &mut Parser) -> Result<(), Error> { - if d.ch_is(']') { + if d.ch_is(b']') { d.bump(); Ok(()) } else if d.eof() { @@ -497,14 +444,13 @@ struct MapVisitor { first: bool, } -impl> de::MapVisitor, Error> for MapVisitor { - fn visit< +impl> de::MapVisitor, Error> for MapVisitor { + fn visit_key< K: de::Deserialize, Error>, - V: de::Deserialize, Error>, - >(&mut self, d: &mut Parser) -> Result, Error> { + >(&mut self, d: &mut Parser) -> Result, Error> { d.parse_whitespace(); - if d.ch_is('}') { + if d.ch_is(b'}') { d.bump(); return Ok(None); } @@ -512,13 +458,13 @@ impl> de::MapVisitor, Error> for MapVisitor { if self.first { self.first = false; } else { - if d.ch_is(',') { + if d.ch_is(b',') { d.bump(); d.parse_whitespace(); } else if d.eof() { return Err(d.error(ErrorCode::EOFWhileParsingObject)); } else { - return Err(d.error(ErrorCode::InvalidSyntax(SyntaxExpectation::ObjectCommaOrEnd))); + return Err(d.error(ErrorCode::ExpectedObjectCommaOrEnd)); } } @@ -526,15 +472,19 @@ impl> de::MapVisitor, Error> for MapVisitor { return Err(d.error(ErrorCode::EOFWhileParsingValue)); } - if !d.ch_is('"') { + if !d.ch_is(b'"') { return Err(d.error(ErrorCode::KeyMustBeAString)); } - let key = try!(de::Deserialize::deserialize(d)); + Ok(Some(try!(de::Deserialize::deserialize(d)))) + } + fn visit_value< + V: de::Deserialize, Error>, + >(&mut self, d: &mut Parser) -> Result { d.parse_whitespace(); - if d.ch_is(':') { + if d.ch_is(b':') { d.bump(); } else if d.eof() { return Err(d.error(ErrorCode::EOFWhileParsingObject)); @@ -544,13 +494,11 @@ impl> de::MapVisitor, Error> for MapVisitor { d.parse_whitespace(); - let value = try!(de::Deserialize::deserialize(d)); - - Ok(Some((key, value))) + Ok(try!(de::Deserialize::deserialize(d))) } fn end(&mut self, d: &mut Parser) -> Result<(), Error> { - if d.ch_is(']') { + if d.ch_is(b']') { d.bump(); Ok(()) } else if d.eof() { @@ -561,9 +509,9 @@ impl> de::MapVisitor, Error> for MapVisitor { } } -/// Decodes a json value from an `Iterator`. +/// Decodes a json value from an `Iterator`. pub fn from_iter< - Iter: Iterator, + Iter: Iterator, T: de::Deserialize, Error> >(iter: Iter) -> Result { let mut parser = Parser::new(iter); @@ -577,12 +525,11 @@ pub fn from_iter< /// Decodes a json value from a string pub fn from_str< 'a, - T: de::Deserialize>, Error> + T: de::Deserialize>, Error> >(s: &'a str) -> Result { - from_iter(s.chars()) + from_iter(s.bytes()) } - #[cfg(test)] mod tests { use std::str; @@ -590,7 +537,8 @@ mod tests { use std::collections::TreeMap; use de::Deserialize; - use super::{Parser, Error, ErrorCode, SyntaxExpectation, from_str}; + use super::{Parser, from_str}; + use super::super::error::{Error, ErrorCode}; macro_rules! treemap { ($($k:expr => $v:expr),*) => ({ @@ -602,7 +550,7 @@ mod tests { fn test_parse_ok< 'a, - T: PartialEq + Show + Deserialize>, Error>, + T: PartialEq + Show + Deserialize>, Error>, >(errors: Vec<(&'a str, T)>) { for (s, value) in errors.into_iter() { let v: Result = from_str(s); @@ -617,7 +565,7 @@ mod tests { fn test_parse_err< 'a, - T: PartialEq + Show + Deserialize>, Error> + T: PartialEq + Show + Deserialize>, Error> >(errors: Vec<(&'a str, Error)>) { for (s, err) in errors.into_iter() { let v: Result = from_str(s); @@ -635,10 +583,10 @@ mod tests { #[test] fn test_parse_bool() { test_parse_err::(vec![ - ("t", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeIdent), 1, 2)), - ("truz", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeIdent), 1, 4)), - ("f", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeIdent), 1, 2)), - ("faz", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeIdent), 1, 3)), + ("t", Error::SyntaxError(ErrorCode::ExpectedSomeIdent, 1, 2)), + ("truz", Error::SyntaxError(ErrorCode::ExpectedSomeIdent, 1, 4)), + ("f", Error::SyntaxError(ErrorCode::ExpectedSomeIdent, 1, 2)), + ("faz", Error::SyntaxError(ErrorCode::ExpectedSomeIdent, 1, 3)), ("truea", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 5)), ("falsea", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 6)), ]); @@ -652,8 +600,8 @@ mod tests { #[test] fn test_parse_numbers() { test_parse_err::(vec![ - ("+", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeValue), 1, 1)), - (".", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeValue), 1, 1)), + ("+", Error::SyntaxError(ErrorCode::ExpectedSomeValue, 1, 1)), + (".", Error::SyntaxError(ErrorCode::ExpectedSomeValue, 1, 1)), ("-", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 2)), ("00", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 2)), ("1.", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 3)), @@ -707,8 +655,8 @@ mod tests { ("[ ", Error::SyntaxError(ErrorCode::EOFWhileParsingValue, 1, 3)), ("[1", Error::SyntaxError(ErrorCode::EOFWhileParsingList, 1, 3)), ("[1,", Error::SyntaxError(ErrorCode::EOFWhileParsingValue, 1, 4)), - ("[1,]", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::SomeValue), 1, 4)), - ("[1 2]", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::ListCommaOrEnd), 1, 4)), + ("[1,]", Error::SyntaxError(ErrorCode::ExpectedSomeValue, 1, 4)), + ("[1 2]", Error::SyntaxError(ErrorCode::ExpectedListCommaOrEnd, 1, 4)), ("[]a", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 3)), ]); @@ -761,7 +709,7 @@ mod tests { ("{\"a\" 1", Error::SyntaxError(ErrorCode::ExpectedColon, 1, 6)), ("{\"a\":", Error::SyntaxError(ErrorCode::EOFWhileParsingValue, 1, 6)), ("{\"a\":1", Error::SyntaxError(ErrorCode::EOFWhileParsingObject, 1, 7)), - ("{\"a\":1 1", Error::SyntaxError(ErrorCode::InvalidSyntax(SyntaxExpectation::ObjectCommaOrEnd), 1, 8)), + ("{\"a\":1 1", Error::SyntaxError(ErrorCode::ExpectedObjectCommaOrEnd, 1, 8)), ("{\"a\":1,", Error::SyntaxError(ErrorCode::EOFWhileParsingValue, 1, 8)), ("{}a", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 3)), ]); diff --git a/serde2/src/json/mod.rs b/serde2/src/json/mod.rs index a890538b..bfbad813 100644 --- a/serde2/src/json/mod.rs +++ b/serde2/src/json/mod.rs @@ -2,5 +2,8 @@ pub use self::ser::Writer; pub use self::ser::{to_vec, to_string}; pub use self::ser::escape_str; +pub use self::de::from_str; + pub mod ser; pub mod de; +pub mod error; diff --git a/serde2/src/json/ser.rs b/serde2/src/json/ser.rs index 00326879..c122fd0d 100644 --- a/serde2/src/json/ser.rs +++ b/serde2/src/json/ser.rs @@ -1,5 +1,5 @@ use std::f64; -use std::io::{IoError, MemWriter}; +use std::io::IoError; use std::io; use std::num::{Float, FPNaN, FPInfinite}; @@ -23,7 +23,7 @@ impl Writer { /// Unwrap the Writer from the Serializer. #[inline] - pub fn unwrap(self) -> W { + pub fn into_inner(self) -> W { self.writer } } @@ -234,10 +234,10 @@ fn fmt_f64_or_null(wr: &mut W, value: f64) -> Result<(), IoError> pub fn to_vec< T: ser::Serialize, >(value: &T) -> Result, IoError> { - let writer = MemWriter::with_capacity(1024); + let writer = Vec::with_capacity(128); let mut writer = Writer::new(writer); try!(writer.visit(value)); - Ok(writer.unwrap().unwrap()) + Ok(writer.into_inner()) } #[inline]