use std::char; use std::fmt; use std::num; use std::str::ScalarValue; use std::str; use de; use de::Deserializer; #[deriving(Clone, PartialEq, Eq)] pub enum ErrorCode { EOFWhileParsingList, EOFWhileParsingObject, EOFWhileParsingString, EOFWhileParsingValue, ExpectedColon, InvalidEscape, InvalidNumber, InvalidSyntax(SyntaxExpectation), InvalidUnicodeCodePoint, KeyMustBeAString, LoneLeadingSurrogateInHexEscape, MissingField(&'static str), NotFourDigit, NotUtf8, TrailingCharacters, UnexpectedEndOfHexEscape, UnknownVariant, UnrecognizedHex, } /// The failed expectation of InvalidSyntax #[deriving(Clone, PartialEq, Eq, Show)] pub enum SyntaxExpectation { ListCommaOrEnd, ObjectCommaOrEnd, SomeValue, SomeIdent, EnumMapStart, EnumVariantString, EnumToken, EnumEndToken, EnumEnd, } impl fmt::Show for ErrorCode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { EOFWhileParsingList => "EOF While parsing list".fmt(f), EOFWhileParsingObject => "EOF While parsing object".fmt(f), EOFWhileParsingString => "EOF While parsing string".fmt(f), EOFWhileParsingValue => "EOF While parsing value".fmt(f), ExpectedColon => "expected `:`".fmt(f), InvalidEscape => "invalid escape".fmt(f), InvalidNumber => "invalid number".fmt(f), InvalidSyntax(expect) => { write!(f, "invalid syntax, expected: {}", expect) } InvalidUnicodeCodePoint => "invalid unicode code point".fmt(f), KeyMustBeAString => "key must be a string".fmt(f), LoneLeadingSurrogateInHexEscape => "lone leading surrogate in hex escape".fmt(f), MissingField(field) => { write!(f, "missing field \"{}\"", field) } NotFourDigit => "invalid \\u escape (not four digits)".fmt(f), NotUtf8 => "contents not utf-8".fmt(f), TrailingCharacters => "trailing characters".fmt(f), UnexpectedEndOfHexEscape => "unexpected end of hex escape".fmt(f), UnknownVariant => "unknown variant".fmt(f), UnrecognizedHex => "invalid \\u escape (unrecognized hex)".fmt(f), } } } #[deriving(PartialEq, Eq, Show)] pub enum Error { SyntaxError(ErrorCode, uint, uint), } pub struct Parser { rdr: Iter, ch: Option, line: uint, col: uint, } impl< Iter: Iterator, > Parser { /// Creates the JSON parser. pub fn new(rdr: Iter) -> Parser { let mut p = Parser { rdr: rdr, ch: Some('\x00'), line: 1, col: 0, }; p.bump(); return p; } pub fn end(&mut self) -> Result<(), Error> { if self.eof() { Ok(()) } else { Err(self.error(TrailingCharacters)) } } fn eof(&self) -> bool { self.ch.is_none() } fn ch_or_null(&self) -> char { self.ch.unwrap_or('\x00') } fn bump(&mut self) { self.ch = self.rdr.next(); if self.ch_is('\n') { self.line += 1; self.col = 1; } else { self.col += 1; } } fn next_char(&mut self) -> Option { self.bump(); self.ch } fn ch_is(&self, c: char) -> bool { self.ch == Some(c) } fn parse_whitespace(&mut self) { while self.ch_is(' ') || self.ch_is('\n') || self.ch_is('\t') || self.ch_is('\r') { self.bump(); } } fn error(&mut self, reason: ErrorCode) -> Error { //self.state_stack.clear(); SyntaxError(reason, self.line, self.col) } fn parse_value< R, V: de::Visitor, R, Error>, >(&mut self, visitor: &mut V) -> Result { self.parse_whitespace(); if self.eof() { return Err(self.error(EOFWhileParsingValue)); } match self.ch_or_null() { 'n' => { try!(self.parse_ident("ull")); visitor.visit_null(self) } 't' => { try!(self.parse_ident("rue")); visitor.visit_bool(self, true) } 'f' => { try!(self.parse_ident("alse")); visitor.visit_bool(self, false) } '0' ... '9' | '-' => self.parse_number(visitor), '"' => { let s = try!(self.parse_string()); visitor.visit_string(self, s) } '[' => { self.bump(); visitor.visit_seq(self, SeqVisitor { first: true }) } '{' => { self.bump(); visitor.visit_map(self, MapVisitor { first: true }) } _ => { Err(self.error(InvalidSyntax(SomeValue))) } } } fn parse_ident(&mut self, ident: &str) -> Result<(), Error> { if ident.chars().all(|c| Some(c) == self.next_char()) { self.bump(); Ok(()) } else { Err(self.error(InvalidSyntax(SomeIdent))) } } fn parse_number< R, V: de::Visitor, R, Error>, >(&mut self, visitor: &mut V) -> Result { let mut neg = 1; if self.ch_is('-') { self.bump(); neg = -1; } let res = try!(self.parse_integer()); if self.ch_is('.') || self.ch_is('e') || self.ch_is('E') { let neg = neg as f64; let mut res = res as f64; if self.ch_is('.') { res = try!(self.parse_decimal(res)); } if self.ch_is('e') || self.ch_is('E') { res = try!(self.parse_exponent(res)); } visitor.visit_f64(self, neg * res) } else { visitor.visit_i64(self, neg * res) } } fn parse_integer(&mut self) -> Result { let mut res = 0; match self.ch_or_null() { '0' => { self.bump(); // There can be only one leading '0'. match self.ch_or_null() { '0' ... '9' => { return Err(self.error(InvalidNumber)); } _ => () } }, '1' ... '9' => { while !self.eof() { match self.ch_or_null() { c @ '0' ... '9' => { res *= 10; res += (c as i64) - ('0' as i64); self.bump(); } _ => break, } } } _ => { return Err(self.error(InvalidNumber)); } } Ok(res) } fn parse_decimal(&mut self, res: f64) -> Result { self.bump(); // Make sure a digit follows the decimal place. match self.ch_or_null() { '0' ... '9' => (), _ => { return Err(self.error(InvalidNumber)); } } let mut res = res; let mut dec = 1.0; while !self.eof() { match self.ch_or_null() { c @ '0' ... '9' => { dec /= 10.0; res += (((c as int) - ('0' as int)) as f64) * dec; self.bump(); } _ => break, } } Ok(res) } fn parse_exponent(&mut self, mut res: f64) -> Result { self.bump(); let mut exp = 0u; let mut neg_exp = false; if self.ch_is('+') { self.bump(); } else if self.ch_is('-') { self.bump(); neg_exp = true; } // Make sure a digit follows the exponent place. match self.ch_or_null() { '0' ... '9' => (), _ => { return Err(self.error(InvalidNumber)); } } while !self.eof() { match self.ch_or_null() { c @ '0' ... '9' => { exp *= 10; exp += (c as uint) - ('0' as uint); self.bump(); } _ => break } } let exp: f64 = num::pow(10u as f64, exp); if neg_exp { res /= exp; } else { res *= exp; } Ok(res) } fn decode_hex_escape(&mut self) -> Result { let mut i = 0u; let mut n = 0u16; while i < 4u && !self.eof() { self.bump(); n = match self.ch_or_null() { c @ '0' ... '9' => n * 16_u16 + ((c as u16) - ('0' as u16)), 'a' | 'A' => n * 16_u16 + 10_u16, 'b' | 'B' => n * 16_u16 + 11_u16, 'c' | 'C' => n * 16_u16 + 12_u16, 'd' | 'D' => n * 16_u16 + 13_u16, 'e' | 'E' => n * 16_u16 + 14_u16, 'f' | 'F' => n * 16_u16 + 15_u16, _ => { return Err(self.error(InvalidEscape)); } }; i += 1u; } // Error out if we didn't parse 4 digits. if i != 4u { return Err(self.error(InvalidEscape)); } Ok(n) } fn parse_string(&mut self) -> Result { let mut escape = false; let mut res = String::new(); loop { self.bump(); if self.eof() { return Err(self.error(EOFWhileParsingString)); } if escape { match self.ch_or_null() { '"' => res.push('"'), '\\' => res.push('\\'), '/' => res.push('/'), 'b' => res.push('\x08'), 'f' => res.push('\x0c'), 'n' => res.push('\n'), 'r' => res.push('\r'), 't' => res.push('\t'), 'u' => match try!(self.decode_hex_escape()) { 0xDC00 ... 0xDFFF => { return Err(self.error(LoneLeadingSurrogateInHexEscape)); } // Non-BMP characters are encoded as a sequence of // two hex escapes, representing UTF-16 surrogates. n1 @ 0xD800 ... 0xDBFF => { let c1 = self.next_char(); let c2 = self.next_char(); match (c1, c2) { (Some('\\'), Some('u')) => (), _ => { return Err(self.error(UnexpectedEndOfHexEscape)); } } let buf = [n1, try!(self.decode_hex_escape())]; match str::utf16_items(buf.as_slice()).next() { Some(ScalarValue(c)) => res.push(c), _ => { return Err(self.error(LoneLeadingSurrogateInHexEscape)); } } } n => match char::from_u32(n as u32) { Some(c) => res.push(c), None => { return Err(self.error(InvalidUnicodeCodePoint)); } }, }, _ => { return Err(self.error(InvalidEscape)); } } escape = false; } else if self.ch_is('\\') { escape = true; } else { match self.ch { Some('"') => { self.bump(); return Ok(res); }, Some(c) => res.push(c), None => unreachable!() } } } } } impl> Deserializer for Parser { #[inline] fn visit< R, V: de::Visitor, R, Error>, >(&mut self, visitor: &mut V) -> Result { self.parse_value(visitor) } fn syntax_error(&mut self) -> Error { SyntaxError(InvalidSyntax(SomeValue), self.line, self.col) } fn end_of_stream_error(&mut self) -> Error { SyntaxError(EOFWhileParsingValue, self.line, self.col) } } struct SeqVisitor { first: bool, } impl> de::SeqVisitor, Error> for SeqVisitor { fn visit< T: de::Deserialize, Error>, >(&mut self, d: &mut Parser) -> Result, Error> { d.parse_whitespace(); if d.ch_is(']') { d.bump(); return Ok(None); } if self.first { self.first = false; } else { if d.ch_is(',') { d.bump(); } else if d.eof() { return Err(d.error(EOFWhileParsingList)); } else { return Err(d.error(InvalidSyntax(ListCommaOrEnd))); } } let value = try!(de::Deserialize::deserialize(d)); Ok(Some(value)) } fn end(&mut self, d: &mut Parser) -> Result<(), Error> { if d.ch_is(']') { d.bump(); Ok(()) } else if d.eof() { Err(d.error(EOFWhileParsingList)) } else { Err(d.error(TrailingCharacters)) } } } struct MapVisitor { first: bool, } impl> de::MapVisitor, Error> for MapVisitor { fn visit< K: de::Deserialize, Error>, V: de::Deserialize, Error>, >(&mut self, d: &mut Parser) -> Result, Error> { d.parse_whitespace(); if d.ch_is('}') { d.bump(); return Ok(None); } if self.first { self.first = false; } else { if d.ch_is(',') { d.bump(); d.parse_whitespace(); } else if d.eof() { return Err(d.error(EOFWhileParsingObject)); } else { return Err(d.error(InvalidSyntax(ObjectCommaOrEnd))); } } if d.eof() { return Err(d.error(EOFWhileParsingValue)); } if !d.ch_is('"') { return Err(d.error(KeyMustBeAString)); } let key = try!(de::Deserialize::deserialize(d)); d.parse_whitespace(); if d.ch_is(':') { d.bump(); } else if d.eof() { return Err(d.error(EOFWhileParsingObject)); } else { return Err(d.error(ExpectedColon)); } d.parse_whitespace(); let value = try!(de::Deserialize::deserialize(d)); Ok(Some((key, value))) } fn end(&mut self, d: &mut Parser) -> Result<(), Error> { if d.ch_is(']') { d.bump(); Ok(()) } else if d.eof() { Err(d.error(EOFWhileParsingList)) } else { Err(d.error(TrailingCharacters)) } } } /// Decodes a json value from an `Iterator`. pub fn from_iter< Iter: Iterator, T: de::Deserialize, Error> >(iter: Iter) -> Result { let mut parser = Parser::new(iter); let value = try!(de::Deserialize::deserialize(&mut parser)); // Make sure the whole stream has been consumed. try!(parser.end()); Ok(value) } /// Decodes a json value from a string pub fn from_str< 'a, T: de::Deserialize>, Error> >(s: &'a str) -> Result { from_iter(s.chars()) } #[cfg(test)] mod tests { use std::str; use std::fmt::Show; use std::collections::TreeMap; use de::Deserialize; use super::{Parser, Error, from_str}; use super::{ ListCommaOrEnd, ObjectCommaOrEnd, SomeIdent, SomeValue, }; use super::{ EOFWhileParsingList, EOFWhileParsingObject, EOFWhileParsingString, EOFWhileParsingValue, ExpectedColon, InvalidNumber, InvalidSyntax, KeyMustBeAString, TrailingCharacters, }; use super::SyntaxError; macro_rules! treemap { ($($k:expr => $v:expr),*) => ({ let mut _m = TreeMap::new(); $(_m.insert($k, $v);)* _m }) } fn test_parse_ok< 'a, T: PartialEq + Show + Deserialize>, Error>, >(errors: Vec<(&'a str, T)>) { for (s, value) in errors.into_iter() { let v: Result = from_str(s); assert_eq!(v, Ok(value)); /* let v: Json = from_iter(s.chars()).unwrap(); assert_eq!(v, value.to_json()); */ } } fn test_parse_err< 'a, T: PartialEq + Show + Deserialize>, Error> >(errors: Vec<(&'a str, Error)>) { for (s, err) in errors.into_iter() { let v: Result = from_str(s); assert_eq!(v, Err(err)); } } #[test] fn test_parse_null() { test_parse_ok(vec![ ("null", ()), ]); } #[test] fn test_parse_bool() { test_parse_err::(vec![ ("t", SyntaxError(InvalidSyntax(SomeIdent), 1, 2)), ("truz", SyntaxError(InvalidSyntax(SomeIdent), 1, 4)), ("f", SyntaxError(InvalidSyntax(SomeIdent), 1, 2)), ("faz", SyntaxError(InvalidSyntax(SomeIdent), 1, 3)), ("truea", SyntaxError(TrailingCharacters, 1, 5)), ("falsea", SyntaxError(TrailingCharacters, 1, 6)), ]); test_parse_ok(vec![ ("true", true), ("false", false), ]); } #[test] fn test_parse_numbers() { test_parse_err::(vec![ ("+", SyntaxError(InvalidSyntax(SomeValue), 1, 1)), (".", SyntaxError(InvalidSyntax(SomeValue), 1, 1)), ("-", SyntaxError(InvalidNumber, 1, 2)), ("00", SyntaxError(InvalidNumber, 1, 2)), ("1.", SyntaxError(InvalidNumber, 1, 3)), ("1e", SyntaxError(InvalidNumber, 1, 3)), ("1e+", SyntaxError(InvalidNumber, 1, 4)), ("1a", SyntaxError(TrailingCharacters, 1, 2)), ]); test_parse_ok(vec![ ("3", 3i64), ("-2", -2), ("-1234", -1234), ]); test_parse_ok(vec![ ("3.0", 3.0f64), ("3.1", 3.1), ("-1.2", -1.2), ("0.4", 0.4), ("0.4e5", 0.4e5), ("0.4e15", 0.4e15), ("0.4e-01", 0.4e-01), ]); } #[test] fn test_parse_string() { test_parse_err::(vec![ ("\"", SyntaxError(EOFWhileParsingString, 1, 2)), ("\"lol", SyntaxError(EOFWhileParsingString, 1, 5)), ("\"lol\"a", SyntaxError(TrailingCharacters, 1, 6)), ]); test_parse_ok(vec![ ("\"\"", "".to_string()), ("\"foo\"", "foo".to_string()), ("\"\\\"\"", "\"".to_string()), ("\"\\b\"", "\x08".to_string()), ("\"\\n\"", "\n".to_string()), ("\"\\r\"", "\r".to_string()), ("\"\\t\"", "\t".to_string()), ("\"\\u12ab\"", "\u12ab".to_string()), ("\"\\uAB12\"", "\uAB12".to_string()), ]); } #[test] fn test_parse_list() { test_parse_err::>(vec![ ("[", SyntaxError(EOFWhileParsingValue, 1, 2)), ("[ ", SyntaxError(EOFWhileParsingValue, 1, 3)), ("[1", SyntaxError(EOFWhileParsingList, 1, 3)), ("[1,", SyntaxError(EOFWhileParsingValue, 1, 4)), ("[1,]", SyntaxError(InvalidSyntax(SomeValue), 1, 4)), ("[1 2]", SyntaxError(InvalidSyntax(ListCommaOrEnd), 1, 4)), ("[]a", SyntaxError(TrailingCharacters, 1, 3)), ]); test_parse_ok(vec![ ("[]", vec![]), ("[ ]", vec![]), ("[null]", vec![()]), ("[ null ]", vec![()]), ]); test_parse_ok(vec![ ("[true]", vec![true]), ]); test_parse_ok(vec![ ("[3,1]", vec![3i, 1]), ("[ 3 , 1 ]", vec![3i, 1]), ]); test_parse_ok(vec![ ("[[3], [1, 2]]", vec![vec![3i], vec![1, 2]]), ]); test_parse_ok(vec![ ("[]", ()), ]); test_parse_ok(vec![ ("[1]", (1u,)), ]); test_parse_ok(vec![ ("[1, 2]", (1u, 2u)), ]); test_parse_ok(vec![ ("[1, 2, 3]", (1u, 2u, 3u)), ]); } #[test] fn test_parse_object() { test_parse_err::>(vec![ ("{", SyntaxError(EOFWhileParsingValue, 1, 2)), ("{ ", SyntaxError(EOFWhileParsingValue, 1, 3)), ("{1", SyntaxError(KeyMustBeAString, 1, 2)), ("{ \"a\"", SyntaxError(EOFWhileParsingObject, 1, 6)), ("{\"a\"", SyntaxError(EOFWhileParsingObject, 1, 5)), ("{\"a\" ", SyntaxError(EOFWhileParsingObject, 1, 6)), ("{\"a\" 1", SyntaxError(ExpectedColon, 1, 6)), ("{\"a\":", SyntaxError(EOFWhileParsingValue, 1, 6)), ("{\"a\":1", SyntaxError(EOFWhileParsingObject, 1, 7)), ("{\"a\":1 1", SyntaxError(InvalidSyntax(ObjectCommaOrEnd), 1, 8)), ("{\"a\":1,", SyntaxError(EOFWhileParsingValue, 1, 8)), ("{}a", SyntaxError(TrailingCharacters, 1, 3)), ]); test_parse_ok(vec![ ("{}", treemap!()), ("{ }", treemap!()), ( "{\"a\":3}", treemap!("a".to_string() => 3i) ), ( "{ \"a\" : 3 }", treemap!("a".to_string() => 3i) ), ( "{\"a\":3,\"b\":4}", treemap!("a".to_string() => 3i, "b".to_string() => 4) ), ( "{ \"a\" : 3 , \"b\" : 4 }", treemap!("a".to_string() => 3i, "b".to_string() => 4), ), ]); test_parse_ok(vec![ ( "{\"a\": {\"b\": 3, \"c\": 4}}", treemap!("a".to_string() => treemap!("b".to_string() => 3i, "c".to_string() => 4i)), ), ]); } }