From 875610044f6b43fbdc20e7a10b83d1734f9c147e Mon Sep 17 00:00:00 2001 From: Mikhail Borisov Date: Sat, 9 May 2015 03:18:13 +0300 Subject: [PATCH 1/3] Improved support for byte strings --- serde_macros/src/de.rs | 10 ++++++++ src/bytes.rs | 40 ++++++++++++++++++++++++++++++- src/de/impls.rs | 19 +++++++++++++++ src/de/mod.rs | 4 ++-- src/de/value.rs | 54 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 124 insertions(+), 3 deletions(-) diff --git a/serde_macros/src/de.rs b/serde_macros/src/de.rs index 66fe052b..d3b6f029 100644 --- a/serde_macros/src/de.rs +++ b/serde_macros/src/de.rs @@ -578,6 +578,16 @@ fn deserialize_field_visitor( _ => Err(::serde::de::Error::unknown_field_error(value)), } } + + fn visit_bytes(&mut self, value: &[u8]) -> ::std::result::Result<__Field, E> + where E: ::serde::de::Error, + { + // TODO: would be better to generate a byte string literal match + match ::std::str::from_utf8(value) { + Ok(s) => self.visit_str(s), + _ => Err(::serde::de::Error::syntax_error()), + } + } } deserializer.visit(__FieldVisitor) diff --git a/src/bytes.rs b/src/bytes.rs index 9eab4eb6..9e5a9a7d 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -1,6 +1,9 @@ //! Helper module to enable serializing bytes more efficiently use std::ops; +use std::fmt; +use std::ascii; +use std::char; use ser; use de; @@ -8,10 +11,17 @@ use de; /////////////////////////////////////////////////////////////////////////////// /// `Bytes` wraps a `&[u8]` in order to serialize into a byte array. +#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)] pub struct Bytes<'a> { bytes: &'a [u8], } +impl<'a> fmt::Debug for Bytes<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "b\"{}\"", escape_bytestring(self.bytes)) + } +} + impl<'a> From<&'a [u8]> for Bytes<'a> { fn from(bytes: &'a [u8]) -> Self { Bytes { @@ -28,6 +38,12 @@ impl<'a> From<&'a Vec> for Bytes<'a> { } } +impl<'a> Into<&'a [u8]> for Bytes<'a> { + fn into(self) -> &'a [u8] { + self.bytes + } +} + impl<'a> ops::Deref for Bytes<'a> { type Target = [u8]; @@ -46,7 +62,7 @@ impl<'a> ser::Serialize for Bytes<'a> { /////////////////////////////////////////////////////////////////////////////// /// `ByteBuf` wraps a `Vec` in order to hook into serialize and from deserialize a byte array. -#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)] pub struct ByteBuf { bytes: Vec, } @@ -63,6 +79,16 @@ impl ByteBuf { bytes: Vec::with_capacity(cap) } } + + pub fn as_vec(self) -> Vec { + self.bytes + } +} + +impl fmt::Debug for ByteBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "b\"{}\"", escape_bytestring(self.bytes.as_ref())) + } } impl From for ByteBuf where T: Into> { @@ -172,3 +198,15 @@ impl de::Deserialize for ByteBuf { deserializer.visit_bytes(ByteBufVisitor) } } + +/////////////////////////////////////////////////////////////////////////////// + +fn escape_bytestring(bytes: &[u8]) -> String { + let mut result = String::new(); + for &b in bytes { + for esc in ascii::escape_default(b) { + result.push(char::from_u32(esc as u32).unwrap()); + } + } + result +} diff --git a/src/de/impls.rs b/src/de/impls.rs index f73b3e52..30ceb33f 100644 --- a/src/de/impls.rs +++ b/src/de/impls.rs @@ -3,6 +3,7 @@ use std::hash::Hash; use std::marker::PhantomData; use std::path; use std::rc::Rc; +use std::str; use std::sync::Arc; use num::FromPrimitive; @@ -198,6 +199,24 @@ impl Visitor for StringVisitor { { Ok(v) } + + fn visit_bytes(&mut self, v: &[u8]) -> Result + where E: Error, + { + match str::from_utf8(v) { + Ok(s) => Ok(s.to_string()), + Err(_) => Err(Error::syntax_error()), + } + } + + fn visit_byte_buf<'a, E>(&mut self, v: Vec) -> Result + where E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(s), + Err(_) => Err(Error::syntax_error()), + } + } } impl Deserialize for String { diff --git a/src/de/mod.rs b/src/de/mod.rs index b10f12c0..cdaea298 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -263,10 +263,10 @@ pub trait Visitor { Err(Error::syntax_error()) } - fn visit_byte_buf(&mut self, _v: Vec) -> Result + fn visit_byte_buf(&mut self, v: Vec) -> Result where E: Error, { - Err(Error::syntax_error()) + self.visit_bytes(&v) } } diff --git a/src/de/value.rs b/src/de/value.rs index b39e45f7..4ddbc299 100644 --- a/src/de/value.rs +++ b/src/de/value.rs @@ -12,6 +12,7 @@ use std::hash::Hash; use std::vec; use de; +use bytes; /////////////////////////////////////////////////////////////////////////////// @@ -409,3 +410,56 @@ impl ValueDeserializer for HashMap MapDeserializer::new(self.into_iter(), len) } } + +/////////////////////////////////////////////////////////////////////////////// + +impl<'a> ValueDeserializer for bytes::Bytes<'a> +{ + type Deserializer = BytesDeserializer<'a>; + + fn into_deserializer(self) -> BytesDeserializer<'a> { + BytesDeserializer(Some(self.into())) + } +} + +pub struct BytesDeserializer<'a> (Option<&'a [u8]>); + +impl<'a> de::Deserializer for BytesDeserializer<'a> { + type Error = Error; + + fn visit(&mut self, mut visitor: V) -> Result + where V: de::Visitor, + { + match self.0.take() { + Some(bytes) => visitor.visit_bytes(bytes), + None => Err(de::Error::end_of_stream_error()), + } + } +} + + +/////////////////////////////////////////////////////////////////////////////// + +impl ValueDeserializer for bytes::ByteBuf +{ + type Deserializer = ByteBufDeserializer; + + fn into_deserializer(self) -> Self::Deserializer { + ByteBufDeserializer(Some(self.as_vec())) + } +} + +pub struct ByteBufDeserializer(Option>); + +impl de::Deserializer for ByteBufDeserializer { + type Error = Error; + + fn visit(&mut self, mut visitor: V) -> Result + where V: de::Visitor, + { + match self.0.take() { + Some(bytes) => visitor.visit_byte_buf(bytes), + None => Err(de::Error::end_of_stream_error()), + } + } +} From 5fd9daa86526139a25abb7ac328bae14032bdf44 Mon Sep 17 00:00:00 2001 From: Mikhail Borisov Date: Tue, 12 May 2015 15:03:26 +0300 Subject: [PATCH 2/3] WIP --- src/bytes.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/bytes.rs b/src/bytes.rs index 9e5a9a7d..5a6afa71 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -3,7 +3,6 @@ use std::ops; use std::fmt; use std::ascii; -use std::char; use ser; use de; @@ -91,6 +90,15 @@ impl fmt::Debug for ByteBuf { } } +/* +// Disabled: triggers conflict with From implementation below +impl Into> for ByteBuf { + fn into(self) -> Vec { + self.bytes + } +} +*/ + impl From for ByteBuf where T: Into> { fn from(bytes: T) -> Self { ByteBuf { @@ -205,7 +213,7 @@ fn escape_bytestring(bytes: &[u8]) -> String { let mut result = String::new(); for &b in bytes { for esc in ascii::escape_default(b) { - result.push(char::from_u32(esc as u32).unwrap()); + result.push(esc as char); } } result From 5c631f3e58154ee203edc45fa2beacfc43d2907f Mon Sep 17 00:00:00 2001 From: Mikhail Borisov Date: Tue, 12 May 2015 15:16:06 +0300 Subject: [PATCH 3/3] WIP --- src/bytes.rs | 13 +++---------- src/de/value.rs | 2 +- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/bytes.rs b/src/bytes.rs index 5a6afa71..b3e02711 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -78,10 +78,6 @@ impl ByteBuf { bytes: Vec::with_capacity(cap) } } - - pub fn as_vec(self) -> Vec { - self.bytes - } } impl fmt::Debug for ByteBuf { @@ -90,19 +86,16 @@ impl fmt::Debug for ByteBuf { } } -/* -// Disabled: triggers conflict with From implementation below impl Into> for ByteBuf { fn into(self) -> Vec { self.bytes } } -*/ -impl From for ByteBuf where T: Into> { - fn from(bytes: T) -> Self { +impl From> for ByteBuf { + fn from(bytes: Vec) -> Self { ByteBuf { - bytes: bytes.into(), + bytes: bytes, } } } diff --git a/src/de/value.rs b/src/de/value.rs index 4ddbc299..e512badc 100644 --- a/src/de/value.rs +++ b/src/de/value.rs @@ -445,7 +445,7 @@ impl ValueDeserializer for bytes::ByteBuf type Deserializer = ByteBufDeserializer; fn into_deserializer(self) -> Self::Deserializer { - ByteBufDeserializer(Some(self.as_vec())) + ByteBufDeserializer(Some(self.into())) } }