From 13a9f929de5f28a39a0eaa18cb5106432d282ed9 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Thu, 19 Jan 2017 01:03:28 -0800 Subject: [PATCH] Stateful deserialization --- serde/src/de/mod.rs | 338 +++++++++++++++++++++++++++++++++++++++--- serde/src/de/value.rs | 64 ++++---- 2 files changed, 349 insertions(+), 53 deletions(-) diff --git a/serde/src/de/mod.rs b/serde/src/de/mod.rs index f6307eab..12fc98c8 100644 --- a/serde/src/de/mod.rs +++ b/serde/src/de/mod.rs @@ -9,6 +9,7 @@ use error; use collections::{String, Vec}; use core::fmt; +use core::marker::PhantomData; /////////////////////////////////////////////////////////////////////////////// @@ -230,6 +231,149 @@ pub trait Deserialize: Sized { where D: Deserializer; } +/// `DeserializeSeed` is the stateful form of the `Deserialize` trait. If you +/// ever find yourself looking for a way to pass data into a `Deserialize` impl, +/// this trait is the way to do it. +/// +/// As one example of stateful deserialization consider deserializing a JSON +/// array into an existing buffer. Using the `Deserialize` trait we could +/// deserialize a JSON array into a `Vec` but it would be a freshly allocated +/// `Vec`; there is no way for `Deserialize` to reuse a previously allocated +/// buffer. Using `DeserializeSeed` instead makes this possible as in the +/// example code below. +/// +/// The canonical API for stateless deserialization looks like this: +/// +/// ```rust +/// # use serde::Deserialize; +/// # enum Error {} +/// fn func() -> Result +/// # { unimplemented!() } +/// ``` +/// +/// Adjusting an API like this to support stateful deserialization is a matter +/// of accepting a seed as input: +/// +/// ```rust +/// # use serde::de::DeserializeSeed; +/// # enum Error {} +/// fn func_seed(seed: T) -> Result +/// # { unimplemented!() } +/// ``` +/// +/// In practice the majority of deserialization is stateless. 
An API expecting a +/// seed can be appeased by passing `std::marker::PhantomData` as a seed in the +/// case of stateless deserialization. +/// +/// # Example +/// +/// Suppose we have JSON that looks like `[[1, 2], [3, 4, 5], [6]]` and we need +/// to deserialize it into a flat representation like `vec![1, 2, 3, 4, 5, 6]`. +/// Allocating a brand new `Vec` for each subarray would be slow. Instead we +/// would like to allocate a single `Vec` and then deserialize each subarray +/// into it. This requires stateful deserialization using the DeserializeSeed +/// trait. +/// +/// ```rust +/// # use serde::de::{Deserialize, DeserializeSeed, Deserializer, Visitor, SeqVisitor}; +/// # use std::marker::PhantomData; +/// # +/// // A DeserializeSeed implementation that uses stateful deserialization to +/// // append array elements onto the end of an existing vector. The preexisting +/// // state ("seed") in this case is the Vec. The `deserialize` method of +/// // `ExtendVec` will be traversing the inner arrays of the JSON input and +/// // appending each integer into the existing Vec. +/// struct ExtendVec<'a, T: 'a>(&'a mut Vec); +/// +/// impl<'a, T> DeserializeSeed for ExtendVec<'a, T> +/// where T: Deserialize +/// { +/// // The return type of the `deserialize` method. This implementation +/// // appends onto an existing vector but does not create any new data +/// // structure, so the return type is (). +/// type Value = (); +/// +/// fn deserialize(self, deserializer: D) -> Result +/// where D: Deserializer +/// { +/// // Visitor implementation that will walk an inner array of the JSON +/// // input. +/// struct ExtendVecVisitor<'a, T: 'a>(&'a mut Vec); +/// +/// impl<'a, T> Visitor for ExtendVecVisitor<'a, T> +/// where T: Deserialize +/// { +/// type Value = (); +/// +/// fn visit_seq(self, mut visitor: V) -> Result<(), V::Error> +/// where V: SeqVisitor +/// { +/// // Visit each element in the inner array and push it onto +/// // the existing vector. 
+/// while let Some(elem) = visitor.visit()? { +/// self.0.push(elem); +/// } +/// Ok(()) +/// } +/// } +/// +/// deserializer.deserialize_seq(ExtendVecVisitor(self.0)) +/// } +/// } +/// +/// // Visitor implementation that will walk the outer array of the JSON input. +/// struct FlattenedVecVisitor(PhantomData); +/// +/// impl Visitor for FlattenedVecVisitor +/// where T: Deserialize +/// { +/// // This Visitor constructs a single Vec to hold the flattened +/// // contents of the inner arrays. +/// type Value = Vec; +/// +/// fn visit_seq(self, mut visitor: V) -> Result, V::Error> +/// where V: SeqVisitor +/// { +/// // Create a single Vec to hold the flattened contents. +/// let mut vec = Vec::new(); +/// +/// // Each iteration through this loop is one inner array. +/// while let Some(()) = visitor.visit_seed(ExtendVec(&mut vec))? { +/// // Nothing to do; inner array has been appended into `vec`. +/// } +/// +/// // Return the finished vec. +/// Ok(vec) +/// } +/// } +/// +/// # fn example(deserializer: D) -> Result<(), D::Error> { +/// let visitor = FlattenedVecVisitor(PhantomData); +/// let flattened: Vec = deserializer.deserialize_seq(visitor)?; +/// # Ok(()) } +/// ``` +pub trait DeserializeSeed: Sized { + /// The type produced by using this seed. + type Value; + + /// Equivalent to the more common `Deserialize::deserialize` method, except + /// with some initial piece of data (the seed) passed in. 
+ fn deserialize(self, deserializer: D) -> Result + where D: Deserializer; +} + +impl DeserializeSeed for PhantomData + where T: Deserialize +{ + type Value = T; + + fn deserialize(self, deserializer: D) -> Result + where D: Deserializer + { + T::deserialize(deserializer) + } +} + /////////////////////////////////////////////////////////////////////////////// /// `Deserializer` is a trait that can deserialize values by threading a `Visitor` trait through a @@ -244,7 +388,7 @@ pub trait Deserialize: Sized { /// with the `deserialize_*` methods how it should parse the next value. One downside though to /// only supporting the `deserialize_*` types is that it does not allow for deserializing into a /// generic `json::Value`-esque type. -pub trait Deserializer { +pub trait Deserializer: Sized { /// The error type that can be returned if some error occurs during deserialization. type Error: Error; @@ -641,10 +785,26 @@ pub trait SeqVisitor { /// The error type that can be returned if some error occurs during deserialization. type Error: Error; - /// This returns a `Ok(Some(value))` for the next value in the sequence, or `Ok(None)` if there - /// are no more remaining items. + /// This returns `Ok(Some(value))` for the next value in the sequence, or + /// `Ok(None)` if there are no more remaining items. + /// + /// `Deserialize` implementations should typically use `SeqVisitor::visit` + /// instead. + fn visit_seed(&mut self, seed: T) -> Result, Self::Error> + where T: DeserializeSeed; + + /// This returns `Ok(Some(value))` for the next value in the sequence, or + /// `Ok(None)` if there are no more remaining items. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `SeqVisitor` implementations should not need to override the default + /// behavior. 
+ #[inline] fn visit(&mut self) -> Result, Self::Error> - where T: Deserialize; + where T: Deserialize + { + self.visit_seed(PhantomData) + } /// Return the lower and upper bound of items remaining in the sequence. #[inline] @@ -656,6 +816,13 @@ pub trait SeqVisitor { impl<'a, V> SeqVisitor for &'a mut V where V: SeqVisitor { type Error = V::Error; + #[inline] + fn visit_seed(&mut self, seed: T) -> Result, V::Error> + where T: DeserializeSeed + { + (**self).visit_seed(seed) + } + #[inline] fn visit(&mut self) -> Result, V::Error> where T: Deserialize @@ -678,30 +845,81 @@ pub trait MapVisitor { /// The error type that can be returned if some error occurs during deserialization. type Error: Error; - /// This returns a `Ok(Some((key, value)))` for the next (key-value) pair in the map, or - /// `Ok(None)` if there are no more remaining items. + /// This returns `Ok(Some(key))` for the next key in the map, or `Ok(None)` + /// if there are no more remaining entries. + /// + /// `Deserialize` implementations should typically use + /// `MapVisitor::visit_key` or `MapVisitor::visit` instead. + fn visit_key_seed(&mut self, seed: K) -> Result, Self::Error> + where K: DeserializeSeed; + + /// This returns `Ok(value)` for the next value in the map. + /// + /// `Deserialize` implementations should typically use + /// `MapVisitor::visit_value` instead. + fn visit_value_seed(&mut self, seed: V) -> Result + where V: DeserializeSeed; + + /// This returns `Ok(Some((key, value)))` for the next (key-value) pair in + /// the map, or `Ok(None)` if there are no more remaining items. + /// + /// `MapVisitor` implementations should override the default behavior if a + /// more efficient implementation is possible. + /// + /// `Deserialize` implementations should typically use `MapVisitor::visit` + /// instead. 
#[inline] - fn visit(&mut self) -> Result, Self::Error> - where K: Deserialize, - V: Deserialize, + fn visit_seed(&mut self, key_seed: K, vseed: V) -> Result, Self::Error> + where K: DeserializeSeed, + V: DeserializeSeed { - match try!(self.visit_key()) { + match try!(self.visit_key_seed(key_seed)) { Some(key) => { - let value = try!(self.visit_value()); + let value = try!(self.visit_value_seed(vseed)); Ok(Some((key, value))) } None => Ok(None) } } - /// This returns a `Ok(Some(key))` for the next key in the map, or `Ok(None)` if there are no - /// more remaining items. + /// This returns `Ok(Some(key))` for the next key in the map, or `Ok(None)` + /// if there are no more remaining entries. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `MapVisitor` implementations should not need to override the default + /// behavior. + #[inline] fn visit_key(&mut self) -> Result, Self::Error> - where K: Deserialize; + where K: Deserialize + { + self.visit_key_seed(PhantomData) + } /// This returns a `Ok(value)` for the next value in the map. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `MapVisitor` implementations should not need to override the default + /// behavior. + #[inline] fn visit_value(&mut self) -> Result - where V: Deserialize; + where V: Deserialize + { + self.visit_value_seed(PhantomData) + } + + /// This returns `Ok(Some((key, value)))` for the next (key-value) pair in + /// the map, or `Ok(None)` if there are no more remaining items. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `MapVisitor` implementations should not need to override the default + /// behavior. + #[inline] + fn visit(&mut self) -> Result, Self::Error> + where K: Deserialize, + V: Deserialize, + { + self.visit_seed(PhantomData, PhantomData) + } /// Return the lower and upper bound of items remaining in the sequence. 
#[inline] @@ -709,17 +927,57 @@ pub trait MapVisitor { (0, None) } - /// Report that the struct has a field that wasn't deserialized + /// Report that the struct has a field that wasn't deserialized. The + /// MapVisitor may consider this an error or it may return a default value + /// for the field. + /// + /// `Deserialize` implementations should typically use + /// `MapVisitor::missing_field` instead. + fn missing_field_seed(&mut self, _seed: V, field: &'static str) -> Result + where V: DeserializeSeed + { + Err(Error::missing_field(field)) + } + + /// Report that the struct has a field that wasn't deserialized. The + /// MapVisitor may consider this an error or it may return a default value + /// for the field. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `MapVisitor` implementations should not need to override the default + /// behavior. fn missing_field(&mut self, field: &'static str) -> Result where V: Deserialize, { - Err(Error::missing_field(field)) + self.missing_field_seed(PhantomData, field) } } impl<'a, V_> MapVisitor for &'a mut V_ where V_: MapVisitor { type Error = V_::Error; + #[inline] + fn visit_key_seed(&mut self, seed: K) -> Result, Self::Error> + where K: DeserializeSeed + { + (**self).visit_key_seed(seed) + } + + #[inline] + fn visit_value_seed(&mut self, seed: V) -> Result + where V: DeserializeSeed + { + (**self).visit_value_seed(seed) + } + + #[inline] + fn visit_seed(&mut self, kseed: K, value_seed: V) -> Result, Self::Error> + where K: DeserializeSeed, + V: DeserializeSeed + { + (**self).visit_seed(kseed, value_seed) + } + #[inline] fn visit(&mut self) -> Result, V_::Error> where K: Deserialize, @@ -747,6 +1005,13 @@ impl<'a, V_> MapVisitor for &'a mut V_ where V_: MapVisitor { (**self).size_hint() } + #[inline] + fn missing_field_seed(&mut self, seed: V, field: &'static str) -> Result + where V: DeserializeSeed + { + (**self).missing_field_seed(seed, field) + } + #[inline] fn 
missing_field(&mut self, field: &'static str) -> Result where V: Deserialize @@ -760,7 +1025,7 @@ impl<'a, V_> MapVisitor for &'a mut V_ where V_: MapVisitor { /// `EnumVisitor` is a visitor that is created by the `Deserializer` and passed /// to the `Deserialize` in order to identify which variant of an enum to /// deserialize. -pub trait EnumVisitor { +pub trait EnumVisitor: Sized { /// The error type that can be returned if some error occurs during deserialization. type Error: Error; /// The `Visitor` that will be used to deserialize the content of the enum @@ -768,14 +1033,29 @@ pub trait EnumVisitor { type Variant: VariantVisitor; /// `visit_variant` is called to identify which variant to deserialize. + /// + /// `Deserialize` implementations should typically use + /// `EnumVisitor::visit_variant` instead. + fn visit_variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> + where V: DeserializeSeed; + + /// `visit_variant` is called to identify which variant to deserialize. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `EnumVisitor` implementations should not need to override the default + /// behavior. + #[inline] fn visit_variant(self) -> Result<(V, Self::Variant), Self::Error> - where V: Deserialize; + where V: Deserialize + { + self.visit_variant_seed(PhantomData) + } } /// `VariantVisitor` is a visitor that is created by the `Deserializer` and /// passed to the `Deserialize` to deserialize the content of a particular enum /// variant. -pub trait VariantVisitor { +pub trait VariantVisitor: Sized { /// The error type that can be returned if some error occurs during deserialization. type Error: Error; @@ -784,8 +1064,24 @@ pub trait VariantVisitor { /// `visit_newtype` is called when deserializing a variant with a single value. /// A good default is often to use the `visit_tuple` method to deserialize a `(value,)`. 
+ /// + /// `Deserialize` implementations should typically use + /// `VariantVisitor::visit_newtype` instead. + fn visit_newtype_seed(self, seed: T) -> Result + where T: DeserializeSeed; + + /// `visit_newtype` is called when deserializing a variant with a single value. + /// A good default is often to use the `visit_tuple` method to deserialize a `(value,)`. + /// + /// This method exists as a convenience for `Deserialize` implementations. + /// `VariantVisitor` implementations should not need to override the default + /// behavior. + #[inline] fn visit_newtype(self) -> Result - where T: Deserialize; + where T: Deserialize + { + self.visit_newtype_seed(PhantomData) + } /// `visit_tuple` is called when deserializing a tuple-like variant. /// If no tuple variants are expected, yield a diff --git a/serde/src/de/value.rs b/serde/src/de/value.rs index c6472474..9e2bdf76 100644 --- a/serde/src/de/value.rs +++ b/serde/src/de/value.rs @@ -298,10 +298,10 @@ impl<'a, E> de::EnumVisitor for StrDeserializer<'a, E> type Error = E; type Variant = private::UnitOnly; - fn visit_variant(self) -> Result<(T, Self::Variant), Self::Error> - where T: de::Deserialize, + fn visit_variant_seed(self, seed: T) -> Result<(T::Value, Self::Variant), Self::Error> + where T: de::DeserializeSeed, { - de::Deserialize::deserialize(self).map(private::unit_only) + seed.deserialize(self).map(private::unit_only) } } @@ -357,10 +357,10 @@ impl<'a, E> de::EnumVisitor for StringDeserializer type Error = E; type Variant = private::UnitOnly; - fn visit_variant(self) -> Result<(T, Self::Variant), Self::Error> - where T: de::Deserialize, + fn visit_variant_seed(self, seed: T) -> Result<(T::Value, Self::Variant), Self::Error> + where T: de::DeserializeSeed, { - de::Deserialize::deserialize(self).map(private::unit_only) + seed.deserialize(self).map(private::unit_only) } } @@ -419,10 +419,10 @@ impl<'a, E> de::EnumVisitor for CowStrDeserializer<'a, E> type Error = E; type Variant = private::UnitOnly; - fn 
visit_variant(self) -> Result<(T, Self::Variant), Self::Error> - where T: de::Deserialize, + fn visit_variant_seed(self, seed: T) -> Result<(T::Value, Self::Variant), Self::Error> + where T: de::DeserializeSeed, { - de::Deserialize::deserialize(self).map(private::unit_only) + seed.deserialize(self).map(private::unit_only) } } @@ -480,13 +480,13 @@ impl de::SeqVisitor for SeqDeserializer { type Error = E; - fn visit(&mut self) -> Result, Self::Error> - where V: de::Deserialize + fn visit_seed(&mut self, seed: V) -> Result, Self::Error> + where V: de::DeserializeSeed { match self.iter.next() { Some(value) => { self.len -= 1; - de::Deserialize::deserialize(value.into_deserializer()).map(Some) + seed.deserialize(value.into_deserializer()).map(Some) } None => Ok(None), } @@ -687,24 +687,24 @@ impl de::MapVisitor for MapDeserializer { type Error = E; - fn visit_key(&mut self) -> Result, Self::Error> - where T: de::Deserialize, + fn visit_key_seed(&mut self, seed: T) -> Result, Self::Error> + where T: de::DeserializeSeed, { match self.next() { Some((key, value)) => { self.value = Some(value); - de::Deserialize::deserialize(key.into_deserializer()).map(Some) + seed.deserialize(key.into_deserializer()).map(Some) } None => Ok(None), } } - fn visit_value(&mut self) -> Result - where T: de::Deserialize, + fn visit_value_seed(&mut self, seed: T) -> Result + where T: de::DeserializeSeed, { match self.value.take() { Some(value) => { - de::Deserialize::deserialize(value.into_deserializer()) + seed.deserialize(value.into_deserializer()) } None => { Err(de::Error::end_of_stream()) @@ -712,14 +712,14 @@ impl de::MapVisitor for MapDeserializer } } - fn visit(&mut self) -> Result, Self::Error> - where TK: de::Deserialize, - TV: de::Deserialize + fn visit_seed(&mut self, kseed: TK, vseed: TV) -> Result, Self::Error> + where TK: de::DeserializeSeed, + TV: de::DeserializeSeed { match self.next() { Some((key, value)) => { - let key = 
try!(de::Deserialize::deserialize(key.into_deserializer())); - let value = try!(de::Deserialize::deserialize(value.into_deserializer())); + let key = try!(kseed.deserialize(key.into_deserializer())); + let value = try!(vseed.deserialize(value.into_deserializer())); Ok(Some((key, value))) } None => Ok(None) @@ -741,13 +741,13 @@ impl de::SeqVisitor for MapDeserializer { type Error = E; - fn visit(&mut self) -> Result, Self::Error> - where T: de::Deserialize, + fn visit_seed(&mut self, seed: T) -> Result, Self::Error> + where T: de::DeserializeSeed, { match self.next() { Some((k, v)) => { let de = PairDeserializer(k, v, PhantomData); - de::Deserialize::deserialize(de).map(Some) + seed.deserialize(de).map(Some) } None => Ok(None), } @@ -813,13 +813,13 @@ impl de::SeqVisitor for PairVisitor { type Error = E; - fn visit(&mut self) -> Result, Self::Error> - where T: de::Deserialize, + fn visit_seed(&mut self, seed: T) -> Result, Self::Error> + where T: de::DeserializeSeed, { if let Some(k) = self.0.take() { - de::Deserialize::deserialize(k.into_deserializer()).map(Some) + seed.deserialize(k.into_deserializer()).map(Some) } else if let Some(v) = self.1.take() { - de::Deserialize::deserialize(v.into_deserializer()).map(Some) + seed.deserialize(v.into_deserializer()).map(Some) } else { Ok(None) } @@ -995,8 +995,8 @@ mod private { Ok(()) } - fn visit_newtype(self) -> Result - where T: de::Deserialize, + fn visit_newtype_seed(self, _seed: T) -> Result + where T: de::DeserializeSeed, { Err(de::Error::invalid_type(de::Type::NewtypeVariant)) }