Move utf8 encode to where it is used

This commit is contained in:
David Tolnay
2017-04-07 09:50:38 -07:00
parent 726eea9a97
commit 2e9cc6e98a
3 changed files with 6 additions and 8 deletions
+48
View File
@@ -0,0 +1,48 @@
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;
#[inline]
pub fn encode(c: char) -> Encode {
let code = c as u32;
let mut buf = [0; 4];
let pos = if code < MAX_ONE_B {
buf[3] = code as u8;
3
} else if code < MAX_TWO_B {
buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
2
} else if code < MAX_THREE_B {
buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
1
} else {
buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
buf[3] = (code & 0x3F) as u8 | TAG_CONT;
0
};
Encode {
buf: buf,
pos: pos,
}
}
pub struct Encode {
buf: [u8; 4],
pos: usize,
}
impl Encode {
// FIXME: use this from_utf8_unchecked, since we know it can never fail
pub fn as_str(&self) -> &str {
::core::str::from_utf8(&self.buf[self.pos..]).unwrap()
}
}