Auto merge of #46919 - michaelwoerister:new-leb128, r=sfackler
Speed up leb128 encoding and decoding for unsigned values. Make the implementation for some leb128 functions potentially faster. @Mark-Simulacrum, could you please trigger a perf.rlo run?
This commit is contained in:
commit
816d765716
2 changed files with 143 additions and 80 deletions
|
@ -9,7 +9,7 @@
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
|
pub fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
|
||||||
if position == vec.len() {
|
if position == vec.len() {
|
||||||
vec.push(byte);
|
vec.push(byte);
|
||||||
} else {
|
} else {
|
||||||
|
@ -17,25 +17,32 @@ fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(target_pointer_width = "32")]
|
||||||
|
const USIZE_LEB128_SIZE: usize = 5;
|
||||||
|
#[cfg(target_pointer_width = "64")]
|
||||||
|
const USIZE_LEB128_SIZE: usize = 10;
|
||||||
|
|
||||||
|
macro_rules! leb128_size {
|
||||||
|
(u16) => (3);
|
||||||
|
(u32) => (5);
|
||||||
|
(u64) => (10);
|
||||||
|
(u128) => (19);
|
||||||
|
(usize) => (USIZE_LEB128_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_write_unsigned_leb128 {
|
||||||
|
($fn_name:ident, $int_ty:ident) => (
|
||||||
#[inline]
|
#[inline]
|
||||||
/// encodes an integer using unsigned leb128 encoding and stores
|
pub fn $fn_name(out: &mut Vec<u8>, start_position: usize, mut value: $int_ty) -> usize {
|
||||||
/// the result using a callback function.
|
let mut position = start_position;
|
||||||
///
|
for _ in 0 .. leb128_size!($int_ty) {
|
||||||
/// The callback `write` is called once for each position
|
|
||||||
/// that is to be written to with the byte to be encoded
|
|
||||||
/// at that position.
|
|
||||||
pub fn write_unsigned_leb128_to<W>(mut value: u128, mut write: W) -> usize
|
|
||||||
where W: FnMut(usize, u8)
|
|
||||||
{
|
|
||||||
let mut position = 0;
|
|
||||||
loop {
|
|
||||||
let mut byte = (value & 0x7F) as u8;
|
let mut byte = (value & 0x7F) as u8;
|
||||||
value >>= 7;
|
value >>= 7;
|
||||||
if value != 0 {
|
if value != 0 {
|
||||||
byte |= 0x80;
|
byte |= 0x80;
|
||||||
}
|
}
|
||||||
|
|
||||||
write(position, byte);
|
write_to_vec(out, position, byte);
|
||||||
position += 1;
|
position += 1;
|
||||||
|
|
||||||
if value == 0 {
|
if value == 0 {
|
||||||
|
@ -43,30 +50,53 @@ pub fn write_unsigned_leb128_to<W>(mut value: u128, mut write: W) -> usize
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
position
|
position - start_position
|
||||||
|
}
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_unsigned_leb128(out: &mut Vec<u8>, start_position: usize, value: u128) -> usize {
|
impl_write_unsigned_leb128!(write_u16_leb128, u16);
|
||||||
write_unsigned_leb128_to(value, |i, v| write_to_vec(out, start_position+i, v))
|
impl_write_unsigned_leb128!(write_u32_leb128, u32);
|
||||||
}
|
impl_write_unsigned_leb128!(write_u64_leb128, u64);
|
||||||
|
impl_write_unsigned_leb128!(write_u128_leb128, u128);
|
||||||
|
impl_write_unsigned_leb128!(write_usize_leb128, usize);
|
||||||
|
|
||||||
|
|
||||||
|
macro_rules! impl_read_unsigned_leb128 {
|
||||||
|
($fn_name:ident, $int_ty:ident) => (
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn read_unsigned_leb128(data: &[u8], start_position: usize) -> (u128, usize) {
|
pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) {
|
||||||
let mut result = 0;
|
let mut result: $int_ty = 0;
|
||||||
let mut shift = 0;
|
let mut shift = 0;
|
||||||
let mut position = start_position;
|
let mut position = 0;
|
||||||
loop {
|
|
||||||
let byte = data[position];
|
for _ in 0 .. leb128_size!($int_ty) {
|
||||||
|
let byte = unsafe {
|
||||||
|
*slice.get_unchecked(position)
|
||||||
|
};
|
||||||
position += 1;
|
position += 1;
|
||||||
result |= ((byte & 0x7F) as u128) << shift;
|
result |= ((byte & 0x7F) as $int_ty) << shift;
|
||||||
if (byte & 0x80) == 0 {
|
if (byte & 0x80) == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
shift += 7;
|
shift += 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
(result, position - start_position)
|
// Do a single bounds check at the end instead of for every byte.
|
||||||
|
assert!(position <= slice.len());
|
||||||
|
|
||||||
|
(result, position)
|
||||||
}
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_read_unsigned_leb128!(read_u16_leb128, u16);
|
||||||
|
impl_read_unsigned_leb128!(read_u32_leb128, u32);
|
||||||
|
impl_read_unsigned_leb128!(read_u64_leb128, u64);
|
||||||
|
impl_read_unsigned_leb128!(read_u128_leb128, u128);
|
||||||
|
impl_read_unsigned_leb128!(read_usize_leb128, usize);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
/// encodes an integer using signed leb128 encoding and stores
|
/// encodes an integer using signed leb128 encoding and stores
|
||||||
|
@ -130,25 +160,35 @@ pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) {
|
||||||
(result, position - start_position)
|
(result, position - start_position)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
macro_rules! impl_test_unsigned_leb128 {
|
||||||
|
($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => (
|
||||||
#[test]
|
#[test]
|
||||||
fn test_unsigned_leb128() {
|
fn $test_name() {
|
||||||
let mut stream = Vec::with_capacity(10000);
|
let mut stream = Vec::new();
|
||||||
|
|
||||||
for x in 0..62 {
|
for x in 0..62 {
|
||||||
let pos = stream.len();
|
let pos = stream.len();
|
||||||
let bytes_written = write_unsigned_leb128(&mut stream, pos, 3 << x);
|
let bytes_written = $write_fn_name(&mut stream, pos, (3u64 << x) as $int_ty);
|
||||||
assert_eq!(stream.len(), pos + bytes_written);
|
assert_eq!(stream.len(), pos + bytes_written);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut position = 0;
|
let mut position = 0;
|
||||||
for x in 0..62 {
|
for x in 0..62 {
|
||||||
let expected = 3 << x;
|
let expected = (3u64 << x) as $int_ty;
|
||||||
let (actual, bytes_read) = read_unsigned_leb128(&stream, position);
|
let (actual, bytes_read) = $read_fn_name(&stream[position ..]);
|
||||||
assert_eq!(expected, actual);
|
assert_eq!(expected, actual);
|
||||||
position += bytes_read;
|
position += bytes_read;
|
||||||
}
|
}
|
||||||
assert_eq!(stream.len(), position);
|
assert_eq!(stream.len(), position);
|
||||||
}
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_test_unsigned_leb128!(test_u16_leb128, write_u16_leb128, read_u16_leb128, u16);
|
||||||
|
impl_test_unsigned_leb128!(test_u32_leb128, write_u32_leb128, read_u32_leb128, u32);
|
||||||
|
impl_test_unsigned_leb128!(test_u64_leb128, write_u64_leb128, read_u64_leb128, u64);
|
||||||
|
impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128, u128);
|
||||||
|
impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize);
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_signed_leb128() {
|
fn test_signed_leb128() {
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
use leb128::{read_signed_leb128, read_unsigned_leb128, write_signed_leb128, write_unsigned_leb128};
|
use leb128::{self, read_signed_leb128, write_signed_leb128};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use serialize;
|
use serialize;
|
||||||
|
@ -35,9 +35,9 @@ impl<'a> Encoder<'a> {
|
||||||
|
|
||||||
|
|
||||||
macro_rules! write_uleb128 {
|
macro_rules! write_uleb128 {
|
||||||
($enc:expr, $value:expr) => {{
|
($enc:expr, $value:expr, $fun:ident) => {{
|
||||||
let pos = $enc.cursor.position() as usize;
|
let pos = $enc.cursor.position() as usize;
|
||||||
let bytes_written = write_unsigned_leb128($enc.cursor.get_mut(), pos, $value as u128);
|
let bytes_written = leb128::$fun($enc.cursor.get_mut(), pos, $value);
|
||||||
$enc.cursor.set_position((pos + bytes_written) as u64);
|
$enc.cursor.set_position((pos + bytes_written) as u64);
|
||||||
Ok(())
|
Ok(())
|
||||||
}}
|
}}
|
||||||
|
@ -55,61 +55,76 @@ macro_rules! write_sleb128 {
|
||||||
impl<'a> serialize::Encoder for Encoder<'a> {
|
impl<'a> serialize::Encoder for Encoder<'a> {
|
||||||
type Error = io::Error;
|
type Error = io::Error;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_nil(&mut self) -> EncodeResult {
|
fn emit_nil(&mut self) -> EncodeResult {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_usize(&mut self, v: usize) -> EncodeResult {
|
fn emit_usize(&mut self, v: usize) -> EncodeResult {
|
||||||
write_uleb128!(self, v)
|
write_uleb128!(self, v, write_usize_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_u128(&mut self, v: u128) -> EncodeResult {
|
fn emit_u128(&mut self, v: u128) -> EncodeResult {
|
||||||
write_uleb128!(self, v)
|
write_uleb128!(self, v, write_u128_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_u64(&mut self, v: u64) -> EncodeResult {
|
fn emit_u64(&mut self, v: u64) -> EncodeResult {
|
||||||
write_uleb128!(self, v)
|
write_uleb128!(self, v, write_u64_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_u32(&mut self, v: u32) -> EncodeResult {
|
fn emit_u32(&mut self, v: u32) -> EncodeResult {
|
||||||
write_uleb128!(self, v)
|
write_uleb128!(self, v, write_u32_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_u16(&mut self, v: u16) -> EncodeResult {
|
fn emit_u16(&mut self, v: u16) -> EncodeResult {
|
||||||
write_uleb128!(self, v)
|
write_uleb128!(self, v, write_u16_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_u8(&mut self, v: u8) -> EncodeResult {
|
fn emit_u8(&mut self, v: u8) -> EncodeResult {
|
||||||
let _ = self.cursor.write_all(&[v]);
|
let pos = self.cursor.position() as usize;
|
||||||
|
leb128::write_to_vec(self.cursor.get_mut(), pos, v);
|
||||||
|
self.cursor.set_position((pos + 1) as u64);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_isize(&mut self, v: isize) -> EncodeResult {
|
fn emit_isize(&mut self, v: isize) -> EncodeResult {
|
||||||
write_sleb128!(self, v)
|
write_sleb128!(self, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_i128(&mut self, v: i128) -> EncodeResult {
|
fn emit_i128(&mut self, v: i128) -> EncodeResult {
|
||||||
write_sleb128!(self, v)
|
write_sleb128!(self, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_i64(&mut self, v: i64) -> EncodeResult {
|
fn emit_i64(&mut self, v: i64) -> EncodeResult {
|
||||||
write_sleb128!(self, v)
|
write_sleb128!(self, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_i32(&mut self, v: i32) -> EncodeResult {
|
fn emit_i32(&mut self, v: i32) -> EncodeResult {
|
||||||
write_sleb128!(self, v)
|
write_sleb128!(self, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_i16(&mut self, v: i16) -> EncodeResult {
|
fn emit_i16(&mut self, v: i16) -> EncodeResult {
|
||||||
write_sleb128!(self, v)
|
write_sleb128!(self, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_i8(&mut self, v: i8) -> EncodeResult {
|
fn emit_i8(&mut self, v: i8) -> EncodeResult {
|
||||||
let as_u8: u8 = unsafe { ::std::mem::transmute(v) };
|
let as_u8: u8 = unsafe { ::std::mem::transmute(v) };
|
||||||
let _ = self.cursor.write_all(&[as_u8]);
|
self.emit_u8(as_u8)
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_bool(&mut self, v: bool) -> EncodeResult {
|
fn emit_bool(&mut self, v: bool) -> EncodeResult {
|
||||||
self.emit_u8(if v {
|
self.emit_u8(if v {
|
||||||
1
|
1
|
||||||
|
@ -118,20 +133,24 @@ impl<'a> serialize::Encoder for Encoder<'a> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_f64(&mut self, v: f64) -> EncodeResult {
|
fn emit_f64(&mut self, v: f64) -> EncodeResult {
|
||||||
let as_u64: u64 = unsafe { ::std::mem::transmute(v) };
|
let as_u64: u64 = unsafe { ::std::mem::transmute(v) };
|
||||||
self.emit_u64(as_u64)
|
self.emit_u64(as_u64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_f32(&mut self, v: f32) -> EncodeResult {
|
fn emit_f32(&mut self, v: f32) -> EncodeResult {
|
||||||
let as_u32: u32 = unsafe { ::std::mem::transmute(v) };
|
let as_u32: u32 = unsafe { ::std::mem::transmute(v) };
|
||||||
self.emit_u32(as_u32)
|
self.emit_u32(as_u32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_char(&mut self, v: char) -> EncodeResult {
|
fn emit_char(&mut self, v: char) -> EncodeResult {
|
||||||
self.emit_u32(v as u32)
|
self.emit_u32(v as u32)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn emit_str(&mut self, v: &str) -> EncodeResult {
|
fn emit_str(&mut self, v: &str) -> EncodeResult {
|
||||||
self.emit_usize(v.len())?;
|
self.emit_usize(v.len())?;
|
||||||
let _ = self.cursor.write_all(v.as_bytes());
|
let _ = self.cursor.write_all(v.as_bytes());
|
||||||
|
@ -140,6 +159,7 @@ impl<'a> serialize::Encoder for Encoder<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Encoder<'a> {
|
impl<'a> Encoder<'a> {
|
||||||
|
#[inline]
|
||||||
pub fn position(&self) -> usize {
|
pub fn position(&self) -> usize {
|
||||||
self.cursor.position() as usize
|
self.cursor.position() as usize
|
||||||
}
|
}
|
||||||
|
@ -162,14 +182,17 @@ impl<'a> Decoder<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn position(&self) -> usize {
|
pub fn position(&self) -> usize {
|
||||||
self.position
|
self.position
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn set_position(&mut self, pos: usize) {
|
pub fn set_position(&mut self, pos: usize) {
|
||||||
self.position = pos
|
self.position = pos
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn advance(&mut self, bytes: usize) {
|
pub fn advance(&mut self, bytes: usize) {
|
||||||
self.position += bytes;
|
self.position += bytes;
|
||||||
}
|
}
|
||||||
|
@ -187,10 +210,10 @@ impl<'a> Decoder<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! read_uleb128 {
|
macro_rules! read_uleb128 {
|
||||||
($dec:expr, $t:ty) => ({
|
($dec:expr, $t:ty, $fun:ident) => ({
|
||||||
let (value, bytes_read) = read_unsigned_leb128($dec.data, $dec.position);
|
let (value, bytes_read) = leb128::$fun(&$dec.data[$dec.position ..]);
|
||||||
$dec.position += bytes_read;
|
$dec.position += bytes_read;
|
||||||
Ok(value as $t)
|
Ok(value)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -213,22 +236,22 @@ impl<'a> serialize::Decoder for Decoder<'a> {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_u128(&mut self) -> Result<u128, Self::Error> {
|
fn read_u128(&mut self) -> Result<u128, Self::Error> {
|
||||||
read_uleb128!(self, u128)
|
read_uleb128!(self, u128, read_u128_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_u64(&mut self) -> Result<u64, Self::Error> {
|
fn read_u64(&mut self) -> Result<u64, Self::Error> {
|
||||||
read_uleb128!(self, u64)
|
read_uleb128!(self, u64, read_u64_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_u32(&mut self) -> Result<u32, Self::Error> {
|
fn read_u32(&mut self) -> Result<u32, Self::Error> {
|
||||||
read_uleb128!(self, u32)
|
read_uleb128!(self, u32, read_u32_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_u16(&mut self) -> Result<u16, Self::Error> {
|
fn read_u16(&mut self) -> Result<u16, Self::Error> {
|
||||||
read_uleb128!(self, u16)
|
read_uleb128!(self, u16, read_u16_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -240,7 +263,7 @@ impl<'a> serialize::Decoder for Decoder<'a> {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_usize(&mut self) -> Result<usize, Self::Error> {
|
fn read_usize(&mut self) -> Result<usize, Self::Error> {
|
||||||
read_uleb128!(self, usize)
|
read_uleb128!(self, usize, read_usize_leb128)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
Loading…
Reference in a new issue