auto merge of #8174 : DaGenix/rust/digest-improvements, r=brson

Same content as #8097, but bors had an issue with that pull request. Opening a new one.
This commit is contained in:
bors 2013-08-02 19:29:00 -07:00
commit efd6eafeb4
6 changed files with 952 additions and 890 deletions

View file

@ -0,0 +1,327 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::num::One;
use std::vec::bytes::{MutableByteVector, copy_memory};
/// Write a u64 into a vector, which must be 8 bytes long. The value is written in big-endian
/// format.
pub fn write_u64_be(dst: &mut[u8], input: u64) {
use std::cast::transmute;
use std::unstable::intrinsics::to_be64;
assert!(dst.len() == 8);
unsafe {
let x: *mut i64 = transmute(dst.unsafe_mut_ref(0));
*x = to_be64(input as i64);
}
}
/// Write a u32 into a vector, which must be 4 bytes long. The value is written in big-endian
/// format.
pub fn write_u32_be(dst: &mut[u8], input: u32) {
use std::cast::transmute;
use std::unstable::intrinsics::to_be32;
assert!(dst.len() == 4);
unsafe {
let x: *mut i32 = transmute(dst.unsafe_mut_ref(0));
*x = to_be32(input as i32);
}
}
/// Read a vector of bytes into a vector of u64s. The values are read in big-endian format.
pub fn read_u64v_be(dst: &mut[u64], input: &[u8]) {
use std::cast::transmute;
use std::unstable::intrinsics::to_be64;
assert!(dst.len() * 8 == input.len());
unsafe {
let mut x: *mut i64 = transmute(dst.unsafe_mut_ref(0));
let mut y: *i64 = transmute(input.unsafe_ref(0));
do dst.len().times() {
*x = to_be64(*y);
x = x.offset(1);
y = y.offset(1);
}
}
}
/// Read a vector of bytes into a vector of u32s. The values are read in big-endian format.
pub fn read_u32v_be(dst: &mut[u32], input: &[u8]) {
use std::cast::transmute;
use std::unstable::intrinsics::to_be32;
assert!(dst.len() * 4 == input.len());
unsafe {
let mut x: *mut i32 = transmute(dst.unsafe_mut_ref(0));
let mut y: *i32 = transmute(input.unsafe_ref(0));
do dst.len().times() {
*x = to_be32(*y);
x = x.offset(1);
y = y.offset(1);
}
}
}
/// Returns true if adding the two parameters will result in integer overflow
pub fn will_add_overflow<T: Int + Unsigned>(x: T, y: T) -> bool {
// This doesn't handle negative values! Don't copy this code elsewhere without considering if
// negative values are important to you!
let max: T = Bounded::max_value();
return x > max - y;
}
/// Shifts the second parameter and then adds it to the first. fails!() if there would be unsigned
/// integer overflow.
pub fn shift_add_check_overflow<T: Int + Unsigned + Clone>(x: T, mut y: T, shift: T) -> T {
if y.leading_zeros() < shift {
fail!("Could not add values - integer overflow.");
}
y = y << shift;
if will_add_overflow(x.clone(), y.clone()) {
fail!("Could not add values - integer overflow.");
}
return x + y;
}
/// Shifts the second parameter and then adds it to the first, which is a tuple where the first
/// element is the high order value. fails!() if there would be unsigned integer overflow.
pub fn shift_add_check_overflow_tuple
<T: Int + Unsigned + Clone>
(x: (T, T), mut y: T, shift: T) -> (T, T) {
if y.leading_zeros() < shift {
fail!("Could not add values - integer overflow.");
}
y = y << shift;
match x {
(hi, low) => {
let one: T = One::one();
if will_add_overflow(low.clone(), y.clone()) {
if will_add_overflow(hi.clone(), one.clone()) {
fail!("Could not add values - integer overflow.");
} else {
return (hi + one, low + y);
}
} else {
return (hi, low + y);
}
}
}
}
/// A FixedBuffer, likes its name implies, is a fixed size buffer. When the buffer becomes full, it
/// must be processed. The input() method takes care of processing and then clearing the buffer
/// automatically. However, other methods do not and require the caller to process the buffer. Any
/// method that modifies the buffer directory or provides the caller with bytes that can be modifies
/// results in those bytes being marked as used by the buffer.
pub trait FixedBuffer {
/// Input a vector of bytes. If the buffer becomes full, proccess it with the provided
/// function and then clear the buffer.
fn input(&mut self, input: &[u8], func: &fn(&[u8]));
/// Reset the buffer.
fn reset(&mut self);
/// Zero the buffer up until the specified index. The buffer position currently must not be
/// greater than that index.
fn zero_until(&mut self, idx: uint);
/// Get a slice of the buffer of the specified size. There must be at least that many bytes
/// remaining in the buffer.
fn next<'s>(&'s mut self, len: uint) -> &'s mut [u8];
/// Get the current buffer. The buffer must already be full. This clears the buffer as well.
fn full_buffer<'s>(&'s mut self) -> &'s [u8];
/// Get the current position of the buffer.
fn position(&self) -> uint;
/// Get the number of bytes remaining in the buffer until it is full.
fn remaining(&self) -> uint;
/// Get the size of the buffer
fn size(&self) -> uint;
}
macro_rules! impl_fixed_buffer( ($name:ident, $size:expr) => (
impl FixedBuffer for $name {
fn input(&mut self, input: &[u8], func: &fn(&[u8])) {
let mut i = 0;
// FIXME: #6304 - This local variable shouldn't be necessary.
let size = $size;
// If there is already data in the buffer, copy as much as we can into it and process
// the data if the buffer becomes full.
if self.buffer_idx != 0 {
let buffer_remaining = size - self.buffer_idx;
if input.len() >= buffer_remaining {
copy_memory(
self.buffer.mut_slice(self.buffer_idx, size),
input.slice_to(buffer_remaining),
buffer_remaining);
self.buffer_idx = 0;
func(self.buffer);
i += buffer_remaining;
} else {
copy_memory(
self.buffer.mut_slice(self.buffer_idx, self.buffer_idx + input.len()),
input,
input.len());
self.buffer_idx += input.len();
return;
}
}
// While we have at least a full buffer size chunks's worth of data, process that data
// without copying it into the buffer
while input.len() - i >= size {
func(input.slice(i, i + size));
i += size;
}
// Copy any input data into the buffer. At this point in the method, the ammount of
// data left in the input vector will be less than the buffer size and the buffer will
// be empty.
let input_remaining = input.len() - i;
copy_memory(
self.buffer.mut_slice(0, input_remaining),
input.slice_from(i),
input.len() - i);
self.buffer_idx += input_remaining;
}
fn reset(&mut self) {
self.buffer_idx = 0;
}
fn zero_until(&mut self, idx: uint) {
assert!(idx >= self.buffer_idx);
self.buffer.mut_slice(self.buffer_idx, idx).set_memory(0);
self.buffer_idx = idx;
}
fn next<'s>(&'s mut self, len: uint) -> &'s mut [u8] {
self.buffer_idx += len;
return self.buffer.mut_slice(self.buffer_idx - len, self.buffer_idx);
}
fn full_buffer<'s>(&'s mut self) -> &'s [u8] {
assert!(self.buffer_idx == $size);
self.buffer_idx = 0;
return self.buffer.slice_to($size);
}
fn position(&self) -> uint { self.buffer_idx }
fn remaining(&self) -> uint { $size - self.buffer_idx }
fn size(&self) -> uint { $size }
}
))
/// A fixed size buffer of 64 bytes useful for cryptographic operations.
pub struct FixedBuffer64 {
priv buffer: [u8, ..64],
priv buffer_idx: uint,
}
impl FixedBuffer64 {
/// Create a new buffer
pub fn new() -> FixedBuffer64 {
return FixedBuffer64 {
buffer: [0u8, ..64],
buffer_idx: 0
};
}
}
impl_fixed_buffer!(FixedBuffer64, 64)
/// A fixed size buffer of 128 bytes useful for cryptographic operations.
pub struct FixedBuffer128 {
priv buffer: [u8, ..128],
priv buffer_idx: uint,
}
impl FixedBuffer128 {
/// Create a new buffer
pub fn new() -> FixedBuffer128 {
return FixedBuffer128 {
buffer: [0u8, ..128],
buffer_idx: 0
};
}
}
impl_fixed_buffer!(FixedBuffer128, 128)
/// The StandardPadding trait adds a method useful for various hash algorithms to a FixedBuffer
/// struct.
pub trait StandardPadding {
/// Add standard padding to the buffer. The buffer must not be full when this method is called
/// and is guaranteed to have exactly rem remaining bytes when it returns. If there are not at
/// least rem bytes available, the buffer will be zero padded, processed, cleared, and then
/// filled with zeros again until only rem bytes are remaining.
fn standard_padding(&mut self, rem: uint, func: &fn(&[u8]));
}
impl <T: FixedBuffer> StandardPadding for T {
fn standard_padding(&mut self, rem: uint, func: &fn(&[u8])) {
let size = self.size();
self.next(1)[0] = 128;
if self.remaining() < rem {
self.zero_until(size);
func(self.full_buffer());
}
self.zero_until(size - rem);
}
}
#[cfg(test)]
mod test {
use std::rand::IsaacRng;
use std::rand::RngUtil;
use std::vec;
use digest::Digest;
/// Feed 1,000,000 'a's into the digest with varying input sizes and check that the result is
/// correct.
pub fn test_digest_1million_random<D: Digest>(digest: &mut D, blocksize: uint, expected: &str) {
let total_size = 1000000;
let buffer = vec::from_elem(blocksize * 2, 'a' as u8);
let mut rng = IsaacRng::new_unseeded();
let mut count = 0;
digest.reset();
while count < total_size {
let next: uint = rng.gen_uint_range(0, 2 * blocksize + 1);
let remaining = total_size - count;
let size = if next > remaining { remaining } else { next };
digest.input(buffer.slice_to(size));
count += size;
}
let result_str = digest.result_str();
assert!(expected == result_str);
}
}

View file

@ -8,10 +8,10 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::uint;
use std::vec;
/**
* The Digest trait specifies an interface common to digest functions, such as SHA-1 and the SHA-2
* family of digest functions.
@ -28,6 +28,10 @@ pub trait Digest {
/**
* Retrieve the digest result. This method may be called multiple times.
*
* # Arguments
*
* * out - the vector to hold the result. Must be large enough to contain output_bits().
*/
fn result(&mut self, out: &mut [u8]);
@ -41,6 +45,27 @@ pub trait Digest {
* Get the output size in bits.
*/
fn output_bits(&self) -> uint;
/**
* Convenience functon that feeds a string into a digest
*
* # Arguments
*
* * in The string to feed into the digest
*/
fn input_str(&mut self, input: &str) {
self.input(input.as_bytes());
}
/**
* Convenience functon that retrieves the result of a digest as a
* ~str in hexadecimal format.
*/
fn result_str(&mut self) -> ~str {
let mut buf = vec::from_elem((self.output_bits()+7)/8, 0u8);
self.result(buf);
return to_hex(buf);
}
}
fn to_hex(rr: &[u8]) -> ~str {
@ -54,34 +79,3 @@ fn to_hex(rr: &[u8]) -> ~str {
}
return s;
}
/// Contains utility methods for Digests.
/// FIXME: #7339: Convert to default methods when issues with them are resolved.
pub trait DigestUtil {
/**
* Convenience functon that feeds a string into a digest
*
* # Arguments
*
* * in The string to feed into the digest
*/
fn input_str(&mut self, input: &str);
/**
* Convenience functon that retrieves the result of a digest as a
* ~str in hexadecimal format.
*/
fn result_str(&mut self) -> ~str;
}
impl<D: Digest> DigestUtil for D {
fn input_str(&mut self, input: &str) {
self.input(input.as_bytes());
}
fn result_str(&mut self) -> ~str {
let mut buf = vec::from_elem((self.output_bits()+7)/8, 0u8);
self.result(buf);
return to_hex(buf);
}
}

View file

@ -23,6 +23,8 @@
*/
use cryptoutil::{write_u32_be, read_u32v_be, shift_add_check_overflow, FixedBuffer, FixedBuffer64,
StandardPadding};
use digest::Digest;
/*
@ -33,7 +35,6 @@ use digest::Digest;
// Some unexported constants
static DIGEST_BUF_LEN: uint = 5u;
static MSG_BLOCK_LEN: uint = 64u;
static WORK_BUF_LEN: uint = 80u;
static K0: u32 = 0x5A827999u32;
static K1: u32 = 0x6ED9EBA1u32;
@ -43,58 +44,38 @@ static K3: u32 = 0xCA62C1D6u32;
/// Structure representing the state of a Sha1 computation
pub struct Sha1 {
priv h: [u32, ..DIGEST_BUF_LEN],
priv len_low: u32,
priv len_high: u32,
priv msg_block: [u8, ..MSG_BLOCK_LEN],
priv msg_block_idx: uint,
priv length_bits: u64,
priv buffer: FixedBuffer64,
priv computed: bool,
priv work_buf: [u32, ..WORK_BUF_LEN]
}
fn add_input(st: &mut Sha1, msg: &[u8]) {
assert!((!st.computed));
foreach element in msg.iter() {
st.msg_block[st.msg_block_idx] = *element;
st.msg_block_idx += 1;
st.len_low += 8;
if st.len_low == 0 {
st.len_high += 1;
if st.len_high == 0 {
// FIXME: Need better failure mode (#2346)
fail!();
}
}
if st.msg_block_idx == MSG_BLOCK_LEN { process_msg_block(st); }
}
// Assumes that msg.len() can be converted to u64 without overflow
st.length_bits = shift_add_check_overflow(st.length_bits, msg.len() as u64, 3);
st.buffer.input(msg, |d: &[u8]| { process_msg_block(d, &mut st.h); });
}
fn process_msg_block(st: &mut Sha1) {
fn process_msg_block(data: &[u8], h: &mut [u32, ..DIGEST_BUF_LEN]) {
let mut t: int; // Loop counter
let mut w = st.work_buf;
let mut w = [0u32, ..WORK_BUF_LEN];
// Initialize the first 16 words of the vector w
t = 0;
while t < 16 {
let mut tmp;
tmp = (st.msg_block[t * 4] as u32) << 24u32;
tmp = tmp | (st.msg_block[t * 4 + 1] as u32) << 16u32;
tmp = tmp | (st.msg_block[t * 4 + 2] as u32) << 8u32;
tmp = tmp | (st.msg_block[t * 4 + 3] as u32);
w[t] = tmp;
t += 1;
}
read_u32v_be(w.mut_slice(0, 16), data);
// Initialize the rest of vector w
t = 16;
while t < 80 {
let val = w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16];
w[t] = circular_shift(1, val);
t += 1;
}
let mut a = st.h[0];
let mut b = st.h[1];
let mut c = st.h[2];
let mut d = st.h[3];
let mut e = st.h[4];
let mut a = h[0];
let mut b = h[1];
let mut c = h[2];
let mut d = h[3];
let mut e = h[4];
let mut temp: u32;
t = 0;
while t < 20 {
@ -135,12 +116,11 @@ fn process_msg_block(st: &mut Sha1) {
a = temp;
t += 1;
}
st.h[0] = st.h[0] + a;
st.h[1] = st.h[1] + b;
st.h[2] = st.h[2] + c;
st.h[3] = st.h[3] + d;
st.h[4] = st.h[4] + e;
st.msg_block_idx = 0;
h[0] += a;
h[1] += b;
h[2] += c;
h[3] += d;
h[4] += e;
}
fn circular_shift(bits: u32, word: u32) -> u32 {
@ -148,60 +128,20 @@ fn circular_shift(bits: u32, word: u32) -> u32 {
}
fn mk_result(st: &mut Sha1, rs: &mut [u8]) {
if !st.computed { pad_msg(st); st.computed = true; }
let mut i = 0;
foreach ptr_hpart in st.h.mut_iter() {
let hpart = *ptr_hpart;
rs[i] = (hpart >> 24u32 & 0xFFu32) as u8;
rs[i+1] = (hpart >> 16u32 & 0xFFu32) as u8;
rs[i+2] = (hpart >> 8u32 & 0xFFu32) as u8;
rs[i+3] = (hpart & 0xFFu32) as u8;
i += 4;
}
}
if !st.computed {
st.buffer.standard_padding(8, |d: &[u8]| { process_msg_block(d, &mut st.h) });
write_u32_be(st.buffer.next(4), (st.length_bits >> 32) as u32 );
write_u32_be(st.buffer.next(4), st.length_bits as u32);
process_msg_block(st.buffer.full_buffer(), &mut st.h);
/*
* According to the standard, the message must be padded to an even
* 512 bits. The first padding bit must be a '1'. The last 64 bits
* represent the length of the original message. All bits in between
* should be 0. This function will pad the message according to those
* rules by filling the msg_block vector accordingly. It will also
* call process_msg_block() appropriately. When it returns, it
* can be assumed that the message digest has been computed.
*/
fn pad_msg(st: &mut Sha1) {
/*
* Check to see if the current message block is too small to hold
* the initial padding bits and length. If so, we will pad the
* block, process it, and then continue padding into a second block.
*/
if st.msg_block_idx > 55 {
st.msg_block[st.msg_block_idx] = 0x80;
st.msg_block_idx += 1;
while st.msg_block_idx < MSG_BLOCK_LEN {
st.msg_block[st.msg_block_idx] = 0;
st.msg_block_idx += 1;
}
process_msg_block(st);
} else {
st.msg_block[st.msg_block_idx] = 0x80;
st.msg_block_idx += 1;
}
while st.msg_block_idx < 56 {
st.msg_block[st.msg_block_idx] = 0u8;
st.msg_block_idx += 1;
st.computed = true;
}
// Store the message length as the last 8 octets
st.msg_block[56] = (st.len_high >> 24u32 & 0xFFu32) as u8;
st.msg_block[57] = (st.len_high >> 16u32 & 0xFFu32) as u8;
st.msg_block[58] = (st.len_high >> 8u32 & 0xFFu32) as u8;
st.msg_block[59] = (st.len_high & 0xFFu32) as u8;
st.msg_block[60] = (st.len_low >> 24u32 & 0xFFu32) as u8;
st.msg_block[61] = (st.len_low >> 16u32 & 0xFFu32) as u8;
st.msg_block[62] = (st.len_low >> 8u32 & 0xFFu32) as u8;
st.msg_block[63] = (st.len_low & 0xFFu32) as u8;
process_msg_block(st);
write_u32_be(rs.mut_slice(0, 4), st.h[0]);
write_u32_be(rs.mut_slice(4, 8), st.h[1]);
write_u32_be(rs.mut_slice(8, 12), st.h[2]);
write_u32_be(rs.mut_slice(12, 16), st.h[3]);
write_u32_be(rs.mut_slice(16, 20), st.h[4]);
}
impl Sha1 {
@ -209,12 +149,9 @@ impl Sha1 {
pub fn new() -> Sha1 {
let mut st = Sha1 {
h: [0u32, ..DIGEST_BUF_LEN],
len_low: 0u32,
len_high: 0u32,
msg_block: [0u8, ..MSG_BLOCK_LEN],
msg_block_idx: 0,
length_bits: 0u64,
buffer: FixedBuffer64::new(),
computed: false,
work_buf: [0u32, ..WORK_BUF_LEN]
};
st.reset();
return st;
@ -223,14 +160,13 @@ impl Sha1 {
impl Digest for Sha1 {
pub fn reset(&mut self) {
self.len_low = 0;
self.len_high = 0;
self.msg_block_idx = 0;
self.length_bits = 0;
self.h[0] = 0x67452301u32;
self.h[1] = 0xEFCDAB89u32;
self.h[2] = 0x98BADCFEu32;
self.h[3] = 0x10325476u32;
self.h[4] = 0xC3D2E1F0u32;
self.buffer.reset();
self.computed = false;
}
pub fn input(&mut self, msg: &[u8]) { add_input(self, msg); }
@ -240,8 +176,8 @@ impl Digest for Sha1 {
#[cfg(test)]
mod tests {
use digest::{Digest, DigestUtil};
use cryptoutil::test::test_digest_1million_random;
use digest::Digest;
use sha1::Sha1;
#[deriving(Clone)]
@ -253,15 +189,6 @@ mod tests {
#[test]
fn test() {
fn a_million_letter_a() -> ~str {
let mut i = 0;
let mut rs = ~"";
while i < 100000 {
rs.push_str("aaaaaaaaaa");
i += 1;
}
return rs;
}
// Test messages from FIPS 180-1
let fips_180_1_tests = ~[
@ -289,17 +216,6 @@ mod tests {
],
output_str: ~"84983e441c3bd26ebaae4aa1f95129e5e54670f1"
},
Test {
input: a_million_letter_a(),
output: ~[
0x34u8, 0xAAu8, 0x97u8, 0x3Cu8,
0xD4u8, 0xC4u8, 0xDAu8, 0xA4u8,
0xF6u8, 0x1Eu8, 0xEBu8, 0x2Bu8,
0xDBu8, 0xADu8, 0x27u8, 0x31u8,
0x65u8, 0x34u8, 0x01u8, 0x6Fu8,
],
output_str: ~"34aa973cd4c4daa4f61eeb2bdbad27316534016f"
},
];
// Examples from wikipedia
@ -366,6 +282,15 @@ mod tests {
sh.reset();
}
}
#[test]
fn test_1million_random_sha1() {
let mut sh = Sha1::new();
test_digest_1million_random(
&mut sh,
64,
"34aa973cd4c4daa4f61eeb2bdbad27316534016f");
}
}
#[cfg(test)]

File diff suppressed because it is too large Load diff

View file

@ -67,6 +67,8 @@ pub mod dlist;
pub mod treemap;
// Crypto
#[path="crypto/cryptoutil.rs"]
mod cryptoutil;
#[path="crypto/digest.rs"]
pub mod digest;
#[path="crypto/sha1.rs"]

View file

@ -10,8 +10,7 @@
#[allow(missing_doc)];
use digest::DigestUtil;
use digest::Digest;
use json;
use sha1::Sha1;
use serialize::{Encoder, Encodable, Decoder, Decodable};