Improve sort tests and benchmarks

This commit is contained in:
Stjepan Glavina 2017-06-24 16:51:16 +02:00
parent 7e76505e01
commit 12205f1450
5 changed files with 175 additions and 91 deletions

View file

@ -17,6 +17,7 @@
#![feature(sort_unstable)] #![feature(sort_unstable)]
#![feature(test)] #![feature(test)]
extern crate rand;
extern crate test; extern crate test;
mod btree; mod btree;

View file

@ -8,9 +8,11 @@
// option. This file may not be copied, modified, or distributed // option. This file may not be copied, modified, or distributed
// except according to those terms. // except according to those terms.
use std::{mem, ptr}; use std::__rand::{thread_rng};
use std::__rand::{Rng, thread_rng}; use std::mem;
use std::ptr;
use rand::{Rng, SeedableRng, XorShiftRng};
use test::{Bencher, black_box}; use test::{Bencher, black_box};
#[bench] #[bench]
@ -191,17 +193,17 @@ fn gen_descending(len: usize) -> Vec<u64> {
} }
fn gen_random(len: usize) -> Vec<u64> { fn gen_random(len: usize) -> Vec<u64> {
let mut rng = thread_rng(); let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
rng.gen_iter::<u64>().take(len).collect() rng.gen_iter::<u64>().take(len).collect()
} }
fn gen_random_bytes(len: usize) -> Vec<u8> { fn gen_random_bytes(len: usize) -> Vec<u8> {
let mut rng = thread_rng(); let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
rng.gen_iter::<u8>().take(len).collect() rng.gen_iter::<u8>().take(len).collect()
} }
fn gen_mostly_ascending(len: usize) -> Vec<u64> { fn gen_mostly_ascending(len: usize) -> Vec<u64> {
let mut rng = thread_rng(); let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
let mut v = gen_ascending(len); let mut v = gen_ascending(len);
for _ in (0usize..).take_while(|x| x * x <= len) { for _ in (0usize..).take_while(|x| x * x <= len) {
let x = rng.gen::<usize>() % len; let x = rng.gen::<usize>() % len;
@ -212,7 +214,7 @@ fn gen_mostly_ascending(len: usize) -> Vec<u64> {
} }
fn gen_mostly_descending(len: usize) -> Vec<u64> { fn gen_mostly_descending(len: usize) -> Vec<u64> {
let mut rng = thread_rng(); let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
let mut v = gen_descending(len); let mut v = gen_descending(len);
for _ in (0usize..).take_while(|x| x * x <= len) { for _ in (0usize..).take_while(|x| x * x <= len) {
let x = rng.gen::<usize>() % len; let x = rng.gen::<usize>() % len;
@ -223,7 +225,7 @@ fn gen_mostly_descending(len: usize) -> Vec<u64> {
} }
fn gen_strings(len: usize) -> Vec<String> { fn gen_strings(len: usize) -> Vec<String> {
let mut rng = thread_rng(); let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
let mut v = vec![]; let mut v = vec![];
for _ in 0..len { for _ in 0..len {
let n = rng.gen::<usize>() % 20 + 1; let n = rng.gen::<usize>() % 20 + 1;
@ -233,7 +235,7 @@ fn gen_strings(len: usize) -> Vec<String> {
} }
fn gen_big_random(len: usize) -> Vec<[u64; 16]> { fn gen_big_random(len: usize) -> Vec<[u64; 16]> {
let mut rng = thread_rng(); let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
rng.gen_iter().map(|x| [x; 16]).take(len).collect() rng.gen_iter().map(|x| [x; 16]).take(len).collect()
} }
@ -241,18 +243,32 @@ macro_rules! sort {
($f:ident, $name:ident, $gen:expr, $len:expr) => { ($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench] #[bench]
fn $name(b: &mut Bencher) { fn $name(b: &mut Bencher) {
b.iter(|| $gen($len).$f()); let v = $gen($len);
b.iter(|| v.clone().$f());
b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64; b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
} }
} }
} }
macro_rules! sort_strings {
($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
let v = $gen($len);
let v = v.iter().map(|s| &**s).collect::<Vec<&str>>();
b.iter(|| v.clone().$f());
b.bytes = $len * mem::size_of::<&str>() as u64;
}
}
}
macro_rules! sort_expensive { macro_rules! sort_expensive {
($f:ident, $name:ident, $gen:expr, $len:expr) => { ($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench] #[bench]
fn $name(b: &mut Bencher) { fn $name(b: &mut Bencher) {
let v = $gen($len);
b.iter(|| { b.iter(|| {
let mut v = $gen($len); let mut v = v.clone();
let mut count = 0; let mut count = 0;
v.$f(|a: &u64, b: &u64| { v.$f(|a: &u64, b: &u64| {
count += 1; count += 1;
@ -263,7 +279,7 @@ macro_rules! sort_expensive {
}); });
black_box(count); black_box(count);
}); });
b.bytes = $len as u64 * mem::size_of::<u64>() as u64; b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
} }
} }
} }
@ -271,30 +287,30 @@ macro_rules! sort_expensive {
sort!(sort, sort_small_ascending, gen_ascending, 10); sort!(sort, sort_small_ascending, gen_ascending, 10);
sort!(sort, sort_small_descending, gen_descending, 10); sort!(sort, sort_small_descending, gen_descending, 10);
sort!(sort, sort_small_random, gen_random, 10); sort!(sort, sort_small_random, gen_random, 10);
sort!(sort, sort_small_big_random, gen_big_random, 10); sort!(sort, sort_small_big, gen_big_random, 10);
sort!(sort, sort_medium_random, gen_random, 100); sort!(sort, sort_medium_random, gen_random, 100);
sort!(sort, sort_large_ascending, gen_ascending, 10000); sort!(sort, sort_large_ascending, gen_ascending, 10000);
sort!(sort, sort_large_descending, gen_descending, 10000); sort!(sort, sort_large_descending, gen_descending, 10000);
sort!(sort, sort_large_mostly_ascending, gen_mostly_ascending, 10000); sort!(sort, sort_large_mostly_ascending, gen_mostly_ascending, 10000);
sort!(sort, sort_large_mostly_descending, gen_mostly_descending, 10000); sort!(sort, sort_large_mostly_descending, gen_mostly_descending, 10000);
sort!(sort, sort_large_random, gen_random, 10000); sort!(sort, sort_large_random, gen_random, 10000);
sort!(sort, sort_large_big_random, gen_big_random, 10000); sort!(sort, sort_large_big, gen_big_random, 10000);
sort!(sort, sort_large_strings, gen_strings, 10000); sort_strings!(sort, sort_large_strings, gen_strings, 10000);
sort_expensive!(sort_by, sort_large_random_expensive, gen_random, 10000); sort_expensive!(sort_by, sort_large_expensive, gen_random, 10000);
sort!(sort_unstable, sort_unstable_small_ascending, gen_ascending, 10); sort!(sort_unstable, sort_unstable_small_ascending, gen_ascending, 10);
sort!(sort_unstable, sort_unstable_small_descending, gen_descending, 10); sort!(sort_unstable, sort_unstable_small_descending, gen_descending, 10);
sort!(sort_unstable, sort_unstable_small_random, gen_random, 10); sort!(sort_unstable, sort_unstable_small_random, gen_random, 10);
sort!(sort_unstable, sort_unstable_small_big_random, gen_big_random, 10); sort!(sort_unstable, sort_unstable_small_big, gen_big_random, 10);
sort!(sort_unstable, sort_unstable_medium_random, gen_random, 100); sort!(sort_unstable, sort_unstable_medium_random, gen_random, 100);
sort!(sort_unstable, sort_unstable_large_ascending, gen_ascending, 10000); sort!(sort_unstable, sort_unstable_large_ascending, gen_ascending, 10000);
sort!(sort_unstable, sort_unstable_large_descending, gen_descending, 10000); sort!(sort_unstable, sort_unstable_large_descending, gen_descending, 10000);
sort!(sort_unstable, sort_unstable_large_mostly_ascending, gen_mostly_ascending, 10000); sort!(sort_unstable, sort_unstable_large_mostly_ascending, gen_mostly_ascending, 10000);
sort!(sort_unstable, sort_unstable_large_mostly_descending, gen_mostly_descending, 10000); sort!(sort_unstable, sort_unstable_large_mostly_descending, gen_mostly_descending, 10000);
sort!(sort_unstable, sort_unstable_large_random, gen_random, 10000); sort!(sort_unstable, sort_unstable_large_random, gen_random, 10000);
sort!(sort_unstable, sort_unstable_large_big_random, gen_big_random, 10000); sort!(sort_unstable, sort_unstable_large_big, gen_big_random, 10000);
sort!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000); sort_strings!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
sort_expensive!(sort_unstable_by, sort_unstable_large_random_expensive, gen_random, 10000); sort_expensive!(sort_unstable_by, sort_unstable_large_expensive, gen_random, 10000);
macro_rules! reverse { macro_rules! reverse {
($name:ident, $ty:ty, $f:expr) => { ($name:ident, $ty:ty, $f:expr) => {

View file

@ -1794,7 +1794,7 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
impl<T> Drop for MergeHole<T> { impl<T> Drop for MergeHole<T> {
fn drop(&mut self) { fn drop(&mut self) {
// `T` is not a zero-sized type, so it's okay to divide by it's size. // `T` is not a zero-sized type, so it's okay to divide by its size.
let len = (self.end as usize - self.start as usize) / mem::size_of::<T>(); let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); } unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); }
} }
@ -1908,7 +1908,7 @@ fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
// algorithm should continue building a new run instead, `None` is returned. // algorithm should continue building a new run instead, `None` is returned.
// //
// TimSort is infamous for it's buggy implementations, as described here: // TimSort is infamous for its buggy implementations, as described here:
// http://envisage-project.eu/timsort-specification-and-verification/ // http://envisage-project.eu/timsort-specification-and-verification/
// //
// The gist of the story is: we must enforce the invariants on the top four runs on the stack. // The gist of the story is: we must enforce the invariants on the top four runs on the stack.

View file

@ -396,18 +396,44 @@ fn test_sort() {
let mut rng = thread_rng(); let mut rng = thread_rng();
for len in (2..25).chain(500..510) { for len in (2..25).chain(500..510) {
for _ in 0..100 { for &modulus in &[5, 10, 100, 1000] {
let mut v: Vec<_> = rng.gen_iter::<i32>().take(len).collect(); for _ in 0..10 {
let mut v1 = v.clone(); let orig: Vec<_> = rng.gen_iter::<i32>()
.map(|x| x % modulus)
.take(len)
.collect();
v.sort(); // Sort in default order.
assert!(v.windows(2).all(|w| w[0] <= w[1])); let mut v = orig.clone();
v.sort();
assert!(v.windows(2).all(|w| w[0] <= w[1]));
v1.sort_by(|a, b| a.cmp(b)); // Sort in ascending order.
assert!(v1.windows(2).all(|w| w[0] <= w[1])); let mut v = orig.clone();
v.sort_by(|a, b| a.cmp(b));
assert!(v.windows(2).all(|w| w[0] <= w[1]));
v1.sort_by(|a, b| b.cmp(a)); // Sort in descending order.
assert!(v1.windows(2).all(|w| w[0] >= w[1])); let mut v = orig.clone();
v.sort_by(|a, b| b.cmp(a));
assert!(v.windows(2).all(|w| w[0] >= w[1]));
// Sort with many pre-sorted runs.
let mut v = orig.clone();
v.sort();
v.reverse();
for _ in 0..5 {
let a = rng.gen::<usize>() % len;
let b = rng.gen::<usize>() % len;
if a < b {
v[a..b].reverse();
} else {
v.swap(a, b);
}
}
v.sort();
assert!(v.windows(2).all(|w| w[0] <= w[1]));
}
} }
} }

View file

@ -10,14 +10,17 @@
// ignore-emscripten no threads support // ignore-emscripten no threads support
#![feature(rand)]
#![feature(const_fn)] #![feature(const_fn)]
#![feature(rand)]
#![feature(sort_unstable)]
use std::__rand::{thread_rng, Rng}; use std::__rand::{thread_rng, Rng};
use std::panic;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use std::cell::Cell; use std::cell::Cell;
use std::cmp::Ordering;
use std::panic;
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize};
use std::sync::atomic::Ordering::Relaxed;
use std::thread;
const MAX_LEN: usize = 80; const MAX_LEN: usize = 80;
@ -45,54 +48,85 @@ static DROP_COUNTS: [AtomicUsize; MAX_LEN] = [
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
]; ];
#[derive(Clone, PartialEq, PartialOrd, Eq, Ord)] static VERSIONS: AtomicUsize = ATOMIC_USIZE_INIT;
#[derive(Clone, Eq)]
struct DropCounter { struct DropCounter {
x: u32, x: u32,
id: usize, id: usize,
version: Cell<usize>,
}
impl PartialEq for DropCounter {
fn eq(&self, other: &Self) -> bool {
self.partial_cmp(other) == Some(Ordering::Equal)
}
}
impl PartialOrd for DropCounter {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.version.set(self.version.get() + 1);
other.version.set(other.version.get() + 1);
VERSIONS.fetch_add(2, Relaxed);
self.x.partial_cmp(&other.x)
}
}
impl Ord for DropCounter {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
}
} }
impl Drop for DropCounter { impl Drop for DropCounter {
fn drop(&mut self) { fn drop(&mut self) {
DROP_COUNTS[self.id].fetch_add(1, Ordering::Relaxed); DROP_COUNTS[self.id].fetch_add(1, Relaxed);
VERSIONS.fetch_sub(self.version.get(), Relaxed);
} }
} }
fn test(input: &[DropCounter]) { macro_rules! test {
let len = input.len(); ($input:ident, $func:ident) => {
let len = $input.len();
// Work out the total number of comparisons required to sort // Work out the total number of comparisons required to sort
// this array... // this array...
let mut count = 0usize; let mut count = 0usize;
input.to_owned().sort_by(|a, b| { count += 1; a.cmp(b) }); $input.to_owned().$func(|a, b| { count += 1; a.cmp(b) });
// ... and then panic on each and every single one. // ... and then panic on each and every single one.
for panic_countdown in 0..count { for panic_countdown in 0..count {
// Refresh the counters. // Refresh the counters.
for i in 0..len { VERSIONS.store(0, Relaxed);
DROP_COUNTS[i].store(0, Ordering::Relaxed); for i in 0..len {
} DROP_COUNTS[i].store(0, Relaxed);
}
let v = input.to_owned(); let v = $input.to_owned();
let _ = thread::spawn(move || { let _ = thread::spawn(move || {
let mut v = v; let mut v = v;
let mut panic_countdown = panic_countdown; let mut panic_countdown = panic_countdown;
v.sort_by(|a, b| { v.$func(|a, b| {
if panic_countdown == 0 { if panic_countdown == 0 {
SILENCE_PANIC.with(|s| s.set(true)); SILENCE_PANIC.with(|s| s.set(true));
panic!(); panic!();
} }
panic_countdown -= 1; panic_countdown -= 1;
a.cmp(b) a.cmp(b)
}) })
}).join(); }).join();
// Check that the number of things dropped is exactly // Check that the number of things dropped is exactly
// what we expect (i.e. the contents of `v`). // what we expect (i.e. the contents of `v`).
for (i, c) in DROP_COUNTS.iter().enumerate().take(len) { for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
let count = c.load(Ordering::Relaxed); let count = c.load(Relaxed);
assert!(count == 1, assert!(count == 1,
"found drop count == {} for i == {}, len == {}", "found drop count == {} for i == {}, len == {}",
count, i, len); count, i, len);
}
// Check that the most recent versions of values were dropped.
assert_eq!(VERSIONS.load(Relaxed), 0);
} }
} }
} }
@ -106,33 +140,40 @@ fn main() {
prev(info); prev(info);
} }
})); }));
for len in (1..20).chain(70..MAX_LEN) { for len in (1..20).chain(70..MAX_LEN) {
// Test on a random array. for &modulus in &[5, 20, 50] {
let mut rng = thread_rng(); for &has_runs in &[false, true] {
let input = (0..len).map(|id| { let mut rng = thread_rng();
DropCounter { let mut input = (0..len)
x: rng.next_u32(), .map(|id| {
id: id, DropCounter {
} x: rng.next_u32() % modulus,
}).collect::<Vec<_>>(); id: id,
test(&input); version: Cell::new(0),
}
})
.collect::<Vec<_>>();
// Test on a sorted array with two elements randomly swapped, creating several natural if has_runs {
// runs of random lengths. Such arrays have very high chances of hitting all code paths in for c in &mut input {
// the merge procedure. c.x = c.id as u32;
for _ in 0..5 { }
let mut input = (0..len).map(|i|
DropCounter { for _ in 0..5 {
x: i as u32, let a = rng.gen::<usize>() % len;
id: i, let b = rng.gen::<usize>() % len;
if a < b {
input[a..b].reverse();
} else {
input.swap(a, b);
}
}
} }
).collect::<Vec<_>>();
let a = rng.gen::<usize>() % len; test!(input, sort_by);
let b = rng.gen::<usize>() % len; test!(input, sort_unstable_by);
input.swap(a, b); }
test(&input);
} }
} }
} }