Improve sort tests and benchmarks

This commit is contained in:
Stjepan Glavina 2017-06-24 16:51:16 +02:00
parent 7e76505e01
commit 12205f1450
5 changed files with 175 additions and 91 deletions

View file

@ -17,6 +17,7 @@
#![feature(sort_unstable)]
#![feature(test)]
extern crate rand;
extern crate test;
mod btree;

View file

@ -8,9 +8,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::{mem, ptr};
use std::__rand::{Rng, thread_rng};
use std::__rand::{thread_rng};
use std::mem;
use std::ptr;
use rand::{Rng, SeedableRng, XorShiftRng};
use test::{Bencher, black_box};
#[bench]
@ -191,17 +193,17 @@ fn gen_descending(len: usize) -> Vec<u64> {
}
fn gen_random(len: usize) -> Vec<u64> {
let mut rng = thread_rng();
let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
rng.gen_iter::<u64>().take(len).collect()
}
fn gen_random_bytes(len: usize) -> Vec<u8> {
let mut rng = thread_rng();
let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
rng.gen_iter::<u8>().take(len).collect()
}
fn gen_mostly_ascending(len: usize) -> Vec<u64> {
let mut rng = thread_rng();
let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
let mut v = gen_ascending(len);
for _ in (0usize..).take_while(|x| x * x <= len) {
let x = rng.gen::<usize>() % len;
@ -212,7 +214,7 @@ fn gen_mostly_ascending(len: usize) -> Vec<u64> {
}
fn gen_mostly_descending(len: usize) -> Vec<u64> {
let mut rng = thread_rng();
let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
let mut v = gen_descending(len);
for _ in (0usize..).take_while(|x| x * x <= len) {
let x = rng.gen::<usize>() % len;
@ -223,7 +225,7 @@ fn gen_mostly_descending(len: usize) -> Vec<u64> {
}
fn gen_strings(len: usize) -> Vec<String> {
let mut rng = thread_rng();
let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
let mut v = vec![];
for _ in 0..len {
let n = rng.gen::<usize>() % 20 + 1;
@ -233,7 +235,7 @@ fn gen_strings(len: usize) -> Vec<String> {
}
fn gen_big_random(len: usize) -> Vec<[u64; 16]> {
let mut rng = thread_rng();
let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
rng.gen_iter().map(|x| [x; 16]).take(len).collect()
}
@ -241,18 +243,32 @@ macro_rules! sort {
($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
b.iter(|| $gen($len).$f());
let v = $gen($len);
b.iter(|| v.clone().$f());
b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
}
}
}
macro_rules! sort_strings {
($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
let v = $gen($len);
let v = v.iter().map(|s| &**s).collect::<Vec<&str>>();
b.iter(|| v.clone().$f());
b.bytes = $len * mem::size_of::<&str>() as u64;
}
}
}
macro_rules! sort_expensive {
($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
let v = $gen($len);
b.iter(|| {
let mut v = $gen($len);
let mut v = v.clone();
let mut count = 0;
v.$f(|a: &u64, b: &u64| {
count += 1;
@ -263,7 +279,7 @@ macro_rules! sort_expensive {
});
black_box(count);
});
b.bytes = $len as u64 * mem::size_of::<u64>() as u64;
b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
}
}
}
@ -271,30 +287,30 @@ macro_rules! sort_expensive {
sort!(sort, sort_small_ascending, gen_ascending, 10);
sort!(sort, sort_small_descending, gen_descending, 10);
sort!(sort, sort_small_random, gen_random, 10);
sort!(sort, sort_small_big_random, gen_big_random, 10);
sort!(sort, sort_small_big, gen_big_random, 10);
sort!(sort, sort_medium_random, gen_random, 100);
sort!(sort, sort_large_ascending, gen_ascending, 10000);
sort!(sort, sort_large_descending, gen_descending, 10000);
sort!(sort, sort_large_mostly_ascending, gen_mostly_ascending, 10000);
sort!(sort, sort_large_mostly_descending, gen_mostly_descending, 10000);
sort!(sort, sort_large_random, gen_random, 10000);
sort!(sort, sort_large_big_random, gen_big_random, 10000);
sort!(sort, sort_large_strings, gen_strings, 10000);
sort_expensive!(sort_by, sort_large_random_expensive, gen_random, 10000);
sort!(sort, sort_large_big, gen_big_random, 10000);
sort_strings!(sort, sort_large_strings, gen_strings, 10000);
sort_expensive!(sort_by, sort_large_expensive, gen_random, 10000);
sort!(sort_unstable, sort_unstable_small_ascending, gen_ascending, 10);
sort!(sort_unstable, sort_unstable_small_descending, gen_descending, 10);
sort!(sort_unstable, sort_unstable_small_random, gen_random, 10);
sort!(sort_unstable, sort_unstable_small_big_random, gen_big_random, 10);
sort!(sort_unstable, sort_unstable_small_big, gen_big_random, 10);
sort!(sort_unstable, sort_unstable_medium_random, gen_random, 100);
sort!(sort_unstable, sort_unstable_large_ascending, gen_ascending, 10000);
sort!(sort_unstable, sort_unstable_large_descending, gen_descending, 10000);
sort!(sort_unstable, sort_unstable_large_mostly_ascending, gen_mostly_ascending, 10000);
sort!(sort_unstable, sort_unstable_large_mostly_descending, gen_mostly_descending, 10000);
sort!(sort_unstable, sort_unstable_large_random, gen_random, 10000);
sort!(sort_unstable, sort_unstable_large_big_random, gen_big_random, 10000);
sort!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
sort_expensive!(sort_unstable_by, sort_unstable_large_random_expensive, gen_random, 10000);
sort!(sort_unstable, sort_unstable_large_big, gen_big_random, 10000);
sort_strings!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
sort_expensive!(sort_unstable_by, sort_unstable_large_expensive, gen_random, 10000);
macro_rules! reverse {
($name:ident, $ty:ty, $f:expr) => {

View file

@ -1794,7 +1794,7 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
impl<T> Drop for MergeHole<T> {
fn drop(&mut self) {
// `T` is not a zero-sized type, so it's okay to divide by it's size.
// `T` is not a zero-sized type, so it's okay to divide by its size.
let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); }
}
@ -1908,7 +1908,7 @@ fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
// algorithm should continue building a new run instead, `None` is returned.
//
// TimSort is infamous for it's buggy implementations, as described here:
// TimSort is infamous for its buggy implementations, as described here:
// http://envisage-project.eu/timsort-specification-and-verification/
//
// The gist of the story is: we must enforce the invariants on the top four runs on the stack.

View file

@ -396,18 +396,44 @@ fn test_sort() {
let mut rng = thread_rng();
for len in (2..25).chain(500..510) {
for _ in 0..100 {
let mut v: Vec<_> = rng.gen_iter::<i32>().take(len).collect();
let mut v1 = v.clone();
for &modulus in &[5, 10, 100, 1000] {
for _ in 0..10 {
let orig: Vec<_> = rng.gen_iter::<i32>()
.map(|x| x % modulus)
.take(len)
.collect();
v.sort();
assert!(v.windows(2).all(|w| w[0] <= w[1]));
// Sort in default order.
let mut v = orig.clone();
v.sort();
assert!(v.windows(2).all(|w| w[0] <= w[1]));
v1.sort_by(|a, b| a.cmp(b));
assert!(v1.windows(2).all(|w| w[0] <= w[1]));
// Sort in ascending order.
let mut v = orig.clone();
v.sort_by(|a, b| a.cmp(b));
assert!(v.windows(2).all(|w| w[0] <= w[1]));
v1.sort_by(|a, b| b.cmp(a));
assert!(v1.windows(2).all(|w| w[0] >= w[1]));
// Sort in descending order.
let mut v = orig.clone();
v.sort_by(|a, b| b.cmp(a));
assert!(v.windows(2).all(|w| w[0] >= w[1]));
// Sort with many pre-sorted runs.
let mut v = orig.clone();
v.sort();
v.reverse();
for _ in 0..5 {
let a = rng.gen::<usize>() % len;
let b = rng.gen::<usize>() % len;
if a < b {
v[a..b].reverse();
} else {
v.swap(a, b);
}
}
v.sort();
assert!(v.windows(2).all(|w| w[0] <= w[1]));
}
}
}

View file

@ -10,14 +10,17 @@
// ignore-emscripten no threads support
#![feature(rand)]
#![feature(const_fn)]
#![feature(rand)]
#![feature(sort_unstable)]
use std::__rand::{thread_rng, Rng};
use std::panic;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use std::cell::Cell;
use std::cmp::Ordering;
use std::panic;
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize};
use std::sync::atomic::Ordering::Relaxed;
use std::thread;
const MAX_LEN: usize = 80;
@ -45,54 +48,85 @@ static DROP_COUNTS: [AtomicUsize; MAX_LEN] = [
AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
];
#[derive(Clone, PartialEq, PartialOrd, Eq, Ord)]
static VERSIONS: AtomicUsize = ATOMIC_USIZE_INIT;
#[derive(Clone, Eq)]
struct DropCounter {
x: u32,
id: usize,
version: Cell<usize>,
}
impl PartialEq for DropCounter {
fn eq(&self, other: &Self) -> bool {
self.partial_cmp(other) == Some(Ordering::Equal)
}
}
impl PartialOrd for DropCounter {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.version.set(self.version.get() + 1);
other.version.set(other.version.get() + 1);
VERSIONS.fetch_add(2, Relaxed);
self.x.partial_cmp(&other.x)
}
}
impl Ord for DropCounter {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap()
}
}
impl Drop for DropCounter {
fn drop(&mut self) {
DROP_COUNTS[self.id].fetch_add(1, Ordering::Relaxed);
DROP_COUNTS[self.id].fetch_add(1, Relaxed);
VERSIONS.fetch_sub(self.version.get(), Relaxed);
}
}
fn test(input: &[DropCounter]) {
let len = input.len();
macro_rules! test {
($input:ident, $func:ident) => {
let len = $input.len();
// Work out the total number of comparisons required to sort
// this array...
let mut count = 0usize;
input.to_owned().sort_by(|a, b| { count += 1; a.cmp(b) });
// Work out the total number of comparisons required to sort
// this array...
let mut count = 0usize;
$input.to_owned().$func(|a, b| { count += 1; a.cmp(b) });
// ... and then panic on each and every single one.
for panic_countdown in 0..count {
// Refresh the counters.
for i in 0..len {
DROP_COUNTS[i].store(0, Ordering::Relaxed);
}
// ... and then panic on each and every single one.
for panic_countdown in 0..count {
// Refresh the counters.
VERSIONS.store(0, Relaxed);
for i in 0..len {
DROP_COUNTS[i].store(0, Relaxed);
}
let v = input.to_owned();
let _ = thread::spawn(move || {
let mut v = v;
let mut panic_countdown = panic_countdown;
v.sort_by(|a, b| {
if panic_countdown == 0 {
SILENCE_PANIC.with(|s| s.set(true));
panic!();
}
panic_countdown -= 1;
a.cmp(b)
})
}).join();
let v = $input.to_owned();
let _ = thread::spawn(move || {
let mut v = v;
let mut panic_countdown = panic_countdown;
v.$func(|a, b| {
if panic_countdown == 0 {
SILENCE_PANIC.with(|s| s.set(true));
panic!();
}
panic_countdown -= 1;
a.cmp(b)
})
}).join();
// Check that the number of things dropped is exactly
// what we expect (i.e. the contents of `v`).
for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
let count = c.load(Ordering::Relaxed);
assert!(count == 1,
"found drop count == {} for i == {}, len == {}",
count, i, len);
// Check that the number of things dropped is exactly
// what we expect (i.e. the contents of `v`).
for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
let count = c.load(Relaxed);
assert!(count == 1,
"found drop count == {} for i == {}, len == {}",
count, i, len);
}
// Check that the most recent versions of values were dropped.
assert_eq!(VERSIONS.load(Relaxed), 0);
}
}
}
@ -106,33 +140,40 @@ fn main() {
prev(info);
}
}));
for len in (1..20).chain(70..MAX_LEN) {
// Test on a random array.
let mut rng = thread_rng();
let input = (0..len).map(|id| {
DropCounter {
x: rng.next_u32(),
id: id,
}
}).collect::<Vec<_>>();
test(&input);
for &modulus in &[5, 20, 50] {
for &has_runs in &[false, true] {
let mut rng = thread_rng();
let mut input = (0..len)
.map(|id| {
DropCounter {
x: rng.next_u32() % modulus,
id: id,
version: Cell::new(0),
}
})
.collect::<Vec<_>>();
// Test on a sorted array with two elements randomly swapped, creating several natural
// runs of random lengths. Such arrays have very high chances of hitting all code paths in
// the merge procedure.
for _ in 0..5 {
let mut input = (0..len).map(|i|
DropCounter {
x: i as u32,
id: i,
if has_runs {
for c in &mut input {
c.x = c.id as u32;
}
for _ in 0..5 {
let a = rng.gen::<usize>() % len;
let b = rng.gen::<usize>() % len;
if a < b {
input[a..b].reverse();
} else {
input.swap(a, b);
}
}
}
).collect::<Vec<_>>();
let a = rng.gen::<usize>() % len;
let b = rng.gen::<usize>() % len;
input.swap(a, b);
test(&input);
test!(input, sort_by);
test!(input, sort_unstable_by);
}
}
}
}