Improve sort tests and benchmarks

2017-06-24 16:51:16 +02:00 · 2017-06-24 16:51:16 +02:00 · 12205f1450
commit 12205f1450
parent 7e76505e01
5 changed files with 175 additions and 91 deletions
--- a/src/liballoc/benches/lib.rs
+++ b/src/liballoc/benches/lib.rs
@ -17,6 +17,7 @@
 #![feature(sort_unstable)]
 #![feature(test)]

+extern crate rand;
 extern crate test;

 mod btree;
--- a/src/liballoc/benches/slice.rs
+++ b/src/liballoc/benches/slice.rs
@ -8,9 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-use std::{mem, ptr};
-use std::__rand::{Rng, thread_rng};
+use std::__rand::{thread_rng};
+use std::mem;
+use std::ptr;

+use rand::{Rng, SeedableRng, XorShiftRng};
 use test::{Bencher, black_box};

 #[bench]
@ -191,17 +193,17 @@ fn gen_descending(len: usize) -> Vec<u64> {
 }

 fn gen_random(len: usize) -> Vec<u64> {
-    let mut rng = thread_rng();
+    let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
    rng.gen_iter::<u64>().take(len).collect()
 }

 fn gen_random_bytes(len: usize) -> Vec<u8> {
-    let mut rng = thread_rng();
+    let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
    rng.gen_iter::<u8>().take(len).collect()
 }

 fn gen_mostly_ascending(len: usize) -> Vec<u64> {
-    let mut rng = thread_rng();
+    let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
    let mut v = gen_ascending(len);
    for _ in (0usize..).take_while(|x| x * x <= len) {
        let x = rng.gen::<usize>() % len;
@ -212,7 +214,7 @@ fn gen_mostly_ascending(len: usize) -> Vec<u64> {
 }

 fn gen_mostly_descending(len: usize) -> Vec<u64> {
-    let mut rng = thread_rng();
+    let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
    let mut v = gen_descending(len);
    for _ in (0usize..).take_while(|x| x * x <= len) {
        let x = rng.gen::<usize>() % len;
@ -223,7 +225,7 @@ fn gen_mostly_descending(len: usize) -> Vec<u64> {
 }

 fn gen_strings(len: usize) -> Vec<String> {
-    let mut rng = thread_rng();
+    let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
    let mut v = vec![];
    for _ in 0..len {
        let n = rng.gen::<usize>() % 20 + 1;
@ -233,7 +235,7 @@ fn gen_strings(len: usize) -> Vec<String> {
 }

 fn gen_big_random(len: usize) -> Vec<[u64; 16]> {
-    let mut rng = thread_rng();
+    let mut rng = XorShiftRng::from_seed([0, 1, 2, 3]);
    rng.gen_iter().map(|x| [x; 16]).take(len).collect()
 }

@ -241,18 +243,32 @@ macro_rules! sort {
    ($f:ident, $name:ident, $gen:expr, $len:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
-            b.iter(|| $gen($len).$f());
+            let v = $gen($len);
+            b.iter(|| v.clone().$f());
            b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
        }
    }
 }

+macro_rules! sort_strings {
+    ($f:ident, $name:ident, $gen:expr, $len:expr) => {
+        #[bench]
+        fn $name(b: &mut Bencher) {
+            let v = $gen($len);
+            let v = v.iter().map(|s| &**s).collect::<Vec<&str>>();
+            b.iter(|| v.clone().$f());
+            b.bytes = $len * mem::size_of::<&str>() as u64;
+        }
+    }
+}
+
 macro_rules! sort_expensive {
    ($f:ident, $name:ident, $gen:expr, $len:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
+            let v = $gen($len);
            b.iter(|| {
-                let mut v = $gen($len);
+                let mut v = v.clone();
                let mut count = 0;
                v.$f(|a: &u64, b: &u64| {
                    count += 1;
@ -263,7 +279,7 @@ macro_rules! sort_expensive {
                });
                black_box(count);
            });
-            b.bytes = $len as u64 * mem::size_of::<u64>() as u64;
+            b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
        }
    }
 }
@ -271,30 +287,30 @@ macro_rules! sort_expensive {
 sort!(sort, sort_small_ascending, gen_ascending, 10);
 sort!(sort, sort_small_descending, gen_descending, 10);
 sort!(sort, sort_small_random, gen_random, 10);
-sort!(sort, sort_small_big_random, gen_big_random, 10);
+sort!(sort, sort_small_big, gen_big_random, 10);
 sort!(sort, sort_medium_random, gen_random, 100);
 sort!(sort, sort_large_ascending, gen_ascending, 10000);
 sort!(sort, sort_large_descending, gen_descending, 10000);
 sort!(sort, sort_large_mostly_ascending, gen_mostly_ascending, 10000);
 sort!(sort, sort_large_mostly_descending, gen_mostly_descending, 10000);
 sort!(sort, sort_large_random, gen_random, 10000);
-sort!(sort, sort_large_big_random, gen_big_random, 10000);
-sort!(sort, sort_large_strings, gen_strings, 10000);
-sort_expensive!(sort_by, sort_large_random_expensive, gen_random, 10000);
+sort!(sort, sort_large_big, gen_big_random, 10000);
+sort_strings!(sort, sort_large_strings, gen_strings, 10000);
+sort_expensive!(sort_by, sort_large_expensive, gen_random, 10000);

 sort!(sort_unstable, sort_unstable_small_ascending, gen_ascending, 10);
 sort!(sort_unstable, sort_unstable_small_descending, gen_descending, 10);
 sort!(sort_unstable, sort_unstable_small_random, gen_random, 10);
-sort!(sort_unstable, sort_unstable_small_big_random, gen_big_random, 10);
+sort!(sort_unstable, sort_unstable_small_big, gen_big_random, 10);
 sort!(sort_unstable, sort_unstable_medium_random, gen_random, 100);
 sort!(sort_unstable, sort_unstable_large_ascending, gen_ascending, 10000);
 sort!(sort_unstable, sort_unstable_large_descending, gen_descending, 10000);
 sort!(sort_unstable, sort_unstable_large_mostly_ascending, gen_mostly_ascending, 10000);
 sort!(sort_unstable, sort_unstable_large_mostly_descending, gen_mostly_descending, 10000);
 sort!(sort_unstable, sort_unstable_large_random, gen_random, 10000);
-sort!(sort_unstable, sort_unstable_large_big_random, gen_big_random, 10000);
-sort!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
-sort_expensive!(sort_unstable_by, sort_unstable_large_random_expensive, gen_random, 10000);
+sort!(sort_unstable, sort_unstable_large_big, gen_big_random, 10000);
+sort_strings!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
+sort_expensive!(sort_unstable_by, sort_unstable_large_expensive, gen_random, 10000);

 macro_rules! reverse {
    ($name:ident, $ty:ty, $f:expr) => {
--- a/src/liballoc/slice.rs
+++ b/src/liballoc/slice.rs
@ -1794,7 +1794,7 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
-            // `T` is not a zero-sized type, so it's okay to divide by it's size.
+            // `T` is not a zero-sized type, so it's okay to divide by its size.
            let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
            unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); }
        }
@ -1908,7 +1908,7 @@ fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
    // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
    // algorithm should continue building a new run instead, `None` is returned.
    //
-    // TimSort is infamous for it's buggy implementations, as described here:
+    // TimSort is infamous for its buggy implementations, as described here:
    // http://envisage-project.eu/timsort-specification-and-verification/
    //
    // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
--- a/src/liballoc/tests/slice.rs
+++ b/src/liballoc/tests/slice.rs
@ -396,18 +396,44 @@ fn test_sort() {
    let mut rng = thread_rng();

    for len in (2..25).chain(500..510) {
-        for _ in 0..100 {
-            let mut v: Vec<_> = rng.gen_iter::<i32>().take(len).collect();
-            let mut v1 = v.clone();
+        for &modulus in &[5, 10, 100, 1000] {
+            for _ in 0..10 {
+                let orig: Vec<_> = rng.gen_iter::<i32>()
+                    .map(|x| x % modulus)
+                    .take(len)
+                    .collect();

-            v.sort();
-            assert!(v.windows(2).all(|w| w[0] <= w[1]));
+                // Sort in default order.
+                let mut v = orig.clone();
+                v.sort();
+                assert!(v.windows(2).all(|w| w[0] <= w[1]));

-            v1.sort_by(|a, b| a.cmp(b));
-            assert!(v1.windows(2).all(|w| w[0] <= w[1]));
+                // Sort in ascending order.
+                let mut v = orig.clone();
+                v.sort_by(|a, b| a.cmp(b));
+                assert!(v.windows(2).all(|w| w[0] <= w[1]));

-            v1.sort_by(|a, b| b.cmp(a));
-            assert!(v1.windows(2).all(|w| w[0] >= w[1]));
+                // Sort in descending order.
+                let mut v = orig.clone();
+                v.sort_by(|a, b| b.cmp(a));
+                assert!(v.windows(2).all(|w| w[0] >= w[1]));
+
+                // Sort with many pre-sorted runs.
+                let mut v = orig.clone();
+                v.sort();
+                v.reverse();
+                for _ in 0..5 {
+                    let a = rng.gen::<usize>() % len;
+                    let b = rng.gen::<usize>() % len;
+                    if a < b {
+                        v[a..b].reverse();
+                    } else {
+                        v.swap(a, b);
+                    }
+                }
+                v.sort();
+                assert!(v.windows(2).all(|w| w[0] <= w[1]));
+            }
        }
    }

--- a/src/test/run-pass/vector-sort-panic-safe.rs
+++ b/src/test/run-pass/vector-sort-panic-safe.rs
@ -10,14 +10,17 @@

 // ignore-emscripten no threads support

-#![feature(rand)]
 #![feature(const_fn)]
+#![feature(rand)]
+#![feature(sort_unstable)]

 use std::__rand::{thread_rng, Rng};
-use std::panic;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::thread;
 use std::cell::Cell;
+use std::cmp::Ordering;
+use std::panic;
+use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize};
+use std::sync::atomic::Ordering::Relaxed;
+use std::thread;

 const MAX_LEN: usize = 80;

@ -45,54 +48,85 @@ static DROP_COUNTS: [AtomicUsize; MAX_LEN] = [
    AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0),
 ];

-#[derive(Clone, PartialEq, PartialOrd, Eq, Ord)]
+static VERSIONS: AtomicUsize = ATOMIC_USIZE_INIT;
+
+#[derive(Clone, Eq)]
 struct DropCounter {
    x: u32,
    id: usize,
+    version: Cell<usize>,
+}
+
+impl PartialEq for DropCounter {
+    fn eq(&self, other: &Self) -> bool {
+        self.partial_cmp(other) == Some(Ordering::Equal)
+    }
+}
+
+impl PartialOrd for DropCounter {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.version.set(self.version.get() + 1);
+        other.version.set(other.version.get() + 1);
+        VERSIONS.fetch_add(2, Relaxed);
+        self.x.partial_cmp(&other.x)
+    }
+}
+
+impl Ord for DropCounter {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.partial_cmp(other).unwrap()
+    }
 }

 impl Drop for DropCounter {
    fn drop(&mut self) {
-        DROP_COUNTS[self.id].fetch_add(1, Ordering::Relaxed);
+        DROP_COUNTS[self.id].fetch_add(1, Relaxed);
+        VERSIONS.fetch_sub(self.version.get(), Relaxed);
    }
 }

-fn test(input: &[DropCounter]) {
-    let len = input.len();
+macro_rules! test {
+    ($input:ident, $func:ident) => {
+        let len = $input.len();

-    // Work out the total number of comparisons required to sort
-    // this array...
-    let mut count = 0usize;
-    input.to_owned().sort_by(|a, b| { count += 1; a.cmp(b) });
+        // Work out the total number of comparisons required to sort
+        // this array...
+        let mut count = 0usize;
+        $input.to_owned().$func(|a, b| { count += 1; a.cmp(b) });

-    // ... and then panic on each and every single one.
-    for panic_countdown in 0..count {
-        // Refresh the counters.
-        for i in 0..len {
-            DROP_COUNTS[i].store(0, Ordering::Relaxed);
-        }
+        // ... and then panic on each and every single one.
+        for panic_countdown in 0..count {
+            // Refresh the counters.
+            VERSIONS.store(0, Relaxed);
+            for i in 0..len {
+                DROP_COUNTS[i].store(0, Relaxed);
+            }

-        let v = input.to_owned();
-        let _ = thread::spawn(move || {
-            let mut v = v;
-            let mut panic_countdown = panic_countdown;
-            v.sort_by(|a, b| {
-                if panic_countdown == 0 {
-                    SILENCE_PANIC.with(|s| s.set(true));
-                    panic!();
-                }
-                panic_countdown -= 1;
-                a.cmp(b)
-            })
-        }).join();
+            let v = $input.to_owned();
+            let _ = thread::spawn(move || {
+                let mut v = v;
+                let mut panic_countdown = panic_countdown;
+                v.$func(|a, b| {
+                    if panic_countdown == 0 {
+                        SILENCE_PANIC.with(|s| s.set(true));
+                        panic!();
+                    }
+                    panic_countdown -= 1;
+                    a.cmp(b)
+                })
+            }).join();

-        // Check that the number of things dropped is exactly
-        // what we expect (i.e. the contents of `v`).
-        for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
-            let count = c.load(Ordering::Relaxed);
-            assert!(count == 1,
-                    "found drop count == {} for i == {}, len == {}",
-                    count, i, len);
+            // Check that the number of things dropped is exactly
+            // what we expect (i.e. the contents of `v`).
+            for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
+                let count = c.load(Relaxed);
+                assert!(count == 1,
+                        "found drop count == {} for i == {}, len == {}",
+                        count, i, len);
+            }
+
+            // Check that the most recent versions of values were dropped.
+            assert_eq!(VERSIONS.load(Relaxed), 0);
        }
    }
 }
@ -106,33 +140,40 @@ fn main() {
            prev(info);
        }
    }));
+
    for len in (1..20).chain(70..MAX_LEN) {
-        // Test on a random array.
-        let mut rng = thread_rng();
-        let input = (0..len).map(|id| {
-            DropCounter {
-                x: rng.next_u32(),
-                id: id,
-            }
-        }).collect::<Vec<_>>();
-        test(&input);
+        for &modulus in &[5, 20, 50] {
+            for &has_runs in &[false, true] {
+                let mut rng = thread_rng();
+                let mut input = (0..len)
+                    .map(|id| {
+                        DropCounter {
+                            x: rng.next_u32() % modulus,
+                            id: id,
+                            version: Cell::new(0),
+                        }
+                    })
+                    .collect::<Vec<_>>();

-        // Test on a sorted array with two elements randomly swapped, creating several natural
-        // runs of random lengths. Such arrays have very high chances of hitting all code paths in
-        // the merge procedure.
-        for _ in 0..5 {
-            let mut input = (0..len).map(|i|
-                DropCounter {
-                    x: i as u32,
-                    id: i,
+                if has_runs {
+                    for c in &mut input {
+                        c.x = c.id as u32;
+                    }
+
+                    for _ in 0..5 {
+                        let a = rng.gen::<usize>() % len;
+                        let b = rng.gen::<usize>() % len;
+                        if a < b {
+                            input[a..b].reverse();
+                        } else {
+                            input.swap(a, b);
+                        }
+                    }
                }
-            ).collect::<Vec<_>>();

-            let a = rng.gen::<usize>() % len;
-            let b = rng.gen::<usize>() % len;
-            input.swap(a, b);
-
-            test(&input);
+                test!(input, sort_by);
+                test!(input, sort_unstable_by);
+            }
        }
    }
 }