collections: Optimize Vec when cloning from a slice

llvm is currently not able to conver `Vec::extend` into a memcpy
for `Copy` types, which results in methods like `Vec::push_all`
to run twice as slow as it should be running. This patch takes
the unsafe `Vec::clone` optimization to speed up all the operations
that are cloning a slice into a `Vec`.

before:

test vec::tests::bench_clone_from_0000_0000                ... bench:        12 ns/iter (+/- 2)
test vec::tests::bench_clone_from_0000_0010                ... bench:       125 ns/iter (+/- 4) = 80 MB/s
test vec::tests::bench_clone_from_0000_0100                ... bench:       360 ns/iter (+/- 33) = 277 MB/s
test vec::tests::bench_clone_from_0000_1000                ... bench:      2601 ns/iter (+/- 175) = 384 MB/s
test vec::tests::bench_clone_from_0010_0000                ... bench:        12 ns/iter (+/- 2)
test vec::tests::bench_clone_from_0010_0010                ... bench:       125 ns/iter (+/- 10) = 80 MB/s
test vec::tests::bench_clone_from_0010_0100                ... bench:       361 ns/iter (+/- 28) = 277 MB/s
test vec::tests::bench_clone_from_0100_0010                ... bench:       131 ns/iter (+/- 13) = 76 MB/s
test vec::tests::bench_clone_from_0100_0100                ... bench:       360 ns/iter (+/- 9) = 277 MB/s
test vec::tests::bench_clone_from_0100_1000                ... bench:      2575 ns/iter (+/- 168) = 388 MB/s
test vec::tests::bench_clone_from_1000_0100                ... bench:       356 ns/iter (+/- 20) = 280 MB/s
test vec::tests::bench_clone_from_1000_1000                ... bench:      2605 ns/iter (+/- 167) = 383 MB/s
test vec::tests::bench_from_slice_0000                     ... bench:        11 ns/iter (+/- 0)
test vec::tests::bench_from_slice_0010                     ... bench:       115 ns/iter (+/- 5) = 86 MB/s
test vec::tests::bench_from_slice_0100                     ... bench:       309 ns/iter (+/- 170) = 323 MB/s
test vec::tests::bench_from_slice_1000                     ... bench:      2065 ns/iter (+/- 198) = 484 MB/s
test vec::tests::bench_push_all_0000_0000                  ... bench:         7 ns/iter (+/- 0)
test vec::tests::bench_push_all_0000_0010                  ... bench:        79 ns/iter (+/- 7) = 126 MB/s
test vec::tests::bench_push_all_0000_0100                  ... bench:       342 ns/iter (+/- 18) = 292 MB/s
test vec::tests::bench_push_all_0000_1000                  ... bench:      2873 ns/iter (+/- 75) = 348 MB/s
test vec::tests::bench_push_all_0010_0010                  ... bench:       154 ns/iter (+/- 8) = 64 MB/s
test vec::tests::bench_push_all_0100_0100                  ... bench:       518 ns/iter (+/- 18) = 193 MB/s
test vec::tests::bench_push_all_1000_1000                  ... bench:      4490 ns/iter (+/- 223) = 222 MB/s

after:

test vec::tests::bench_clone_from_0000_0000                ... bench:        12 ns/iter (+/- 1)
test vec::tests::bench_clone_from_0000_0010                ... bench:       123 ns/iter (+/- 5) = 81 MB/s
test vec::tests::bench_clone_from_0000_0100                ... bench:       367 ns/iter (+/- 23) = 272 MB/s
test vec::tests::bench_clone_from_0000_1000                ... bench:      2618 ns/iter (+/- 252) = 381 MB/s
test vec::tests::bench_clone_from_0010_0000                ... bench:        12 ns/iter (+/- 1)
test vec::tests::bench_clone_from_0010_0010                ... bench:       124 ns/iter (+/- 7) = 80 MB/s
test vec::tests::bench_clone_from_0010_0100                ... bench:       369 ns/iter (+/- 34) = 271 MB/s
test vec::tests::bench_clone_from_0100_0010                ... bench:       123 ns/iter (+/- 6) = 81 MB/s
test vec::tests::bench_clone_from_0100_0100                ... bench:       371 ns/iter (+/- 25) = 269 MB/s
test vec::tests::bench_clone_from_0100_1000                ... bench:      2713 ns/iter (+/- 532) = 368 MB/s
test vec::tests::bench_clone_from_1000_0100                ... bench:       369 ns/iter (+/- 14) = 271 MB/s
test vec::tests::bench_clone_from_1000_1000                ... bench:      2611 ns/iter (+/- 194) = 382 MB/s
test vec::tests::bench_from_slice_0000                     ... bench:         7 ns/iter (+/- 0)
test vec::tests::bench_from_slice_0010                     ... bench:       108 ns/iter (+/- 4) = 92 MB/s
test vec::tests::bench_from_slice_0100                     ... bench:       235 ns/iter (+/- 24) = 425 MB/s
test vec::tests::bench_from_slice_1000                     ... bench:      1318 ns/iter (+/- 96) = 758 MB/s
test vec::tests::bench_push_all_0000_0000                  ... bench:         7 ns/iter (+/- 0)
test vec::tests::bench_push_all_0000_0010                  ... bench:        70 ns/iter (+/- 4) = 142 MB/s
test vec::tests::bench_push_all_0000_0100                  ... bench:       176 ns/iter (+/- 16) = 568 MB/s
test vec::tests::bench_push_all_0000_1000                  ... bench:      1125 ns/iter (+/- 94) = 888 MB/s
test vec::tests::bench_push_all_0010_0010                  ... bench:       159 ns/iter (+/- 15) = 62 MB/s
test vec::tests::bench_push_all_0100_0100                  ... bench:       363 ns/iter (+/- 12) = 275 MB/s
test vec::tests::bench_push_all_1000_1000                  ... bench:      2860 ns/iter (+/- 415) = 349 MB/s
This commit is contained in:
Erick Tryzelaar 2014-07-05 23:11:18 -07:00
parent 065b98d577
commit f1ea540e90

View file

@ -197,7 +197,9 @@ impl<T: Clone> Vec<T> {
/// ```
#[inline]
pub fn from_slice(values: &[T]) -> Vec<T> {
values.iter().map(|x| x.clone()).collect()
let mut vector = Vec::with_capacity(values.len());
vector.push_all(values);
vector
}
/// Constructs a `Vec` with copies of a value.
@ -238,7 +240,10 @@ impl<T: Clone> Vec<T> {
/// ```
#[inline]
pub fn push_all(&mut self, other: &[T]) {
self.extend(other.iter().map(|e| e.clone()));
unsafe {
self.reserve_additional(other.len());
unsafe_push_all_clone(self, other)
}
}
/// Grows the `Vec` in-place.
@ -318,41 +323,31 @@ impl<T: Clone> Vec<T> {
#[unstable]
impl<T:Clone> Clone for Vec<T> {
fn clone(&self) -> Vec<T> {
let len = self.len;
let mut vector = Vec::with_capacity(len);
// Unsafe code so this can be optimised to a memcpy (or something
// similarly fast) when T is Copy. LLVM is easily confused, so any
// extra operations during the loop can prevent this optimisation
{
let this_slice = self.as_slice();
while vector.len < len {
unsafe {
let len = vector.len;
ptr::write(
vector.as_mut_slice().unsafe_mut_ref(len),
this_slice.unsafe_ref(len).clone());
}
vector.len += 1;
}
unsafe {
let mut vector = Vec::with_capacity(self.len);
unsafe_push_all_clone(&mut vector, self.as_slice());
vector
}
vector
}
fn clone_from(&mut self, other: &Vec<T>) {
// drop anything in self that will not be overwritten
if self.len() > other.len() {
self.truncate(other.len())
}
unsafe {
// drop anything in self that will not be overwritten
if self.len() > other.len() {
self.truncate(other.len())
}
// reuse the contained values' allocations/resources.
for (place, thing) in self.mut_iter().zip(other.iter()) {
place.clone_from(thing)
}
// reuse the contained values' allocations/resources.
for (place, thing) in self.mut_iter().zip(other.iter()) {
place.clone_from(thing)
}
// self.len <= other.len due to the truncate above, so the
// slice here is always in-bounds.
let len = self.len();
self.extend(other.slice_from(len).iter().map(|x| x.clone()));
// self.len <= other.len due to the truncate above, so the
// slice here is always in-bounds.
let slice = other.slice_from(self.len());
self.reserve_additional(slice.len());
unsafe_push_all_clone(self, slice)
}
}
}
@ -1555,6 +1550,24 @@ pub mod raw {
}
}
// Unsafe code so this can be optimised to a memcpy (or something similarly
// fast) when T is Copy. LLVM is easily confused, so any extra operations
// during the loop can prevent this optimisation.
//
// WARNING: You must preallocate space on the vector before you call this
// method.
#[inline(always)]
unsafe fn unsafe_push_all_clone<T: Clone>(dst: &mut Vec<T>, src: &[T]) {
let mut dst_len = dst.len();
for i in range(0, src.len()) {
ptr::write(
dst.as_mut_slice().unsafe_mut_ref(dst_len),
src.unsafe_ref(i).clone());
dst_len += 1;
dst.set_len(dst_len);
}
}
#[cfg(test)]
mod tests {