collections: Optimize Vec when cloning from a slice
llvm is currently not able to conver `Vec::extend` into a memcpy for `Copy` types, which results in methods like `Vec::push_all` to run twice as slow as it should be running. This patch takes the unsafe `Vec::clone` optimization to speed up all the operations that are cloning a slice into a `Vec`. before: test vec::tests::bench_clone_from_0000_0000 ... bench: 12 ns/iter (+/- 2) test vec::tests::bench_clone_from_0000_0010 ... bench: 125 ns/iter (+/- 4) = 80 MB/s test vec::tests::bench_clone_from_0000_0100 ... bench: 360 ns/iter (+/- 33) = 277 MB/s test vec::tests::bench_clone_from_0000_1000 ... bench: 2601 ns/iter (+/- 175) = 384 MB/s test vec::tests::bench_clone_from_0010_0000 ... bench: 12 ns/iter (+/- 2) test vec::tests::bench_clone_from_0010_0010 ... bench: 125 ns/iter (+/- 10) = 80 MB/s test vec::tests::bench_clone_from_0010_0100 ... bench: 361 ns/iter (+/- 28) = 277 MB/s test vec::tests::bench_clone_from_0100_0010 ... bench: 131 ns/iter (+/- 13) = 76 MB/s test vec::tests::bench_clone_from_0100_0100 ... bench: 360 ns/iter (+/- 9) = 277 MB/s test vec::tests::bench_clone_from_0100_1000 ... bench: 2575 ns/iter (+/- 168) = 388 MB/s test vec::tests::bench_clone_from_1000_0100 ... bench: 356 ns/iter (+/- 20) = 280 MB/s test vec::tests::bench_clone_from_1000_1000 ... bench: 2605 ns/iter (+/- 167) = 383 MB/s test vec::tests::bench_from_slice_0000 ... bench: 11 ns/iter (+/- 0) test vec::tests::bench_from_slice_0010 ... bench: 115 ns/iter (+/- 5) = 86 MB/s test vec::tests::bench_from_slice_0100 ... bench: 309 ns/iter (+/- 170) = 323 MB/s test vec::tests::bench_from_slice_1000 ... bench: 2065 ns/iter (+/- 198) = 484 MB/s test vec::tests::bench_push_all_0000_0000 ... bench: 7 ns/iter (+/- 0) test vec::tests::bench_push_all_0000_0010 ... bench: 79 ns/iter (+/- 7) = 126 MB/s test vec::tests::bench_push_all_0000_0100 ... bench: 342 ns/iter (+/- 18) = 292 MB/s test vec::tests::bench_push_all_0000_1000 ... bench: 2873 ns/iter (+/- 75) = 348 MB/s test vec::tests::bench_push_all_0010_0010 ... bench: 154 ns/iter (+/- 8) = 64 MB/s test vec::tests::bench_push_all_0100_0100 ... bench: 518 ns/iter (+/- 18) = 193 MB/s test vec::tests::bench_push_all_1000_1000 ... bench: 4490 ns/iter (+/- 223) = 222 MB/s after: test vec::tests::bench_clone_from_0000_0000 ... bench: 12 ns/iter (+/- 1) test vec::tests::bench_clone_from_0000_0010 ... bench: 123 ns/iter (+/- 5) = 81 MB/s test vec::tests::bench_clone_from_0000_0100 ... bench: 367 ns/iter (+/- 23) = 272 MB/s test vec::tests::bench_clone_from_0000_1000 ... bench: 2618 ns/iter (+/- 252) = 381 MB/s test vec::tests::bench_clone_from_0010_0000 ... bench: 12 ns/iter (+/- 1) test vec::tests::bench_clone_from_0010_0010 ... bench: 124 ns/iter (+/- 7) = 80 MB/s test vec::tests::bench_clone_from_0010_0100 ... bench: 369 ns/iter (+/- 34) = 271 MB/s test vec::tests::bench_clone_from_0100_0010 ... bench: 123 ns/iter (+/- 6) = 81 MB/s test vec::tests::bench_clone_from_0100_0100 ... bench: 371 ns/iter (+/- 25) = 269 MB/s test vec::tests::bench_clone_from_0100_1000 ... bench: 2713 ns/iter (+/- 532) = 368 MB/s test vec::tests::bench_clone_from_1000_0100 ... bench: 369 ns/iter (+/- 14) = 271 MB/s test vec::tests::bench_clone_from_1000_1000 ... bench: 2611 ns/iter (+/- 194) = 382 MB/s test vec::tests::bench_from_slice_0000 ... bench: 7 ns/iter (+/- 0) test vec::tests::bench_from_slice_0010 ... bench: 108 ns/iter (+/- 4) = 92 MB/s test vec::tests::bench_from_slice_0100 ... bench: 235 ns/iter (+/- 24) = 425 MB/s test vec::tests::bench_from_slice_1000 ... bench: 1318 ns/iter (+/- 96) = 758 MB/s test vec::tests::bench_push_all_0000_0000 ... bench: 7 ns/iter (+/- 0) test vec::tests::bench_push_all_0000_0010 ... bench: 70 ns/iter (+/- 4) = 142 MB/s test vec::tests::bench_push_all_0000_0100 ... bench: 176 ns/iter (+/- 16) = 568 MB/s test vec::tests::bench_push_all_0000_1000 ... bench: 1125 ns/iter (+/- 94) = 888 MB/s test vec::tests::bench_push_all_0010_0010 ... bench: 159 ns/iter (+/- 15) = 62 MB/s test vec::tests::bench_push_all_0100_0100 ... bench: 363 ns/iter (+/- 12) = 275 MB/s test vec::tests::bench_push_all_1000_1000 ... bench: 2860 ns/iter (+/- 415) = 349 MB/s
This commit is contained in:
parent
065b98d577
commit
f1ea540e90
1 changed files with 44 additions and 31 deletions
|
@ -197,7 +197,9 @@ impl<T: Clone> Vec<T> {
|
|||
/// ```
|
||||
#[inline]
|
||||
pub fn from_slice(values: &[T]) -> Vec<T> {
|
||||
values.iter().map(|x| x.clone()).collect()
|
||||
let mut vector = Vec::with_capacity(values.len());
|
||||
vector.push_all(values);
|
||||
vector
|
||||
}
|
||||
|
||||
/// Constructs a `Vec` with copies of a value.
|
||||
|
@ -238,7 +240,10 @@ impl<T: Clone> Vec<T> {
|
|||
/// ```
|
||||
#[inline]
|
||||
pub fn push_all(&mut self, other: &[T]) {
|
||||
self.extend(other.iter().map(|e| e.clone()));
|
||||
unsafe {
|
||||
self.reserve_additional(other.len());
|
||||
unsafe_push_all_clone(self, other)
|
||||
}
|
||||
}
|
||||
|
||||
/// Grows the `Vec` in-place.
|
||||
|
@ -318,41 +323,31 @@ impl<T: Clone> Vec<T> {
|
|||
#[unstable]
|
||||
impl<T:Clone> Clone for Vec<T> {
|
||||
fn clone(&self) -> Vec<T> {
|
||||
let len = self.len;
|
||||
let mut vector = Vec::with_capacity(len);
|
||||
// Unsafe code so this can be optimised to a memcpy (or something
|
||||
// similarly fast) when T is Copy. LLVM is easily confused, so any
|
||||
// extra operations during the loop can prevent this optimisation
|
||||
{
|
||||
let this_slice = self.as_slice();
|
||||
while vector.len < len {
|
||||
unsafe {
|
||||
let len = vector.len;
|
||||
ptr::write(
|
||||
vector.as_mut_slice().unsafe_mut_ref(len),
|
||||
this_slice.unsafe_ref(len).clone());
|
||||
}
|
||||
vector.len += 1;
|
||||
}
|
||||
unsafe {
|
||||
let mut vector = Vec::with_capacity(self.len);
|
||||
unsafe_push_all_clone(&mut vector, self.as_slice());
|
||||
vector
|
||||
}
|
||||
vector
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, other: &Vec<T>) {
|
||||
// drop anything in self that will not be overwritten
|
||||
if self.len() > other.len() {
|
||||
self.truncate(other.len())
|
||||
}
|
||||
unsafe {
|
||||
// drop anything in self that will not be overwritten
|
||||
if self.len() > other.len() {
|
||||
self.truncate(other.len())
|
||||
}
|
||||
|
||||
// reuse the contained values' allocations/resources.
|
||||
for (place, thing) in self.mut_iter().zip(other.iter()) {
|
||||
place.clone_from(thing)
|
||||
}
|
||||
// reuse the contained values' allocations/resources.
|
||||
for (place, thing) in self.mut_iter().zip(other.iter()) {
|
||||
place.clone_from(thing)
|
||||
}
|
||||
|
||||
// self.len <= other.len due to the truncate above, so the
|
||||
// slice here is always in-bounds.
|
||||
let len = self.len();
|
||||
self.extend(other.slice_from(len).iter().map(|x| x.clone()));
|
||||
// self.len <= other.len due to the truncate above, so the
|
||||
// slice here is always in-bounds.
|
||||
let slice = other.slice_from(self.len());
|
||||
self.reserve_additional(slice.len());
|
||||
unsafe_push_all_clone(self, slice)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1555,6 +1550,24 @@ pub mod raw {
|
|||
}
|
||||
}
|
||||
|
||||
// Unsafe code so this can be optimised to a memcpy (or something similarly
|
||||
// fast) when T is Copy. LLVM is easily confused, so any extra operations
|
||||
// during the loop can prevent this optimisation.
|
||||
//
|
||||
// WARNING: You must preallocate space on the vector before you call this
|
||||
// method.
|
||||
#[inline(always)]
|
||||
unsafe fn unsafe_push_all_clone<T: Clone>(dst: &mut Vec<T>, src: &[T]) {
|
||||
let mut dst_len = dst.len();
|
||||
|
||||
for i in range(0, src.len()) {
|
||||
ptr::write(
|
||||
dst.as_mut_slice().unsafe_mut_ref(dst_len),
|
||||
src.unsafe_ref(i).clone());
|
||||
dst_len += 1;
|
||||
dst.set_len(dst_len);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
Loading…
Reference in a new issue