Auto merge of #88448 - xu-cheng:btree-blk-build, r=Mark-Simulacrum

BTreeMap/BTreeSet::from_iter: use bulk building to improve the performance

Bulk building is a common technique to increase the performance of building a fresh btree map. Instead of inserting items one-by-one, we sort all the items beforehand then create the BtreeMap in bulk.

Benchmark
```
./x.py bench library/alloc --test-args btree::map::from_iter
```

* Before
```
test btree::map::from_iter_rand_100                      ... bench:       3,694 ns/iter (+/- 840)
test btree::map::from_iter_rand_10_000                   ... bench:   1,033,446 ns/iter (+/- 192,950)
test btree::map::from_iter_seq_100                       ... bench:       5,689 ns/iter (+/- 1,259)
test btree::map::from_iter_seq_10_000                    ... bench:     861,033 ns/iter (+/- 118,815)
```

* After
```
test btree::map::from_iter_rand_100                      ... bench:       3,033 ns/iter (+/- 707)
test btree::map::from_iter_rand_10_000                   ... bench:     775,958 ns/iter (+/- 105,152)
test btree::map::from_iter_seq_100                       ... bench:       2,969 ns/iter (+/- 336)
test btree::map::from_iter_seq_10_000                    ... bench:     258,292 ns/iter (+/- 29,364)
```
This commit is contained in:
bors 2021-09-07 02:24:11 +00:00
commit ffaf857045
5 changed files with 151 additions and 10 deletions

View file

@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench {
};
}
macro_rules! map_from_iter_rand_bench {
($name: ident, $n: expr, $map: ident) => {
#[bench]
pub fn $name(b: &mut Bencher) {
let n: usize = $n;
// setup
let mut rng = thread_rng();
let mut vec = Vec::with_capacity(n);
for _ in 0..n {
let i = rng.gen::<usize>() % n;
vec.push((i, i));
}
// measure
b.iter(|| {
let map: $map<_, _> = vec.iter().copied().collect();
black_box(map);
});
}
};
}
macro_rules! map_from_iter_seq_bench {
($name: ident, $n: expr, $map: ident) => {
#[bench]
pub fn $name(b: &mut Bencher) {
let n: usize = $n;
// setup
let mut vec = Vec::with_capacity(n);
for i in 0..n {
vec.push((i, i));
}
// measure
b.iter(|| {
let map: $map<_, _> = vec.iter().copied().collect();
black_box(map);
});
}
};
}
macro_rules! map_find_rand_bench {
($name: ident, $n: expr, $map: ident) => {
#[bench]
@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap}
map_insert_seq_bench! {insert_seq_100, 100, BTreeMap}
map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap}
map_from_iter_rand_bench! {from_iter_rand_100, 100, BTreeMap}
map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap}
map_from_iter_seq_bench! {from_iter_seq_100, 100, BTreeMap}
map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap}
map_find_rand_bench! {find_rand_100, 100, BTreeMap}
map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}

View file

@ -0,0 +1,47 @@
use core::iter::Peekable;
/// A iterator for deduping the key of a sorted iterator.
/// When encountering the duplicated key, only the last key-value pair is yielded.
///
/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
pub struct DedupSortedIter<K, V, I>
where
I: Iterator<Item = (K, V)>,
{
iter: Peekable<I>,
}
impl<K, V, I> DedupSortedIter<K, V, I>
where
I: Iterator<Item = (K, V)>,
{
pub fn new(iter: I) -> Self {
Self { iter: iter.peekable() }
}
}
impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
where
K: Eq,
I: Iterator<Item = (K, V)>,
{
type Item = (K, V);
fn next(&mut self) -> Option<(K, V)> {
loop {
let next = match self.iter.next() {
Some(next) => next,
None => return None,
};
let peeked = match self.iter.peek() {
Some(peeked) => peeked,
None => return Some(next),
};
if next.0 != peeked.0 {
return Some(next);
}
}
}
}

View file

@ -1,3 +1,4 @@
use crate::vec::Vec;
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::fmt::{self, Debug};
@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
use core::ptr;
use super::borrow::DormantMutRef;
use super::dedup_sorted_iter::DedupSortedIter;
use super::navigate::{LazyLeafRange, LeafRange};
use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
use super::search::SearchResult::*;
@ -1285,6 +1287,18 @@ impl<K, V> BTreeMap<K, V> {
pub fn into_values(self) -> IntoValues<K, V> {
IntoValues { inner: self.into_iter() }
}
/// Makes a `BTreeMap` from a sorted iterator.
pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
where
K: Ord,
I: Iterator<Item = (K, V)>,
{
let mut root = Root::new();
let mut length = 0;
root.bulk_push(DedupSortedIter::new(iter), &mut length);
BTreeMap { root: Some(root), length }
}
}
#[stable(feature = "rust1", since = "1.0.0")]
@ -1909,9 +1923,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
#[stable(feature = "rust1", since = "1.0.0")]
impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
let mut map = BTreeMap::new();
map.extend(iter);
map
let mut inputs: Vec<_> = iter.into_iter().collect();
if inputs.is_empty() {
return BTreeMap::new();
}
// use stable sort to preserve the insertion order.
inputs.sort_by(|a, b| a.0.cmp(&b.0));
BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
}
}
@ -2020,8 +2040,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
/// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
/// assert_eq!(map1, map2);
/// ```
fn from(arr: [(K, V); N]) -> Self {
core::array::IntoIter::new(arr).collect()
fn from(mut arr: [(K, V); N]) -> Self {
if N == 0 {
return BTreeMap::new();
}
// use stable sort to preserve the insertion order.
arr.sort_by(|a, b| a.0.cmp(&b.0));
BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
}
}

View file

@ -1,5 +1,6 @@
mod append;
mod borrow;
mod dedup_sorted_iter;
mod fix;
pub mod map;
mod mem;

View file

@ -1,6 +1,7 @@
// This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface
// to TreeMap
use crate::vec::Vec;
use core::borrow::Borrow;
use core::cmp::Ordering::{Equal, Greater, Less};
use core::cmp::{max, min};
@ -1056,9 +1057,17 @@ impl<T> BTreeSet<T> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: Ord> FromIterator<T> for BTreeSet<T> {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> BTreeSet<T> {
let mut set = BTreeSet::new();
set.extend(iter);
set
let mut inputs: Vec<_> = iter.into_iter().collect();
if inputs.is_empty() {
return BTreeSet::new();
}
// use stable sort to preserve the insertion order.
inputs.sort();
let iter = inputs.into_iter().map(|k| (k, ()));
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
BTreeSet { map }
}
}
@ -1071,8 +1080,16 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> {
/// let set2: BTreeSet<_> = [1, 2, 3, 4].into();
/// assert_eq!(set1, set2);
/// ```
fn from(arr: [T; N]) -> Self {
core::array::IntoIter::new(arr).collect()
fn from(mut arr: [T; N]) -> Self {
if N == 0 {
return BTreeSet::new();
}
// use stable sort to preserve the insertion order.
arr.sort();
let iter = core::array::IntoIter::new(arr).map(|k| (k, ()));
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
BTreeSet { map }
}
}