Auto merge of #88448 - xu-cheng:btree-blk-build, r=Mark-Simulacrum
BTreeMap/BTreeSet::from_iter: use bulk building to improve the performance Bulk building is a common technique to increase the performance of building a fresh btree map. Instead of inserting items one-by-one, we sort all the items beforehand then create the BtreeMap in bulk. Benchmark ``` ./x.py bench library/alloc --test-args btree::map::from_iter ``` * Before ``` test btree::map::from_iter_rand_100 ... bench: 3,694 ns/iter (+/- 840) test btree::map::from_iter_rand_10_000 ... bench: 1,033,446 ns/iter (+/- 192,950) test btree::map::from_iter_seq_100 ... bench: 5,689 ns/iter (+/- 1,259) test btree::map::from_iter_seq_10_000 ... bench: 861,033 ns/iter (+/- 118,815) ``` * After ``` test btree::map::from_iter_rand_100 ... bench: 3,033 ns/iter (+/- 707) test btree::map::from_iter_rand_10_000 ... bench: 775,958 ns/iter (+/- 105,152) test btree::map::from_iter_seq_100 ... bench: 2,969 ns/iter (+/- 336) test btree::map::from_iter_seq_10_000 ... bench: 258,292 ns/iter (+/- 29,364) ```
This commit is contained in:
commit
ffaf857045
5 changed files with 151 additions and 10 deletions
|
@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench {
|
|||
};
|
||||
}
|
||||
|
||||
macro_rules! map_from_iter_rand_bench {
|
||||
($name: ident, $n: expr, $map: ident) => {
|
||||
#[bench]
|
||||
pub fn $name(b: &mut Bencher) {
|
||||
let n: usize = $n;
|
||||
// setup
|
||||
let mut rng = thread_rng();
|
||||
let mut vec = Vec::with_capacity(n);
|
||||
|
||||
for _ in 0..n {
|
||||
let i = rng.gen::<usize>() % n;
|
||||
vec.push((i, i));
|
||||
}
|
||||
|
||||
// measure
|
||||
b.iter(|| {
|
||||
let map: $map<_, _> = vec.iter().copied().collect();
|
||||
black_box(map);
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! map_from_iter_seq_bench {
|
||||
($name: ident, $n: expr, $map: ident) => {
|
||||
#[bench]
|
||||
pub fn $name(b: &mut Bencher) {
|
||||
let n: usize = $n;
|
||||
// setup
|
||||
let mut vec = Vec::with_capacity(n);
|
||||
|
||||
for i in 0..n {
|
||||
vec.push((i, i));
|
||||
}
|
||||
|
||||
// measure
|
||||
b.iter(|| {
|
||||
let map: $map<_, _> = vec.iter().copied().collect();
|
||||
black_box(map);
|
||||
});
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! map_find_rand_bench {
|
||||
($name: ident, $n: expr, $map: ident) => {
|
||||
#[bench]
|
||||
|
@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap}
|
|||
map_insert_seq_bench! {insert_seq_100, 100, BTreeMap}
|
||||
map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap}
|
||||
|
||||
map_from_iter_rand_bench! {from_iter_rand_100, 100, BTreeMap}
|
||||
map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap}
|
||||
|
||||
map_from_iter_seq_bench! {from_iter_seq_100, 100, BTreeMap}
|
||||
map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap}
|
||||
|
||||
map_find_rand_bench! {find_rand_100, 100, BTreeMap}
|
||||
map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}
|
||||
|
||||
|
|
47
library/alloc/src/collections/btree/dedup_sorted_iter.rs
Normal file
47
library/alloc/src/collections/btree/dedup_sorted_iter.rs
Normal file
|
@ -0,0 +1,47 @@
|
|||
use core::iter::Peekable;
|
||||
|
||||
/// A iterator for deduping the key of a sorted iterator.
|
||||
/// When encountering the duplicated key, only the last key-value pair is yielded.
|
||||
///
|
||||
/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
|
||||
pub struct DedupSortedIter<K, V, I>
|
||||
where
|
||||
I: Iterator<Item = (K, V)>,
|
||||
{
|
||||
iter: Peekable<I>,
|
||||
}
|
||||
|
||||
impl<K, V, I> DedupSortedIter<K, V, I>
|
||||
where
|
||||
I: Iterator<Item = (K, V)>,
|
||||
{
|
||||
pub fn new(iter: I) -> Self {
|
||||
Self { iter: iter.peekable() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
|
||||
where
|
||||
K: Eq,
|
||||
I: Iterator<Item = (K, V)>,
|
||||
{
|
||||
type Item = (K, V);
|
||||
|
||||
fn next(&mut self) -> Option<(K, V)> {
|
||||
loop {
|
||||
let next = match self.iter.next() {
|
||||
Some(next) => next,
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let peeked = match self.iter.peek() {
|
||||
Some(peeked) => peeked,
|
||||
None => return Some(next),
|
||||
};
|
||||
|
||||
if next.0 != peeked.0 {
|
||||
return Some(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
use crate::vec::Vec;
|
||||
use core::borrow::Borrow;
|
||||
use core::cmp::Ordering;
|
||||
use core::fmt::{self, Debug};
|
||||
|
@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
|
|||
use core::ptr;
|
||||
|
||||
use super::borrow::DormantMutRef;
|
||||
use super::dedup_sorted_iter::DedupSortedIter;
|
||||
use super::navigate::{LazyLeafRange, LeafRange};
|
||||
use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
|
||||
use super::search::SearchResult::*;
|
||||
|
@ -1285,6 +1287,18 @@ impl<K, V> BTreeMap<K, V> {
|
|||
pub fn into_values(self) -> IntoValues<K, V> {
|
||||
IntoValues { inner: self.into_iter() }
|
||||
}
|
||||
|
||||
/// Makes a `BTreeMap` from a sorted iterator.
|
||||
pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
|
||||
where
|
||||
K: Ord,
|
||||
I: Iterator<Item = (K, V)>,
|
||||
{
|
||||
let mut root = Root::new();
|
||||
let mut length = 0;
|
||||
root.bulk_push(DedupSortedIter::new(iter), &mut length);
|
||||
BTreeMap { root: Some(root), length }
|
||||
}
|
||||
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
|
@ -1909,9 +1923,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
|
|||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
|
||||
fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
|
||||
let mut map = BTreeMap::new();
|
||||
map.extend(iter);
|
||||
map
|
||||
let mut inputs: Vec<_> = iter.into_iter().collect();
|
||||
|
||||
if inputs.is_empty() {
|
||||
return BTreeMap::new();
|
||||
}
|
||||
|
||||
// use stable sort to preserve the insertion order.
|
||||
inputs.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2020,8 +2040,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
|
|||
/// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
|
||||
/// assert_eq!(map1, map2);
|
||||
/// ```
|
||||
fn from(arr: [(K, V); N]) -> Self {
|
||||
core::array::IntoIter::new(arr).collect()
|
||||
fn from(mut arr: [(K, V); N]) -> Self {
|
||||
if N == 0 {
|
||||
return BTreeMap::new();
|
||||
}
|
||||
|
||||
// use stable sort to preserve the insertion order.
|
||||
arr.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
mod append;
|
||||
mod borrow;
|
||||
mod dedup_sorted_iter;
|
||||
mod fix;
|
||||
pub mod map;
|
||||
mod mem;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface
|
||||
// to TreeMap
|
||||
|
||||
use crate::vec::Vec;
|
||||
use core::borrow::Borrow;
|
||||
use core::cmp::Ordering::{Equal, Greater, Less};
|
||||
use core::cmp::{max, min};
|
||||
|
@ -1056,9 +1057,17 @@ impl<T> BTreeSet<T> {
|
|||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl<T: Ord> FromIterator<T> for BTreeSet<T> {
|
||||
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> BTreeSet<T> {
|
||||
let mut set = BTreeSet::new();
|
||||
set.extend(iter);
|
||||
set
|
||||
let mut inputs: Vec<_> = iter.into_iter().collect();
|
||||
|
||||
if inputs.is_empty() {
|
||||
return BTreeSet::new();
|
||||
}
|
||||
|
||||
// use stable sort to preserve the insertion order.
|
||||
inputs.sort();
|
||||
let iter = inputs.into_iter().map(|k| (k, ()));
|
||||
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
|
||||
BTreeSet { map }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1071,8 +1080,16 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> {
|
|||
/// let set2: BTreeSet<_> = [1, 2, 3, 4].into();
|
||||
/// assert_eq!(set1, set2);
|
||||
/// ```
|
||||
fn from(arr: [T; N]) -> Self {
|
||||
core::array::IntoIter::new(arr).collect()
|
||||
fn from(mut arr: [T; N]) -> Self {
|
||||
if N == 0 {
|
||||
return BTreeSet::new();
|
||||
}
|
||||
|
||||
// use stable sort to preserve the insertion order.
|
||||
arr.sort();
|
||||
let iter = core::array::IntoIter::new(arr).map(|k| (k, ()));
|
||||
let map = BTreeMap::bulk_build_from_sorted_iter(iter);
|
||||
BTreeSet { map }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue