Rollup merge of #56648 - RalfJung:btree, r=sfackler
Fix BTreeMap UB BTreeMap currently causes UB by created a shared reference to a too-small allocation. This PR fixes that by introducing a `NodeHeader` type and using that until we really need access to the key/value arrays. Avoiding run-time checks in `into_key_slice` was somewhat tricky, see the comments embedded in the code. I also adjusted `as_leaf_mut` to return a raw pointer, because creating a mutable reference asserts that there are no aliases to the pointee, but that's not always correct: We use `as_leaf_mut` twice to create two mutable slices for keys and values; the second call overlaps with the first slice and hence is not a unique pointer. Fixes https://github.com/rust-lang/rust/issues/54957 Cc @nikomatsakis @Gankro
This commit is contained in:
commit
9133798b92
1 changed files with 117 additions and 57 deletions
|
@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1;
|
|||
/// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned
|
||||
/// case.
|
||||
///
|
||||
/// We put the metadata first so that its position is the same for every `K` and `V`, in order
|
||||
/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to
|
||||
/// prevent them from being reordered.
|
||||
/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in
|
||||
/// order to statically allocate a single dummy node to avoid allocations. This struct is
|
||||
/// `repr(C)` to prevent them from being reordered. `LeafNode` does not just contain a
|
||||
/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys.
|
||||
/// Crucially, `NodeHeader` can be safely transmuted to different K and V. (This is exploited
|
||||
/// by `as_header`.)
|
||||
/// See `into_key_slice` for an explanation of K2. K2 cannot be safely transmuted around
|
||||
/// because the size of `NodeHeader` depends on its alignment!
|
||||
#[repr(C)]
|
||||
struct NodeHeader<K, V, K2 = ()> {
|
||||
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
|
||||
/// This either points to an actual node or is null.
|
||||
parent: *const InternalNode<K, V>,
|
||||
|
||||
/// This node's index into the parent node's `edges` array.
|
||||
/// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`.
|
||||
/// This is only guaranteed to be initialized when `parent` is non-null.
|
||||
parent_idx: MaybeUninit<u16>,
|
||||
|
||||
/// The number of keys and values this node stores.
|
||||
///
|
||||
/// This next to `parent_idx` to encourage the compiler to join `len` and
|
||||
/// `parent_idx` into the same 32-bit word, reducing space overhead.
|
||||
len: u16,
|
||||
|
||||
/// See `into_key_slice`.
|
||||
keys_start: [K2; 0],
|
||||
}
|
||||
#[repr(C)]
|
||||
struct LeafNode<K, V> {
|
||||
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
|
||||
|
@ -98,24 +123,25 @@ impl<K, V> LeafNode<K, V> {
|
|||
len: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> NodeHeader<K, V> {
|
||||
fn is_shared_root(&self) -> bool {
|
||||
ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _)
|
||||
}
|
||||
}
|
||||
|
||||
// We need to implement Sync here in order to make a static instance.
|
||||
unsafe impl Sync for LeafNode<(), ()> {}
|
||||
unsafe impl Sync for NodeHeader<(), ()> {}
|
||||
|
||||
// An empty node used as a placeholder for the root node, to avoid allocations.
|
||||
// We use () in order to save space, since no operation on an empty tree will
|
||||
// We use just a header in order to save space, since no operation on an empty tree will
|
||||
// ever take a pointer past the first key.
|
||||
static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode {
|
||||
static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader {
|
||||
parent: ptr::null(),
|
||||
parent_idx: MaybeUninit::uninitialized(),
|
||||
len: 0,
|
||||
keys: MaybeUninit::uninitialized(),
|
||||
vals: MaybeUninit::uninitialized(),
|
||||
keys_start: [],
|
||||
};
|
||||
|
||||
/// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden
|
||||
|
@ -281,7 +307,7 @@ impl<K, V> Root<K, V> {
|
|||
.node)
|
||||
};
|
||||
self.height -= 1;
|
||||
self.as_mut().as_leaf_mut().parent = ptr::null();
|
||||
unsafe { (*self.as_mut().as_leaf_mut()).parent = ptr::null(); }
|
||||
|
||||
unsafe {
|
||||
Global.dealloc(NonNull::from(top).cast(), Layout::new::<InternalNode<K, V>>());
|
||||
|
@ -306,6 +332,11 @@ impl<K, V> Root<K, V> {
|
|||
/// `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the
|
||||
/// `NodeRef` points to an internal node, and when this is `LeafOrInternal` the
|
||||
/// `NodeRef` could be pointing to either type of node.
|
||||
/// Note that in case of a leaf node, this might still be the shared root! Only turn
|
||||
/// this into a `LeafNode` reference if you know it is not a root! Shared references
|
||||
/// must be dereferencable *for the entire size of their pointee*, so `&InternalNode`
|
||||
/// pointing to the shared root is UB.
|
||||
/// Turning this into a `NodeHeader` is always safe.
|
||||
pub struct NodeRef<BorrowType, K, V, Type> {
|
||||
height: usize,
|
||||
node: NonNull<LeafNode<K, V>>,
|
||||
|
@ -352,7 +383,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
|
|||
/// Finds the length of the node. This is the number of keys or values. In an
|
||||
/// internal node, the number of edges is `len() + 1`.
|
||||
pub fn len(&self) -> usize {
|
||||
self.as_leaf().len as usize
|
||||
self.as_header().len as usize
|
||||
}
|
||||
|
||||
/// Returns the height of this node in the whole tree. Zero height denotes the
|
||||
|
@ -382,14 +413,19 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
|
|||
}
|
||||
}
|
||||
|
||||
fn as_leaf(&self) -> &LeafNode<K, V> {
|
||||
/// Assert that this is indeed a proper leaf node, and not the shared root.
|
||||
unsafe fn as_leaf(&self) -> &LeafNode<K, V> {
|
||||
self.node.as_ref()
|
||||
}
|
||||
|
||||
fn as_header(&self) -> &NodeHeader<K, V> {
|
||||
unsafe {
|
||||
self.node.as_ref()
|
||||
&*(self.node.as_ptr() as *const NodeHeader<K, V>)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_shared_root(&self) -> bool {
|
||||
self.as_leaf().is_shared_root()
|
||||
self.as_header().is_shared_root()
|
||||
}
|
||||
|
||||
pub fn keys(&self) -> &[K] {
|
||||
|
@ -418,7 +454,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
|
|||
>,
|
||||
Self
|
||||
> {
|
||||
let parent_as_leaf = self.as_leaf().parent as *const LeafNode<K, V>;
|
||||
let parent_as_leaf = self.as_header().parent as *const LeafNode<K, V>;
|
||||
if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) {
|
||||
Ok(Handle {
|
||||
node: NodeRef {
|
||||
|
@ -427,7 +463,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
|
|||
root: self.root,
|
||||
_marker: PhantomData
|
||||
},
|
||||
idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) },
|
||||
idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) },
|
||||
_marker: PhantomData
|
||||
})
|
||||
} else {
|
||||
|
@ -534,10 +570,10 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
|
|||
}
|
||||
}
|
||||
|
||||
fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> {
|
||||
unsafe {
|
||||
self.node.as_mut()
|
||||
}
|
||||
/// Returns a raw ptr to avoid asserting exclusive access to the entire node.
|
||||
fn as_leaf_mut(&mut self) -> *mut LeafNode<K, V> {
|
||||
// We are mutable, so we cannot be the root, so accessing this as a leaf is okay.
|
||||
self.node.as_ptr()
|
||||
}
|
||||
|
||||
fn keys_mut(&mut self) -> &mut [K] {
|
||||
|
@ -551,28 +587,50 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
|
|||
|
||||
impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> {
|
||||
fn into_key_slice(self) -> &'a [K] {
|
||||
// When taking a pointer to the keys, if our key has a stricter
|
||||
// alignment requirement than the shared root does, then the pointer
|
||||
// would be out of bounds, which LLVM assumes will not happen. If the
|
||||
// alignment is more strict, we need to make an empty slice that doesn't
|
||||
// use an out of bounds pointer.
|
||||
// We have to be careful here because we might be pointing to the shared root.
|
||||
// In that case, we must not create an `&LeafNode`. We could just return
|
||||
// an empty slice whenever the length is 0 (this includes the shared root),
|
||||
// but we want to avoid that run-time check.
|
||||
// Instead, we create a slice pointing into the node whenever possible.
|
||||
// We can sometimes do this even for the shared root, as the slice will be
|
||||
// empty. We cannot *always* do this because if the type is too highly
|
||||
// aligned, the offset of `keys` in a "full node" might be outside the bounds
|
||||
// of the header! So we do an alignment check first, that will be
|
||||
// evaluated at compile-time, and only do any run-time check in the rare case
|
||||
// that the alignment is very big.
|
||||
if mem::align_of::<K>() > mem::align_of::<LeafNode<(), ()>>() && self.is_shared_root() {
|
||||
&[]
|
||||
} else {
|
||||
// Here either it's not the root, or the alignment is less strict,
|
||||
// in which case the keys pointer will point "one-past-the-end" of
|
||||
// the node, which is allowed by LLVM.
|
||||
// Thanks to the alignment check above, we know that `keys` will be
|
||||
// in-bounds of some allocation even if this is the shared root!
|
||||
// (We might be one-past-the-end, but that is allowed by LLVM.)
|
||||
// Getting the pointer is tricky though. `NodeHeader` does not have a `keys`
|
||||
// field because we want its size to not depend on the alignment of `K`
|
||||
// (needed becuase `as_header` should be safe). We cannot call `as_leaf`
|
||||
// because we might be the shared root.
|
||||
// For this reason, `NodeHeader` has this `K2` parameter (that's usually `()`
|
||||
// and hence just adds a size-0-align-1 field, not affecting layout).
|
||||
// We know that we can transmute `NodeHeader<K, V, ()>` to `NodeHeader<K, V, K>`
|
||||
// because we did the alignment check above, and hence `NodeHeader<K, V, K>`
|
||||
// is not bigger than `NodeHeader<K, V, ()>`! Then we can use `NodeHeader<K, V, K>`
|
||||
// to compute the pointer where the keys start.
|
||||
// This entire hack will become unnecessary once
|
||||
// <https://github.com/rust-lang/rfcs/pull/2582> lands, then we can just take a raw
|
||||
// pointer to the `keys` field of `*const InternalNode<K, V>`.
|
||||
|
||||
// This is a non-debug-assert because it can be completely compile-time evaluated.
|
||||
assert!(mem::size_of::<NodeHeader<K, V>>() == mem::size_of::<NodeHeader<K, V, K>>());
|
||||
let header = self.as_header() as *const _ as *const NodeHeader<K, V, K>;
|
||||
let keys = unsafe { &(*header).keys_start as *const _ as *const K };
|
||||
unsafe {
|
||||
slice::from_raw_parts(
|
||||
self.as_leaf().keys.as_ptr() as *const K,
|
||||
self.len()
|
||||
)
|
||||
slice::from_raw_parts(keys, self.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn into_val_slice(self) -> &'a [V] {
|
||||
debug_assert!(!self.is_shared_root());
|
||||
// We cannot be the root, so `as_leaf` is okay
|
||||
unsafe {
|
||||
slice::from_raw_parts(
|
||||
self.as_leaf().vals.as_ptr() as *const V,
|
||||
|
@ -602,7 +660,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
|
|||
} else {
|
||||
unsafe {
|
||||
slice::from_raw_parts_mut(
|
||||
self.as_leaf_mut().keys.as_mut_ptr() as *mut K,
|
||||
(*self.as_leaf_mut()).keys.as_mut_ptr() as *mut K,
|
||||
self.len()
|
||||
)
|
||||
}
|
||||
|
@ -613,7 +671,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
|
|||
debug_assert!(!self.is_shared_root());
|
||||
unsafe {
|
||||
slice::from_raw_parts_mut(
|
||||
self.as_leaf_mut().vals.as_mut_ptr() as *mut V,
|
||||
(*self.as_leaf_mut()).vals.as_mut_ptr() as *mut V,
|
||||
self.len()
|
||||
)
|
||||
}
|
||||
|
@ -637,9 +695,9 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> {
|
|||
unsafe {
|
||||
ptr::write(self.keys_mut().get_unchecked_mut(idx), key);
|
||||
ptr::write(self.vals_mut().get_unchecked_mut(idx), val);
|
||||
}
|
||||
|
||||
self.as_leaf_mut().len += 1;
|
||||
(*self.as_leaf_mut()).len += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds a key/value pair to the beginning of the node.
|
||||
|
@ -651,9 +709,9 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> {
|
|||
unsafe {
|
||||
slice_insert(self.keys_mut(), 0, key);
|
||||
slice_insert(self.vals_mut(), 0, val);
|
||||
}
|
||||
|
||||
self.as_leaf_mut().len += 1;
|
||||
(*self.as_leaf_mut()).len += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -672,7 +730,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> {
|
|||
ptr::write(self.vals_mut().get_unchecked_mut(idx), val);
|
||||
ptr::write(self.as_internal_mut().edges.get_unchecked_mut(idx + 1), edge.node);
|
||||
|
||||
self.as_leaf_mut().len += 1;
|
||||
(*self.as_leaf_mut()).len += 1;
|
||||
|
||||
Handle::new_edge(self.reborrow_mut(), idx + 1).correct_parent_link();
|
||||
}
|
||||
|
@ -708,7 +766,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> {
|
|||
edge.node
|
||||
);
|
||||
|
||||
self.as_leaf_mut().len += 1;
|
||||
(*self.as_leaf_mut()).len += 1;
|
||||
|
||||
self.correct_all_childrens_parent_links();
|
||||
}
|
||||
|
@ -732,12 +790,12 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> {
|
|||
ForceResult::Internal(internal) => {
|
||||
let edge = ptr::read(internal.as_internal().edges.get_unchecked(idx + 1));
|
||||
let mut new_root = Root { node: edge, height: internal.height - 1 };
|
||||
new_root.as_mut().as_leaf_mut().parent = ptr::null();
|
||||
(*new_root.as_mut().as_leaf_mut()).parent = ptr::null();
|
||||
Some(new_root)
|
||||
}
|
||||
};
|
||||
|
||||
self.as_leaf_mut().len -= 1;
|
||||
(*self.as_leaf_mut()).len -= 1;
|
||||
(key, val, edge)
|
||||
}
|
||||
}
|
||||
|
@ -765,7 +823,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> {
|
|||
);
|
||||
|
||||
let mut new_root = Root { node: edge, height: internal.height - 1 };
|
||||
new_root.as_mut().as_leaf_mut().parent = ptr::null();
|
||||
(*new_root.as_mut().as_leaf_mut()).parent = ptr::null();
|
||||
|
||||
for i in 0..old_len {
|
||||
Handle::new_edge(internal.reborrow_mut(), i).correct_parent_link();
|
||||
|
@ -775,7 +833,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> {
|
|||
}
|
||||
};
|
||||
|
||||
self.as_leaf_mut().len -= 1;
|
||||
(*self.as_leaf_mut()).len -= 1;
|
||||
|
||||
(key, val, edge)
|
||||
}
|
||||
|
@ -966,7 +1024,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge
|
|||
slice_insert(self.node.keys_mut(), self.idx, key);
|
||||
slice_insert(self.node.vals_mut(), self.idx, val);
|
||||
|
||||
self.node.as_leaf_mut().len += 1;
|
||||
(*self.node.as_leaf_mut()).len += 1;
|
||||
|
||||
self.node.vals_mut().get_unchecked_mut(self.idx)
|
||||
}
|
||||
|
@ -1009,8 +1067,10 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
|
|||
let idx = self.idx as u16;
|
||||
let ptr = self.node.as_internal_mut() as *mut _;
|
||||
let mut child = self.descend();
|
||||
child.as_leaf_mut().parent = ptr;
|
||||
child.as_leaf_mut().parent_idx.set(idx);
|
||||
unsafe {
|
||||
(*child.as_leaf_mut()).parent = ptr;
|
||||
(*child.as_leaf_mut()).parent_idx.set(idx);
|
||||
}
|
||||
}
|
||||
|
||||
/// Unsafely asserts to the compiler some static information about whether the underlying
|
||||
|
@ -1158,7 +1218,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV>
|
|||
new_len
|
||||
);
|
||||
|
||||
self.node.as_leaf_mut().len = self.idx as u16;
|
||||
(*self.node.as_leaf_mut()).len = self.idx as u16;
|
||||
new_node.len = new_len as u16;
|
||||
|
||||
(
|
||||
|
@ -1180,7 +1240,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV>
|
|||
unsafe {
|
||||
let k = slice_remove(self.node.keys_mut(), self.idx);
|
||||
let v = slice_remove(self.node.vals_mut(), self.idx);
|
||||
self.node.as_leaf_mut().len -= 1;
|
||||
(*self.node.as_leaf_mut()).len -= 1;
|
||||
(self.left_edge(), k, v)
|
||||
}
|
||||
}
|
||||
|
@ -1221,7 +1281,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
|
|||
new_len + 1
|
||||
);
|
||||
|
||||
self.node.as_leaf_mut().len = self.idx as u16;
|
||||
(*self.node.as_leaf_mut()).len = self.idx as u16;
|
||||
new_node.data.len = new_len as u16;
|
||||
|
||||
let mut new_root = Root {
|
||||
|
@ -1295,9 +1355,9 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
|
|||
for i in self.idx+1..self.node.len() {
|
||||
Handle::new_edge(self.node.reborrow_mut(), i).correct_parent_link();
|
||||
}
|
||||
self.node.as_leaf_mut().len -= 1;
|
||||
(*self.node.as_leaf_mut()).len -= 1;
|
||||
|
||||
left_node.as_leaf_mut().len += right_len as u16 + 1;
|
||||
(*left_node.as_leaf_mut()).len += right_len as u16 + 1;
|
||||
|
||||
if self.node.height > 1 {
|
||||
ptr::copy_nonoverlapping(
|
||||
|
@ -1407,8 +1467,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
|
|||
move_kv(left_kv, new_left_len, parent_kv, 0, 1);
|
||||
}
|
||||
|
||||
left_node.reborrow_mut().as_leaf_mut().len -= count as u16;
|
||||
right_node.reborrow_mut().as_leaf_mut().len += count as u16;
|
||||
(*left_node.reborrow_mut().as_leaf_mut()).len -= count as u16;
|
||||
(*right_node.reborrow_mut().as_leaf_mut()).len += count as u16;
|
||||
|
||||
match (left_node.force(), right_node.force()) {
|
||||
(ForceResult::Internal(left), ForceResult::Internal(mut right)) => {
|
||||
|
@ -1468,8 +1528,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
|
|||
new_right_len);
|
||||
}
|
||||
|
||||
left_node.reborrow_mut().as_leaf_mut().len += count as u16;
|
||||
right_node.reborrow_mut().as_leaf_mut().len -= count as u16;
|
||||
(*left_node.reborrow_mut().as_leaf_mut()).len += count as u16;
|
||||
(*right_node.reborrow_mut().as_leaf_mut()).len -= count as u16;
|
||||
|
||||
match (left_node.force(), right_node.force()) {
|
||||
(ForceResult::Internal(left), ForceResult::Internal(mut right)) => {
|
||||
|
@ -1560,8 +1620,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal>, ma
|
|||
|
||||
move_kv(left_kv, left_new_len, right_kv, 0, right_new_len);
|
||||
|
||||
left_node.reborrow_mut().as_leaf_mut().len = left_new_len as u16;
|
||||
right_node.reborrow_mut().as_leaf_mut().len = right_new_len as u16;
|
||||
(*left_node.reborrow_mut().as_leaf_mut()).len = left_new_len as u16;
|
||||
(*right_node.reborrow_mut().as_leaf_mut()).len = right_new_len as u16;
|
||||
|
||||
match (left_node.force(), right_node.force()) {
|
||||
(ForceResult::Internal(left), ForceResult::Internal(right)) => {
|
||||
|
|
Loading…
Reference in a new issue