Rollup merge of #56648 - RalfJung:btree, r=sfackler

Fix BTreeMap UB

BTreeMap currently causes UB by created a shared reference to a too-small allocation.  This PR fixes that by introducing a `NodeHeader` type and using that until we really need access to the key/value arrays.  Avoiding run-time checks in `into_key_slice` was somewhat tricky, see the comments embedded in the code.

I also adjusted `as_leaf_mut` to return a raw pointer, because creating a mutable reference asserts that there are no aliases to the pointee, but that's not always correct: We use `as_leaf_mut` twice to create two mutable slices for keys and values; the second call overlaps with the first slice and hence is not a unique pointer.

Fixes https://github.com/rust-lang/rust/issues/54957

Cc @nikomatsakis @Gankro
This commit is contained in:
Mazdak Farrokhzad 2018-12-16 14:08:19 +01:00 committed by GitHub
commit 9133798b92
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -58,9 +58,34 @@ pub const CAPACITY: usize = 2 * B - 1;
/// these should always be put behind pointers, and specifically behind `BoxedNode` in the owned
/// case.
///
/// We put the metadata first so that its position is the same for every `K` and `V`, in order
/// to statically allocate a single dummy node to avoid allocations. This struct is `repr(C)` to
/// prevent them from being reordered.
/// We have a separate type for the header and rely on it matching the prefix of `LeafNode`, in
/// order to statically allocate a single dummy node to avoid allocations. This struct is
/// `repr(C)` to prevent them from being reordered. `LeafNode` does not just contain a
/// `NodeHeader` because we do not want unnecessary padding between `len` and the keys.
/// Crucially, `NodeHeader` can be safely transmuted to different K and V. (This is exploited
/// by `as_header`.)
/// See `into_key_slice` for an explanation of K2. K2 cannot be safely transmuted around
/// because the size of `NodeHeader` depends on its alignment!
#[repr(C)]
struct NodeHeader<K, V, K2 = ()> {
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
/// This either points to an actual node or is null.
parent: *const InternalNode<K, V>,
/// This node's index into the parent node's `edges` array.
/// `*node.parent.edges[node.parent_idx]` should be the same thing as `node`.
/// This is only guaranteed to be initialized when `parent` is non-null.
parent_idx: MaybeUninit<u16>,
/// The number of keys and values this node stores.
///
/// This next to `parent_idx` to encourage the compiler to join `len` and
/// `parent_idx` into the same 32-bit word, reducing space overhead.
len: u16,
/// See `into_key_slice`.
keys_start: [K2; 0],
}
#[repr(C)]
struct LeafNode<K, V> {
/// We use `*const` as opposed to `*mut` so as to be covariant in `K` and `V`.
@ -98,24 +123,25 @@ impl<K, V> LeafNode<K, V> {
len: 0
}
}
}
impl<K, V> NodeHeader<K, V> {
fn is_shared_root(&self) -> bool {
ptr::eq(self, &EMPTY_ROOT_NODE as *const _ as *const _)
}
}
// We need to implement Sync here in order to make a static instance.
unsafe impl Sync for LeafNode<(), ()> {}
unsafe impl Sync for NodeHeader<(), ()> {}
// An empty node used as a placeholder for the root node, to avoid allocations.
// We use () in order to save space, since no operation on an empty tree will
// We use just a header in order to save space, since no operation on an empty tree will
// ever take a pointer past the first key.
static EMPTY_ROOT_NODE: LeafNode<(), ()> = LeafNode {
static EMPTY_ROOT_NODE: NodeHeader<(), ()> = NodeHeader {
parent: ptr::null(),
parent_idx: MaybeUninit::uninitialized(),
len: 0,
keys: MaybeUninit::uninitialized(),
vals: MaybeUninit::uninitialized(),
keys_start: [],
};
/// The underlying representation of internal nodes. As with `LeafNode`s, these should be hidden
@ -281,7 +307,7 @@ impl<K, V> Root<K, V> {
.node)
};
self.height -= 1;
self.as_mut().as_leaf_mut().parent = ptr::null();
unsafe { (*self.as_mut().as_leaf_mut()).parent = ptr::null(); }
unsafe {
Global.dealloc(NonNull::from(top).cast(), Layout::new::<InternalNode<K, V>>());
@ -306,6 +332,11 @@ impl<K, V> Root<K, V> {
/// `Leaf`, the `NodeRef` points to a leaf node, when this is `Internal` the
/// `NodeRef` points to an internal node, and when this is `LeafOrInternal` the
/// `NodeRef` could be pointing to either type of node.
/// Note that in case of a leaf node, this might still be the shared root! Only turn
/// this into a `LeafNode` reference if you know it is not a root! Shared references
/// must be dereferencable *for the entire size of their pointee*, so `&InternalNode`
/// pointing to the shared root is UB.
/// Turning this into a `NodeHeader` is always safe.
pub struct NodeRef<BorrowType, K, V, Type> {
height: usize,
node: NonNull<LeafNode<K, V>>,
@ -352,7 +383,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
/// Finds the length of the node. This is the number of keys or values. In an
/// internal node, the number of edges is `len() + 1`.
pub fn len(&self) -> usize {
self.as_leaf().len as usize
self.as_header().len as usize
}
/// Returns the height of this node in the whole tree. Zero height denotes the
@ -382,14 +413,19 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
}
}
fn as_leaf(&self) -> &LeafNode<K, V> {
/// Assert that this is indeed a proper leaf node, and not the shared root.
unsafe fn as_leaf(&self) -> &LeafNode<K, V> {
self.node.as_ref()
}
fn as_header(&self) -> &NodeHeader<K, V> {
unsafe {
self.node.as_ref()
&*(self.node.as_ptr() as *const NodeHeader<K, V>)
}
}
pub fn is_shared_root(&self) -> bool {
self.as_leaf().is_shared_root()
self.as_header().is_shared_root()
}
pub fn keys(&self) -> &[K] {
@ -418,7 +454,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
>,
Self
> {
let parent_as_leaf = self.as_leaf().parent as *const LeafNode<K, V>;
let parent_as_leaf = self.as_header().parent as *const LeafNode<K, V>;
if let Some(non_zero) = NonNull::new(parent_as_leaf as *mut _) {
Ok(Handle {
node: NodeRef {
@ -427,7 +463,7 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
root: self.root,
_marker: PhantomData
},
idx: unsafe { usize::from(*self.as_leaf().parent_idx.get_ref()) },
idx: unsafe { usize::from(*self.as_header().parent_idx.get_ref()) },
_marker: PhantomData
})
} else {
@ -534,10 +570,10 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
}
}
fn as_leaf_mut(&mut self) -> &mut LeafNode<K, V> {
unsafe {
self.node.as_mut()
}
/// Returns a raw ptr to avoid asserting exclusive access to the entire node.
fn as_leaf_mut(&mut self) -> *mut LeafNode<K, V> {
// We are mutable, so we cannot be the root, so accessing this as a leaf is okay.
self.node.as_ptr()
}
fn keys_mut(&mut self) -> &mut [K] {
@ -551,28 +587,50 @@ impl<'a, K, V, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Immut<'a>, K, V, Type> {
fn into_key_slice(self) -> &'a [K] {
// When taking a pointer to the keys, if our key has a stricter
// alignment requirement than the shared root does, then the pointer
// would be out of bounds, which LLVM assumes will not happen. If the
// alignment is more strict, we need to make an empty slice that doesn't
// use an out of bounds pointer.
// We have to be careful here because we might be pointing to the shared root.
// In that case, we must not create an `&LeafNode`. We could just return
// an empty slice whenever the length is 0 (this includes the shared root),
// but we want to avoid that run-time check.
// Instead, we create a slice pointing into the node whenever possible.
// We can sometimes do this even for the shared root, as the slice will be
// empty. We cannot *always* do this because if the type is too highly
// aligned, the offset of `keys` in a "full node" might be outside the bounds
// of the header! So we do an alignment check first, that will be
// evaluated at compile-time, and only do any run-time check in the rare case
// that the alignment is very big.
if mem::align_of::<K>() > mem::align_of::<LeafNode<(), ()>>() && self.is_shared_root() {
&[]
} else {
// Here either it's not the root, or the alignment is less strict,
// in which case the keys pointer will point "one-past-the-end" of
// the node, which is allowed by LLVM.
// Thanks to the alignment check above, we know that `keys` will be
// in-bounds of some allocation even if this is the shared root!
// (We might be one-past-the-end, but that is allowed by LLVM.)
// Getting the pointer is tricky though. `NodeHeader` does not have a `keys`
// field because we want its size to not depend on the alignment of `K`
// (needed becuase `as_header` should be safe). We cannot call `as_leaf`
// because we might be the shared root.
// For this reason, `NodeHeader` has this `K2` parameter (that's usually `()`
// and hence just adds a size-0-align-1 field, not affecting layout).
// We know that we can transmute `NodeHeader<K, V, ()>` to `NodeHeader<K, V, K>`
// because we did the alignment check above, and hence `NodeHeader<K, V, K>`
// is not bigger than `NodeHeader<K, V, ()>`! Then we can use `NodeHeader<K, V, K>`
// to compute the pointer where the keys start.
// This entire hack will become unnecessary once
// <https://github.com/rust-lang/rfcs/pull/2582> lands, then we can just take a raw
// pointer to the `keys` field of `*const InternalNode<K, V>`.
// This is a non-debug-assert because it can be completely compile-time evaluated.
assert!(mem::size_of::<NodeHeader<K, V>>() == mem::size_of::<NodeHeader<K, V, K>>());
let header = self.as_header() as *const _ as *const NodeHeader<K, V, K>;
let keys = unsafe { &(*header).keys_start as *const _ as *const K };
unsafe {
slice::from_raw_parts(
self.as_leaf().keys.as_ptr() as *const K,
self.len()
)
slice::from_raw_parts(keys, self.len())
}
}
}
fn into_val_slice(self) -> &'a [V] {
debug_assert!(!self.is_shared_root());
// We cannot be the root, so `as_leaf` is okay
unsafe {
slice::from_raw_parts(
self.as_leaf().vals.as_ptr() as *const V,
@ -602,7 +660,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
} else {
unsafe {
slice::from_raw_parts_mut(
self.as_leaf_mut().keys.as_mut_ptr() as *mut K,
(*self.as_leaf_mut()).keys.as_mut_ptr() as *mut K,
self.len()
)
}
@ -613,7 +671,7 @@ impl<'a, K: 'a, V: 'a, Type> NodeRef<marker::Mut<'a>, K, V, Type> {
debug_assert!(!self.is_shared_root());
unsafe {
slice::from_raw_parts_mut(
self.as_leaf_mut().vals.as_mut_ptr() as *mut V,
(*self.as_leaf_mut()).vals.as_mut_ptr() as *mut V,
self.len()
)
}
@ -637,9 +695,9 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> {
unsafe {
ptr::write(self.keys_mut().get_unchecked_mut(idx), key);
ptr::write(self.vals_mut().get_unchecked_mut(idx), val);
}
self.as_leaf_mut().len += 1;
(*self.as_leaf_mut()).len += 1;
}
}
/// Adds a key/value pair to the beginning of the node.
@ -651,9 +709,9 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Leaf> {
unsafe {
slice_insert(self.keys_mut(), 0, key);
slice_insert(self.vals_mut(), 0, val);
}
self.as_leaf_mut().len += 1;
(*self.as_leaf_mut()).len += 1;
}
}
}
@ -672,7 +730,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> {
ptr::write(self.vals_mut().get_unchecked_mut(idx), val);
ptr::write(self.as_internal_mut().edges.get_unchecked_mut(idx + 1), edge.node);
self.as_leaf_mut().len += 1;
(*self.as_leaf_mut()).len += 1;
Handle::new_edge(self.reborrow_mut(), idx + 1).correct_parent_link();
}
@ -708,7 +766,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::Internal> {
edge.node
);
self.as_leaf_mut().len += 1;
(*self.as_leaf_mut()).len += 1;
self.correct_all_childrens_parent_links();
}
@ -732,12 +790,12 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> {
ForceResult::Internal(internal) => {
let edge = ptr::read(internal.as_internal().edges.get_unchecked(idx + 1));
let mut new_root = Root { node: edge, height: internal.height - 1 };
new_root.as_mut().as_leaf_mut().parent = ptr::null();
(*new_root.as_mut().as_leaf_mut()).parent = ptr::null();
Some(new_root)
}
};
self.as_leaf_mut().len -= 1;
(*self.as_leaf_mut()).len -= 1;
(key, val, edge)
}
}
@ -765,7 +823,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> {
);
let mut new_root = Root { node: edge, height: internal.height - 1 };
new_root.as_mut().as_leaf_mut().parent = ptr::null();
(*new_root.as_mut().as_leaf_mut()).parent = ptr::null();
for i in 0..old_len {
Handle::new_edge(internal.reborrow_mut(), i).correct_parent_link();
@ -775,7 +833,7 @@ impl<'a, K, V> NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal> {
}
};
self.as_leaf_mut().len -= 1;
(*self.as_leaf_mut()).len -= 1;
(key, val, edge)
}
@ -966,7 +1024,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge
slice_insert(self.node.keys_mut(), self.idx, key);
slice_insert(self.node.vals_mut(), self.idx, val);
self.node.as_leaf_mut().len += 1;
(*self.node.as_leaf_mut()).len += 1;
self.node.vals_mut().get_unchecked_mut(self.idx)
}
@ -1009,8 +1067,10 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
let idx = self.idx as u16;
let ptr = self.node.as_internal_mut() as *mut _;
let mut child = self.descend();
child.as_leaf_mut().parent = ptr;
child.as_leaf_mut().parent_idx.set(idx);
unsafe {
(*child.as_leaf_mut()).parent = ptr;
(*child.as_leaf_mut()).parent_idx.set(idx);
}
}
/// Unsafely asserts to the compiler some static information about whether the underlying
@ -1158,7 +1218,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV>
new_len
);
self.node.as_leaf_mut().len = self.idx as u16;
(*self.node.as_leaf_mut()).len = self.idx as u16;
new_node.len = new_len as u16;
(
@ -1180,7 +1240,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::KV>
unsafe {
let k = slice_remove(self.node.keys_mut(), self.idx);
let v = slice_remove(self.node.vals_mut(), self.idx);
self.node.as_leaf_mut().len -= 1;
(*self.node.as_leaf_mut()).len -= 1;
(self.left_edge(), k, v)
}
}
@ -1221,7 +1281,7 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
new_len + 1
);
self.node.as_leaf_mut().len = self.idx as u16;
(*self.node.as_leaf_mut()).len = self.idx as u16;
new_node.data.len = new_len as u16;
let mut new_root = Root {
@ -1295,9 +1355,9 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
for i in self.idx+1..self.node.len() {
Handle::new_edge(self.node.reborrow_mut(), i).correct_parent_link();
}
self.node.as_leaf_mut().len -= 1;
(*self.node.as_leaf_mut()).len -= 1;
left_node.as_leaf_mut().len += right_len as u16 + 1;
(*left_node.as_leaf_mut()).len += right_len as u16 + 1;
if self.node.height > 1 {
ptr::copy_nonoverlapping(
@ -1407,8 +1467,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
move_kv(left_kv, new_left_len, parent_kv, 0, 1);
}
left_node.reborrow_mut().as_leaf_mut().len -= count as u16;
right_node.reborrow_mut().as_leaf_mut().len += count as u16;
(*left_node.reborrow_mut().as_leaf_mut()).len -= count as u16;
(*right_node.reborrow_mut().as_leaf_mut()).len += count as u16;
match (left_node.force(), right_node.force()) {
(ForceResult::Internal(left), ForceResult::Internal(mut right)) => {
@ -1468,8 +1528,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
new_right_len);
}
left_node.reborrow_mut().as_leaf_mut().len += count as u16;
right_node.reborrow_mut().as_leaf_mut().len -= count as u16;
(*left_node.reborrow_mut().as_leaf_mut()).len += count as u16;
(*right_node.reborrow_mut().as_leaf_mut()).len -= count as u16;
match (left_node.force(), right_node.force()) {
(ForceResult::Internal(left), ForceResult::Internal(mut right)) => {
@ -1560,8 +1620,8 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal>, ma
move_kv(left_kv, left_new_len, right_kv, 0, right_new_len);
left_node.reborrow_mut().as_leaf_mut().len = left_new_len as u16;
right_node.reborrow_mut().as_leaf_mut().len = right_new_len as u16;
(*left_node.reborrow_mut().as_leaf_mut()).len = left_new_len as u16;
(*right_node.reborrow_mut().as_leaf_mut()).len = right_new_len as u16;
match (left_node.force(), right_node.force()) {
(ForceResult::Internal(left), ForceResult::Internal(right)) => {