Make DefIdForest cheaper to clone

Since `DefIdForest` contains 0 or 1 elements the large majority of the time, by allocating only in the >1 case we avoid almost all allocations, compared to `Arc<SmallVec<[DefId;1]>>`. This shaves off 0.2% on the benchmark that stresses uninhabitedness checking.
2020-12-03 01:52:24 +00:00 · 2020-12-03 01:52:24 +00:00 · e608d8f4e5
parent 8598c9f6e5
commit e608d8f4e5
3 changed files with 80 additions and 58 deletions
--- a/compiler/rustc_middle/src/query/mod.rs
+++ b/compiler/rustc_middle/src/query/mod.rs
@ -1314,7 +1314,7 @@ rustc_queries! {
        /// check whether the forest is empty.
        query type_uninhabited_from(
            key: ty::ParamEnvAnd<'tcx, Ty<'tcx>>
-        ) -> Arc<ty::inhabitedness::DefIdForest> {
+        ) -> ty::inhabitedness::DefIdForest {
            desc { "computing the inhabitedness of `{:?}`", key }
        }
    }
--- a/compiler/rustc_middle/src/ty/inhabitedness/def_id_forest.rs
+++ b/compiler/rustc_middle/src/ty/inhabitedness/def_id_forest.rs
@ -3,6 +3,9 @@ use crate::ty::{DefId, DefIdTree};
 use rustc_hir::CRATE_HIR_ID;
 use smallvec::SmallVec;
 use std::mem;
+use std::sync::Arc;
+
+use DefIdForest::*;

 /// Represents a forest of `DefId`s closed under the ancestor relation. That is,
 /// if a `DefId` representing a module is contained in the forest then all
@ -11,45 +14,77 @@ use std::mem;
 ///
 /// This is used to represent a set of modules in which a type is visibly
 /// uninhabited.
+///
+/// We store the minimal set of `DefId`s required to represent the whole set. If A and B are
+/// `DefId`s in the `DefIdForest`, and A is a parent of B, then only A will be stored. When this is
+/// used with `type_uninhabited_from`, there will very rarely be more than one `DefId` stored.
 #[derive(Clone, HashStable)]
-pub struct DefIdForest {
-    /// The minimal set of `DefId`s required to represent the whole set.
-    /// If A and B are DefIds in the `DefIdForest`, and A is a descendant
-    /// of B, then only B will be in `root_ids`.
-    /// We use a `SmallVec` here because (for its use for caching inhabitedness)
-    /// it's rare that this will contain even two IDs.
-    root_ids: SmallVec<[DefId; 1]>,
+pub enum DefIdForest {
+    Empty,
+    Single(DefId),
+    /// This variant is very rare.
+    /// Invariant: >1 elements
+    /// We use `Arc` because this is used in the output of a query.
+    Multiple(Arc<[DefId]>),
+}
+
+/// Tests whether a slice of roots contains a given DefId.
+#[inline]
+fn slice_contains(tcx: TyCtxt<'tcx>, slice: &[DefId], id: DefId) -> bool {
+    slice.iter().any(|root_id| tcx.is_descendant_of(id, *root_id))
 }

 impl<'tcx> DefIdForest {
    /// Creates an empty forest.
    pub fn empty() -> DefIdForest {
-        DefIdForest { root_ids: SmallVec::new() }
+        DefIdForest::Empty
    }

    /// Creates a forest consisting of a single tree representing the entire
    /// crate.
    #[inline]
    pub fn full(tcx: TyCtxt<'tcx>) -> DefIdForest {
-        let crate_id = tcx.hir().local_def_id(CRATE_HIR_ID);
-        DefIdForest::from_id(crate_id.to_def_id())
+        DefIdForest::from_id(tcx.hir().local_def_id(CRATE_HIR_ID).to_def_id())
    }

    /// Creates a forest containing a `DefId` and all its descendants.
    pub fn from_id(id: DefId) -> DefIdForest {
-        let mut root_ids = SmallVec::new();
-        root_ids.push(id);
-        DefIdForest { root_ids }
+        DefIdForest::Single(id)
+    }
+
+    fn as_slice(&self) -> &[DefId] {
+        match self {
+            Empty => &[],
+            Single(id) => std::slice::from_ref(id),
+            Multiple(root_ids) => root_ids,
+        }
+    }
+
+    // Only allocates in the rare `Multiple` case.
+    fn from_slice(root_ids: &[DefId]) -> DefIdForest {
+        match root_ids {
+            [] => Empty,
+            [id] => Single(*id),
+            _ => DefIdForest::Multiple(root_ids.into()),
+        }
    }

    /// Tests whether the forest is empty.
    pub fn is_empty(&self) -> bool {
-        self.root_ids.is_empty()
+        match self {
+            Empty => true,
+            Single(..) | Multiple(..) => false,
+        }
+    }
+
+    /// Iterate over the set of roots.
+    fn iter(&self) -> impl Iterator<Item = DefId> + '_ {
+        self.as_slice().iter().copied()
    }

    /// Tests whether the forest contains a given DefId.
    pub fn contains(&self, tcx: TyCtxt<'tcx>, id: DefId) -> bool {
-        self.root_ids.iter().any(|root_id| tcx.is_descendant_of(id, *root_id))
+        slice_contains(tcx, self.as_slice(), id)
    }

    /// Calculate the intersection of a collection of forests.
@ -58,44 +93,28 @@ impl<'tcx> DefIdForest {
        I: IntoIterator<Item = DefIdForest>,
    {
        let mut iter = iter.into_iter();
-        let mut ret = if let Some(first) = iter.next() {
-            first
+        let mut ret: SmallVec<[_; 1]> = if let Some(first) = iter.next() {
+            SmallVec::from_slice(first.as_slice())
        } else {
            return DefIdForest::full(tcx);
        };

-        let mut next_ret = SmallVec::new();
-        let mut old_ret: SmallVec<[DefId; 1]> = SmallVec::new();
+        let mut next_ret: SmallVec<[_; 1]> = SmallVec::new();
        for next_forest in iter {
            // No need to continue if the intersection is already empty.
-            if ret.is_empty() {
-                break;
+            if ret.is_empty() || next_forest.is_empty() {
+                return DefIdForest::empty();
            }

-            // `next_ret` and `old_ret` are empty here.
-            // We keep the elements in `ret` that are also in `next_forest`. Those that aren't are
-            // put back in `ret` via `old_ret`.
-            for id in ret.root_ids.drain(..) {
-                if next_forest.contains(tcx, id) {
-                    next_ret.push(id);
-                } else {
-                    old_ret.push(id);
-                }
-            }
-            ret.root_ids.extend(old_ret.drain(..));
-
+            // We keep the elements in `ret` that are also in `next_forest`.
+            next_ret.extend(ret.iter().copied().filter(|&id| next_forest.contains(tcx, id)));
            // We keep the elements in `next_forest` that are also in `ret`.
-            // You'd think this is not needed because `next_ret` already contains `ret \inter
-            // next_forest`. But those aren't just sets of things. If `ret = [a]`, `next_forest =
-            // [b]` and `b` is a submodule of `a`, then `b` belongs in the intersection but we
-            // didn't catch it in the loop above.
-            next_ret.extend(next_forest.root_ids.into_iter().filter(|&id| ret.contains(tcx, id)));
-            // `next_ret` now contains the intersection of the original `ret` and `next_forest`.
+            next_ret.extend(next_forest.iter().filter(|&id| slice_contains(tcx, &ret, id)));

-            mem::swap(&mut next_ret, &mut ret.root_ids);
-            next_ret.drain(..);
+            mem::swap(&mut next_ret, &mut ret);
+            next_ret.clear();
        }
-        ret
+        DefIdForest::from_slice(&ret)
    }

    /// Calculate the union of a collection of forests.
@ -103,20 +122,26 @@ impl<'tcx> DefIdForest {
    where
        I: IntoIterator<Item = DefIdForest>,
    {
-        let mut ret = DefIdForest::empty();
-        let mut next_ret = SmallVec::new();
+        let mut ret: SmallVec<[_; 1]> = SmallVec::new();
+        let mut next_ret: SmallVec<[_; 1]> = SmallVec::new();
        for next_forest in iter {
-            next_ret.extend(ret.root_ids.drain(..).filter(|&id| !next_forest.contains(tcx, id)));
+            // Union with the empty set is a no-op.
+            if next_forest.is_empty() {
+                continue;
+            }

-            for id in next_forest.root_ids {
-                if !next_ret.contains(&id) {
+            // We add everything in `ret` that is not in `next_forest`.
+            next_ret.extend(ret.iter().copied().filter(|&id| !next_forest.contains(tcx, id)));
+            // We add everything in `next_forest` that we haven't added yet.
+            for id in next_forest.iter() {
+                if !slice_contains(tcx, &next_ret, id) {
                    next_ret.push(id);
                }
            }

-            mem::swap(&mut next_ret, &mut ret.root_ids);
-            next_ret.drain(..);
+            mem::swap(&mut next_ret, &mut ret);
+            next_ret.clear();
        }
-        ret
+        DefIdForest::from_slice(&ret)
    }
 }
--- a/compiler/rustc_middle/src/ty/inhabitedness/mod.rs
+++ b/compiler/rustc_middle/src/ty/inhabitedness/mod.rs
@ -7,8 +7,6 @@ use crate::ty::{AdtDef, FieldDef, Ty, TyS, VariantDef};
 use crate::ty::{AdtKind, Visibility};
 use crate::ty::{DefId, SubstsRef};

-use std::sync::Arc;
-
 mod def_id_forest;

 // The methods in this module calculate `DefIdForest`s of modules in which a
@ -193,7 +191,7 @@ impl<'tcx> TyS<'tcx> {
        tcx: TyCtxt<'tcx>,
        param_env: ty::ParamEnv<'tcx>,
    ) -> DefIdForest {
-        tcx.type_uninhabited_from(param_env.and(self)).as_ref().clone()
+        tcx.type_uninhabited_from(param_env.and(self))
    }
 }

@ -201,10 +199,10 @@ impl<'tcx> TyS<'tcx> {
 pub(crate) fn type_uninhabited_from<'tcx>(
    tcx: TyCtxt<'tcx>,
    key: ty::ParamEnvAnd<'tcx, Ty<'tcx>>,
-) -> Arc<DefIdForest> {
+) -> DefIdForest {
    let ty = key.value;
    let param_env = key.param_env;
-    let forest = match *ty.kind() {
+    match *ty.kind() {
        Adt(def, substs) => def.uninhabited_from(tcx, substs, param_env),

        Never => DefIdForest::full(tcx),
@ -229,6 +227,5 @@ pub(crate) fn type_uninhabited_from<'tcx>(
        Ref(..) => DefIdForest::empty(),

        _ => DefIdForest::empty(),
-    };
-    Arc::new(forest)
+    }
 }