Auto merge of #106612 - JakobDegen:cleanup-wf, r=tmiasko

Document wf constraints on control flow in cleanup blocks Was recently made aware of [this code](a377893da2/compiler/rustc_codegen_ssa/src/mir/analyze.rs (L247-L368)), which has this potential ICE: a377893da2/compiler/rustc_codegen_ssa/src/mir/analyze.rs (L308-L314) Roughly speaking, the code there is attempting to partition the cleanup blocks into funclets that satisfy a "unique successor" property, and the ICE is set off if that's not possible. This PR documents the well-formedness constraints that MIR must satisfy to avoid setting off that ICE. The constraints documented are slightly stronger than the cases in which the ICE would have been set off in that code. This is necessary though, since whether or not that ICE gets set off can depend on iteration order in some graphs. This sort of constraint is kind of ugly, but I don't know a better alternative at the moment. It's worth knowing that two important optimizations are still correct: - Removing edges in the cfg: Fewer edges => fewer paths => stronger dominance relations => more contractions, and more contractions can't turn a forest into not-a-forest. - Contracting an edge u -> v when u only has one successor and v only has one predecessor: u already dominated v, so this contraction was going to happen anyway. There is definitely a MIR opt somewhere that can run afoul of this, but I don't know where it is. `@saethlin` was able to set it off though, so maybe he'll be able to shed some light on it. r? `@RalfJung` I suppose, and cc `@tmiasko` who might have insight/opinions on this
2023-01-17 11:34:35 +00:00 · 2023-01-17 11:34:35 +00:00 · f34cc658eb
parent 159ba8a92c 4bc963eba6
commit f34cc658eb
3 changed files with 110 additions and 9 deletions
--- a/compiler/rustc_const_eval/src/transform/validate.rs
+++ b/compiler/rustc_const_eval/src/transform/validate.rs
@ -1,7 +1,8 @@
 //! Validates the MIR to ensure that invariants are upheld.

-use rustc_data_structures::fx::FxHashSet;
+use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_index::bit_set::BitSet;
+use rustc_index::vec::IndexVec;
 use rustc_infer::traits::Reveal;
 use rustc_middle::mir::interpret::Scalar;
 use rustc_middle::mir::visit::NonUseContext::VarDebugInfo;
@ -18,7 +19,7 @@ use rustc_mir_dataflow::storage::always_storage_live_locals;
 use rustc_mir_dataflow::{Analysis, ResultsCursor};
 use rustc_target::abi::{Size, VariantIdx};

-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum EdgeKind {
    Unwind,
    Normal,
@ -57,18 +58,20 @@ impl<'tcx> MirPass<'tcx> for Validator {
            .iterate_to_fixpoint()
            .into_results_cursor(body);

-        TypeChecker {
+        let mut checker = TypeChecker {
            when: &self.when,
            body,
            tcx,
            param_env,
            mir_phase,
+            unwind_edge_count: 0,
            reachable_blocks: traversal::reachable_as_bitset(body),
            storage_liveness,
            place_cache: Vec::new(),
            value_cache: Vec::new(),
-        }
-        .visit_body(body);
+        };
+        checker.visit_body(body);
+        checker.check_cleanup_control_flow();
    }
 }

@ -78,6 +81,7 @@ struct TypeChecker<'a, 'tcx> {
    tcx: TyCtxt<'tcx>,
    param_env: ParamEnv<'tcx>,
    mir_phase: MirPhase,
+    unwind_edge_count: usize,
    reachable_blocks: BitSet<BasicBlock>,
    storage_liveness: ResultsCursor<'a, 'tcx, MaybeStorageLive<'static>>,
    place_cache: Vec<PlaceRef<'tcx>>,
@ -102,7 +106,7 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> {
        );
    }

-    fn check_edge(&self, location: Location, bb: BasicBlock, edge_kind: EdgeKind) {
+    fn check_edge(&mut self, location: Location, bb: BasicBlock, edge_kind: EdgeKind) {
        if bb == START_BLOCK {
            self.fail(location, "start block must not have predecessors")
        }
@ -111,10 +115,12 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> {
            match (src.is_cleanup, bb.is_cleanup, edge_kind) {
                // Non-cleanup blocks can jump to non-cleanup blocks along non-unwind edges
                (false, false, EdgeKind::Normal)
-                // Non-cleanup blocks can jump to cleanup blocks along unwind edges
-                | (false, true, EdgeKind::Unwind)
                // Cleanup blocks can jump to cleanup blocks along non-unwind edges
                | (true, true, EdgeKind::Normal) => {}
+                // Non-cleanup blocks can jump to cleanup blocks along unwind edges
+                (false, true, EdgeKind::Unwind) => {
+                    self.unwind_edge_count += 1;
+                }
                // All other jumps are invalid
                _ => {
                    self.fail(
@ -134,6 +140,88 @@ impl<'a, 'tcx> TypeChecker<'a, 'tcx> {
        }
    }

+    fn check_cleanup_control_flow(&self) {
+        if self.unwind_edge_count <= 1 {
+            return;
+        }
+        let doms = self.body.basic_blocks.dominators();
+        let mut post_contract_node = FxHashMap::default();
+        // Reusing the allocation across invocations of the closure
+        let mut dom_path = vec![];
+        let mut get_post_contract_node = |mut bb| {
+            let root = loop {
+                if let Some(root) = post_contract_node.get(&bb) {
+                    break *root;
+                }
+                let parent = doms.immediate_dominator(bb);
+                dom_path.push(bb);
+                if !self.body.basic_blocks[parent].is_cleanup {
+                    break bb;
+                }
+                bb = parent;
+            };
+            for bb in dom_path.drain(..) {
+                post_contract_node.insert(bb, root);
+            }
+            root
+        };
+
+        let mut parent = IndexVec::from_elem(None, &self.body.basic_blocks);
+        for (bb, bb_data) in self.body.basic_blocks.iter_enumerated() {
+            if !bb_data.is_cleanup || !self.reachable_blocks.contains(bb) {
+                continue;
+            }
+            let bb = get_post_contract_node(bb);
+            for s in bb_data.terminator().successors() {
+                let s = get_post_contract_node(s);
+                if s == bb {
+                    continue;
+                }
+                let parent = &mut parent[bb];
+                match parent {
+                    None => {
+                        *parent = Some(s);
+                    }
+                    Some(e) if *e == s => (),
+                    Some(e) => self.fail(
+                        Location { block: bb, statement_index: 0 },
+                        format!(
+                            "Cleanup control flow violation: The blocks dominated by {:?} have edges to both {:?} and {:?}",
+                            bb,
+                            s,
+                            *e
+                        )
+                    ),
+                }
+            }
+        }
+
+        // Check for cycles
+        let mut stack = FxHashSet::default();
+        for i in 0..parent.len() {
+            let mut bb = BasicBlock::from_usize(i);
+            stack.clear();
+            stack.insert(bb);
+            loop {
+                let Some(parent)= parent[bb].take() else {
+                    break
+                };
+                let no_cycle = stack.insert(parent);
+                if !no_cycle {
+                    self.fail(
+                        Location { block: bb, statement_index: 0 },
+                        format!(
+                            "Cleanup control flow violation: Cycle involving edge {:?} -> {:?}",
+                            bb, parent,
+                        ),
+                    );
+                    break;
+                }
+                bb = parent;
+            }
+        }
+    }
+
    /// Check if src can be assigned into dest.
    /// This is not precise, it will accept some incorrect assignments.
    fn mir_assign_valid_types(&self, src: Ty<'tcx>, dest: Ty<'tcx>) -> bool {
--- a/compiler/rustc_data_structures/src/graph/dominators/mod.rs
+++ b/compiler/rustc_data_structures/src/graph/dominators/mod.rs
@ -135,7 +135,10 @@ pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> {
        // This loop computes the semi[w] for w.
        semi[w] = w;
        for v in graph.predecessors(pre_order_to_real[w]) {
-            let v = real_to_pre_order[v].unwrap();
+            // Reachable vertices may have unreachable predecessors, so ignore any of them
+            let Some(v) = real_to_pre_order[v] else {
+                continue
+            };

            // eval returns a vertex x from which semi[x] is minimum among
            // vertices semi[v] +> x *> v.
--- a/compiler/rustc_middle/src/mir/syntax.rs
+++ b/compiler/rustc_middle/src/mir/syntax.rs
@ -512,6 +512,16 @@ pub struct CopyNonOverlapping<'tcx> {
 ///     must also be `cleanup`. This is a part of the type system and checked statically, so it is
 ///     still an error to have such an edge in the CFG even if it's known that it won't be taken at
 ///     runtime.
+///  4. The control flow between cleanup blocks must look like an upside down tree. Roughly
+///     speaking, this means that control flow that looks like a V is allowed, while control flow
+///     that looks like a W is not. This is necessary to ensure that landing pad information can be
+///     correctly codegened on MSVC. More precisely:
+///
+///     Begin with the standard control flow graph `G`. Modify `G` as follows: for any two cleanup
+///     vertices `u` and `v` such that `u` dominates `v`, contract `u` and `v` into a single vertex,
+///     deleting self edges and duplicate edges in the process. Now remove all vertices from `G`
+///     that are not cleanup vertices or are not reachable. The resulting graph must be an inverted
+///     tree, that is each vertex may have at most one successor and there may be no cycles.
 #[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, TypeFoldable, TypeVisitable)]
 pub enum TerminatorKind<'tcx> {
    /// Block has one successor; we continue execution there.