From 75181dc22f6e25760a95fdc4ad92f9a054506486 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Mon, 13 Dec 2021 20:56:30 -0600 Subject: [PATCH 1/5] Error if we try to read dep during deserialization --- compiler/rustc_query_impl/src/plumbing.rs | 14 ++++++++++++-- compiler/rustc_query_system/src/dep_graph/graph.rs | 8 ++++++++ compiler/rustc_query_system/src/query/mod.rs | 1 + compiler/rustc_query_system/src/query/plumbing.rs | 6 +++--- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs index 6d76d09f619..5ebeae35964 100644 --- a/compiler/rustc_query_impl/src/plumbing.rs +++ b/compiler/rustc_query_impl/src/plumbing.rs @@ -83,12 +83,17 @@ impl QueryContext for QueryCtxt<'_> { &self, token: QueryJobId, diagnostics: Option<&Lock>>, + read_allowed: bool, compute: impl FnOnce() -> R, ) -> R { // The `TyCtxt` stored in TLS has the same global interner lifetime // as `self`, so we use `with_related_context` to relate the 'tcx lifetimes // when accessing the `ImplicitCtxt`. tls::with_related_context(**self, move |current_icx| { + let mut old_read_allowed = false; + if let Some(task_deps) = current_icx.task_deps { + old_read_allowed = std::mem::replace(&mut task_deps.lock().read_allowed, read_allowed); + } // Update the `ImplicitCtxt` to point to our new query job. let new_icx = ImplicitCtxt { tcx: **self, @@ -99,9 +104,14 @@ impl QueryContext for QueryCtxt<'_> { }; // Use the `ImplicitCtxt` while we execute the query. - tls::enter_context(&new_icx, |_| { + let res = tls::enter_context(&new_icx, |_| { rustc_data_structures::stack::ensure_sufficient_stack(compute) - }) + }); + + if let Some(task_deps) = new_icx.task_deps { + task_deps.lock().read_allowed = old_read_allowed; + } + res }) } } diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index a8be1ca34c0..64dcc6f37f1 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -251,6 +251,7 @@ impl DepGraph { reads: SmallVec::new(), read_set: Default::default(), phantom_data: PhantomData, + read_allowed: true, })) }; let result = K::with_deps(task_deps.as_ref(), || task(cx, arg)); @@ -362,6 +363,11 @@ impl DepGraph { if let Some(task_deps) = task_deps { let mut task_deps = task_deps.lock(); let task_deps = &mut *task_deps; + + if !task_deps.read_allowed { + panic!("Illegal read of: {:?}", dep_node_index); + } + if cfg!(debug_assertions) { data.current.total_read_count.fetch_add(1, Relaxed); } @@ -1115,6 +1121,7 @@ pub struct TaskDeps { reads: EdgesVec, read_set: FxHashSet, phantom_data: PhantomData>, + pub read_allowed: bool, } impl Default for TaskDeps { @@ -1125,6 +1132,7 @@ impl Default for TaskDeps { reads: EdgesVec::new(), read_set: FxHashSet::default(), phantom_data: PhantomData, + read_allowed: true, } } } diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs index a2f7843baaa..265e0b80d7c 100644 --- a/compiler/rustc_query_system/src/query/mod.rs +++ b/compiler/rustc_query_system/src/query/mod.rs @@ -142,6 +142,7 @@ pub trait QueryContext: HasDepContext { &self, token: QueryJobId, diagnostics: Option<&Lock>>, + read_allowed: bool, compute: impl FnOnce() -> R, ) -> R; } diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs index b08db39e245..e4a177cbffb 100644 --- a/compiler/rustc_query_system/src/query/plumbing.rs +++ b/compiler/rustc_query_system/src/query/plumbing.rs @@ -440,7 +440,7 @@ where // Fast path for when incr. comp. is off. if !dep_graph.is_fully_enabled() { let prof_timer = tcx.dep_context().profiler().query_provider(); - let result = tcx.start_query(job_id, None, || query.compute(*tcx.dep_context(), key)); + let result = tcx.start_query(job_id, None, true, || query.compute(*tcx.dep_context(), key)); let dep_node_index = dep_graph.next_virtual_depnode_index(); prof_timer.finish_with_query_invocation_id(dep_node_index.into()); return (result, dep_node_index); @@ -453,7 +453,7 @@ where // The diagnostics for this query will be promoted to the current session during // `try_mark_green()`, so we can ignore them here. - if let Some(ret) = tcx.start_query(job_id, None, || { + if let Some(ret) = tcx.start_query(job_id, None, false, || { try_load_from_disk_and_cache_in_memory(tcx, &key, &dep_node, query) }) { return ret; @@ -463,7 +463,7 @@ where let prof_timer = tcx.dep_context().profiler().query_provider(); let diagnostics = Lock::new(ThinVec::new()); - let (result, dep_node_index) = tcx.start_query(job_id, Some(&diagnostics), || { + let (result, dep_node_index) = tcx.start_query(job_id, Some(&diagnostics), true, || { if query.anon { return dep_graph.with_anon_task(*tcx.dep_context(), query.dep_kind, || { query.compute(*tcx.dep_context(), key) From 49560e9c4933cbec077156b4645888211893339f Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Tue, 14 Dec 2021 10:59:29 -0500 Subject: [PATCH 2/5] Ban deps only during query loading from disk --- compiler/rustc_query_system/src/query/plumbing.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs index e4a177cbffb..fd22698e419 100644 --- a/compiler/rustc_query_system/src/query/plumbing.rs +++ b/compiler/rustc_query_system/src/query/plumbing.rs @@ -2,14 +2,14 @@ //! generate the actual methods on tcx which find and execute the provider, //! manage the caches, and so forth. -use crate::dep_graph::{DepContext, DepNode, DepNodeIndex, DepNodeParams}; +use crate::dep_graph::{DepContext, DepNode, DepNodeIndex, DepNodeParams, TaskDeps}; use crate::query::caches::QueryCache; use crate::query::config::{QueryDescription, QueryVtable}; use crate::query::job::{ report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobInfo, QueryShardJobId, }; use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame}; - +use crate::dep_graph::DepKind; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHasher}; #[cfg(parallel_compiler)] @@ -515,7 +515,13 @@ where // Some things are never cached on disk. if query.cache_on_disk { let prof_timer = tcx.dep_context().profiler().incr_cache_loading(); - let result = query.try_load_from_disk(tcx, prev_dep_node_index); + + let mut deps = TaskDeps::default(); + deps.read_allowed = false; + let deps = Lock::new(deps); + let result = CTX::DepKind::with_deps(Some(&deps), || { + query.try_load_from_disk(tcx, prev_dep_node_index) + }); prof_timer.finish_with_query_invocation_id(dep_node_index.into()); if let Some(result) = result { From ab168e69ac13813962a8c395163bae1360ee27b6 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Tue, 14 Dec 2021 11:13:17 -0500 Subject: [PATCH 3/5] Some cleanup --- compiler/rustc_query_impl/src/plumbing.rs | 14 ++------------ compiler/rustc_query_system/src/query/mod.rs | 1 - compiler/rustc_query_system/src/query/plumbing.rs | 8 ++++---- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs index 5ebeae35964..6d76d09f619 100644 --- a/compiler/rustc_query_impl/src/plumbing.rs +++ b/compiler/rustc_query_impl/src/plumbing.rs @@ -83,17 +83,12 @@ impl QueryContext for QueryCtxt<'_> { &self, token: QueryJobId, diagnostics: Option<&Lock>>, - read_allowed: bool, compute: impl FnOnce() -> R, ) -> R { // The `TyCtxt` stored in TLS has the same global interner lifetime // as `self`, so we use `with_related_context` to relate the 'tcx lifetimes // when accessing the `ImplicitCtxt`. tls::with_related_context(**self, move |current_icx| { - let mut old_read_allowed = false; - if let Some(task_deps) = current_icx.task_deps { - old_read_allowed = std::mem::replace(&mut task_deps.lock().read_allowed, read_allowed); - } // Update the `ImplicitCtxt` to point to our new query job. let new_icx = ImplicitCtxt { tcx: **self, @@ -104,14 +99,9 @@ impl QueryContext for QueryCtxt<'_> { }; // Use the `ImplicitCtxt` while we execute the query. - let res = tls::enter_context(&new_icx, |_| { + tls::enter_context(&new_icx, |_| { rustc_data_structures::stack::ensure_sufficient_stack(compute) - }); - - if let Some(task_deps) = new_icx.task_deps { - task_deps.lock().read_allowed = old_read_allowed; - } - res + }) }) } } diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs index 265e0b80d7c..a2f7843baaa 100644 --- a/compiler/rustc_query_system/src/query/mod.rs +++ b/compiler/rustc_query_system/src/query/mod.rs @@ -142,7 +142,6 @@ pub trait QueryContext: HasDepContext { &self, token: QueryJobId, diagnostics: Option<&Lock>>, - read_allowed: bool, compute: impl FnOnce() -> R, ) -> R; } diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs index fd22698e419..cd31c5b3f08 100644 --- a/compiler/rustc_query_system/src/query/plumbing.rs +++ b/compiler/rustc_query_system/src/query/plumbing.rs @@ -2,6 +2,7 @@ //! generate the actual methods on tcx which find and execute the provider, //! manage the caches, and so forth. +use crate::dep_graph::DepKind; use crate::dep_graph::{DepContext, DepNode, DepNodeIndex, DepNodeParams, TaskDeps}; use crate::query::caches::QueryCache; use crate::query::config::{QueryDescription, QueryVtable}; @@ -9,7 +10,6 @@ use crate::query::job::{ report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobInfo, QueryShardJobId, }; use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame}; -use crate::dep_graph::DepKind; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHasher}; #[cfg(parallel_compiler)] @@ -440,7 +440,7 @@ where // Fast path for when incr. comp. is off. if !dep_graph.is_fully_enabled() { let prof_timer = tcx.dep_context().profiler().query_provider(); - let result = tcx.start_query(job_id, None, true, || query.compute(*tcx.dep_context(), key)); + let result = tcx.start_query(job_id, None, || query.compute(*tcx.dep_context(), key)); let dep_node_index = dep_graph.next_virtual_depnode_index(); prof_timer.finish_with_query_invocation_id(dep_node_index.into()); return (result, dep_node_index); @@ -453,7 +453,7 @@ where // The diagnostics for this query will be promoted to the current session during // `try_mark_green()`, so we can ignore them here. - if let Some(ret) = tcx.start_query(job_id, None, false, || { + if let Some(ret) = tcx.start_query(job_id, None, || { try_load_from_disk_and_cache_in_memory(tcx, &key, &dep_node, query) }) { return ret; @@ -463,7 +463,7 @@ where let prof_timer = tcx.dep_context().profiler().query_provider(); let diagnostics = Lock::new(ThinVec::new()); - let (result, dep_node_index) = tcx.start_query(job_id, Some(&diagnostics), true, || { + let (result, dep_node_index) = tcx.start_query(job_id, Some(&diagnostics), || { if query.anon { return dep_graph.with_anon_task(*tcx.dep_context(), query.dep_kind, || { query.compute(*tcx.dep_context(), key) From 28f19f62c7990704cf1f5ed5ef92599730393278 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Mon, 20 Dec 2021 21:46:55 -0500 Subject: [PATCH 4/5] Address review comments --- .../rustc_query_system/src/dep_graph/graph.rs | 58 ++++++++++++++++++- .../rustc_query_system/src/query/plumbing.rs | 15 +++-- 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 64dcc6f37f1..630c76764c7 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -171,6 +171,57 @@ impl DepGraph { K::with_deps(None, op) } + /// Used to wrap the deserialization of a query result from disk, + /// This method enforces that no new `DepNodes` are created during + /// query result deserialization. + /// + /// Enforcing this makes the query dep graph simpler - all nodes + /// must be created during the query execution, and should be + /// created from inside the 'body' of a query (the implementation + /// provided by a particular compiler crate). + /// + /// Consider the case of three queries `A`, `B`, and `C`, where + /// `A` invokes `B` and `B` invokes `C`: + /// + /// `A -> B -> C` + /// + /// Suppose that decoding the result of query `B` required invoking + /// a query `D`. If we did not create a fresh `TaskDeps` when + /// decoding `B`, we might would still be using the `TaskDeps` for query `A` + /// (if we needed to re-execute `A`). This would cause us to create + /// a new edge `A -> D`. If this edge did not previously + /// exist in the `DepGraph`, then we could end up with a different + /// `DepGraph` at the end of compilation, even if there were no + /// meaningful changes to the overall program (e.g. a newline was added). + /// In addition, this edge might cause a subsequent compilation run + /// to try to force `D` before marking other necessary nodes green. If + /// `D` did not exist in the new compilation session, then we might + /// get an ICE. Normally, we would have tried (and failed) to mark + /// some other query green (e.g. `item_children`) which was used + /// to obtain `D`, which would prevent us from ever trying to force + /// a non-existent `D`. + /// + /// It might be possible to enforce that all `DepNode`s read during + /// deserialization already exist in the previous `DepGraph`. In + /// the above example, we would invoke `D` during the deserialization + /// of `B`. Since we correctly create a new `TaskDeps` from the decoding + /// of `B`, this would result in an edge `B -> D`. If that edge already + /// existed (with the same `DepPathHash`es), then it should be correct + /// to allow the invocation of the query to proceed during deserialization + /// of a query result. However, this would require additional complexity + /// in the query infrastructure, and is not currently needed by the + /// decoding of any query results. Should the need arise in the future, + /// we should consider extending the query system with this functionality. + pub fn with_query_deserialization(&self, op: OP) -> R + where + OP: FnOnce() -> R, + { + let mut deps = TaskDeps::default(); + deps.read_allowed = false; + let deps = Lock::new(deps); + K::with_deps(Some(&deps), op) + } + /// Starts a new dep-graph task. Dep-graph tasks are specified /// using a free function (`task`) and **not** a closure -- this /// is intentional because we want to exercise tight control over @@ -1121,7 +1172,12 @@ pub struct TaskDeps { reads: EdgesVec, read_set: FxHashSet, phantom_data: PhantomData>, - pub read_allowed: bool, + /// Whether or not we allow `DepGraph::read_index` to run. + /// This is normally true, except inside `with_query_deserialization`, + /// where it set to `false` to enforce that no new `DepNode` edges are + /// created. See the documentation of `with_query_deserialization` for + /// more details. + read_allowed: bool, } impl Default for TaskDeps { diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs index cd31c5b3f08..192da6735fc 100644 --- a/compiler/rustc_query_system/src/query/plumbing.rs +++ b/compiler/rustc_query_system/src/query/plumbing.rs @@ -2,8 +2,7 @@ //! generate the actual methods on tcx which find and execute the provider, //! manage the caches, and so forth. -use crate::dep_graph::DepKind; -use crate::dep_graph::{DepContext, DepNode, DepNodeIndex, DepNodeParams, TaskDeps}; +use crate::dep_graph::{DepContext, DepNode, DepNodeIndex, DepNodeParams}; use crate::query::caches::QueryCache; use crate::query::config::{QueryDescription, QueryVtable}; use crate::query::job::{ @@ -516,12 +515,12 @@ where if query.cache_on_disk { let prof_timer = tcx.dep_context().profiler().incr_cache_loading(); - let mut deps = TaskDeps::default(); - deps.read_allowed = false; - let deps = Lock::new(deps); - let result = CTX::DepKind::with_deps(Some(&deps), || { - query.try_load_from_disk(tcx, prev_dep_node_index) - }); + // The call to `with_query_deserialization` enforces that no new `DepNodes` + // are created during deserialization. See the docs of that method for more + // details. + let result = dep_graph + .with_query_deserialization(|| query.try_load_from_disk(tcx, prev_dep_node_index)); + prof_timer.finish_with_query_invocation_id(dep_node_index.into()); if let Some(result) = result { From 27ed52c0a2e05d459a25d077d2eec584cb98a591 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Thu, 23 Dec 2021 13:44:04 -0500 Subject: [PATCH 5/5] Adjust wording of comment --- .../rustc_query_system/src/dep_graph/graph.rs | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 630c76764c7..19788a979ad 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -185,20 +185,20 @@ impl DepGraph { /// /// `A -> B -> C` /// - /// Suppose that decoding the result of query `B` required invoking - /// a query `D`. If we did not create a fresh `TaskDeps` when - /// decoding `B`, we might would still be using the `TaskDeps` for query `A` + /// Suppose that decoding the result of query `B` required re-computing + /// the query `C`. If we did not create a fresh `TaskDeps` when + /// decoding `B`, we would still be using the `TaskDeps` for query `A` /// (if we needed to re-execute `A`). This would cause us to create - /// a new edge `A -> D`. If this edge did not previously + /// a new edge `A -> C`. If this edge did not previously /// exist in the `DepGraph`, then we could end up with a different /// `DepGraph` at the end of compilation, even if there were no /// meaningful changes to the overall program (e.g. a newline was added). /// In addition, this edge might cause a subsequent compilation run - /// to try to force `D` before marking other necessary nodes green. If - /// `D` did not exist in the new compilation session, then we might + /// to try to force `C` before marking other necessary nodes green. If + /// `C` did not exist in the new compilation session, then we could /// get an ICE. Normally, we would have tried (and failed) to mark /// some other query green (e.g. `item_children`) which was used - /// to obtain `D`, which would prevent us from ever trying to force + /// to obtain `C`, which would prevent us from ever trying to force /// a non-existent `D`. /// /// It might be possible to enforce that all `DepNode`s read during @@ -208,7 +208,12 @@ impl DepGraph { /// of `B`, this would result in an edge `B -> D`. If that edge already /// existed (with the same `DepPathHash`es), then it should be correct /// to allow the invocation of the query to proceed during deserialization - /// of a query result. However, this would require additional complexity + /// of a query result. We would merely assert that the dep-graph fragment + /// that would have been added by invoking `C` while decoding `B` + /// is equivalent to the dep-graph fragment that we already instantiated for B + /// (at the point where we successfully marked B as green). + /// + /// However, this would require additional complexity /// in the query infrastructure, and is not currently needed by the /// decoding of any query results. Should the need arise in the future, /// we should consider extending the query system with this functionality.