rustc: Wait for all codegen threads to exit

This commit updates rustc to wait for all codegen threads to exit before
allowing the main thread to exit. This is a stab in the dark to fix the
mysterious segfaults appearing on #55238, and hopefully we'll see
whether this actually fixes things in practice...
This commit is contained in:
Alex Crichton 2018-10-31 11:32:27 -07:00
parent 016eaf88f5
commit 14c6835e03
3 changed files with 105 additions and 23 deletions

View file

@ -15,17 +15,6 @@ dependencies = [
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "alloc_jemalloc"
version = "0.0.0"
dependencies = [
"build_helper 0.1.0",
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
"compiler_builtins 0.0.0",
"core 0.0.0",
"libc 0.0.0",
]
[[package]]
name = "alloc_system"
version = "0.0.0"
@ -2696,7 +2685,6 @@ name = "std"
version = "0.0.0"
dependencies = [
"alloc 0.0.0",
"alloc_jemalloc 0.0.0",
"alloc_system 0.0.0",
"build_helper 0.1.0",
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",

View file

@ -1508,6 +1508,7 @@ enum Message {
},
CodegenComplete,
CodegenItem,
CodegenAborted,
}
struct Diagnostic {
@ -1788,6 +1789,7 @@ fn start_executing_work(tcx: TyCtxt,
let mut needs_lto = Vec::new();
let mut lto_import_only_modules = Vec::new();
let mut started_lto = false;
let mut codegen_aborted = false;
// This flag tracks whether all items have gone through codegens
let mut codegen_done = false;
@ -1805,13 +1807,19 @@ fn start_executing_work(tcx: TyCtxt,
let mut llvm_start_time = None;
// Run the message loop while there's still anything that needs message
// processing:
// processing. Note that as soon as codegen is aborted we simply want to
// wait for all existing work to finish, so many of the conditions here
// only apply if codegen hasn't been aborted as they represent pending
// work to be done.
while !codegen_done ||
work_items.len() > 0 ||
running > 0 ||
needs_lto.len() > 0 ||
lto_import_only_modules.len() > 0 ||
main_thread_worker_state != MainThreadWorkerState::Idle {
(!codegen_aborted && (
work_items.len() > 0 ||
needs_lto.len() > 0 ||
lto_import_only_modules.len() > 0 ||
main_thread_worker_state != MainThreadWorkerState::Idle
))
{
// While there are still CGUs to be codegened, the coordinator has
// to decide how to utilize the compiler processes implicit Token:
@ -1840,6 +1848,9 @@ fn start_executing_work(tcx: TyCtxt,
spawn_work(cgcx, item);
}
}
} else if codegen_aborted {
// don't queue up any more work if codegen was aborted, we're
// just waiting for our existing children to finish
} else {
// If we've finished everything related to normal codegen
// then it must be the case that we've got some LTO work to do.
@ -1904,7 +1915,7 @@ fn start_executing_work(tcx: TyCtxt,
// Spin up what work we can, only doing this while we've got available
// parallelism slots and work left to spawn.
while work_items.len() > 0 && running < tokens.len() {
while !codegen_aborted && work_items.len() > 0 && running < tokens.len() {
let (item, _) = work_items.pop().unwrap();
maybe_start_llvm_timer(cgcx.config(item.module_kind()),
@ -1969,6 +1980,7 @@ fn start_executing_work(tcx: TyCtxt,
if !cgcx.opts.debugging_opts.no_parallel_llvm {
helper.request_token();
}
assert!(!codegen_aborted);
assert_eq!(main_thread_worker_state,
MainThreadWorkerState::Codegenning);
main_thread_worker_state = MainThreadWorkerState::Idle;
@ -1976,11 +1988,26 @@ fn start_executing_work(tcx: TyCtxt,
Message::CodegenComplete => {
codegen_done = true;
assert!(!codegen_aborted);
assert_eq!(main_thread_worker_state,
MainThreadWorkerState::Codegenning);
main_thread_worker_state = MainThreadWorkerState::Idle;
}
// If codegen is aborted that means translation was aborted due
// to some normal-ish compiler error. In this situation we want
// to exit as soon as possible, but we want to make sure all
// existing work has finished. Flag codegen as being done, and
// then conditions above will ensure no more work is spawned but
// we'll keep executing this loop until `running` hits 0.
Message::CodegenAborted => {
assert!(!codegen_aborted);
codegen_done = true;
codegen_aborted = true;
assert_eq!(main_thread_worker_state,
MainThreadWorkerState::Codegenning);
}
// If a thread exits successfully then we drop a token associated
// with that worker and update our `running` count. We may later
// re-acquire a token to continue running more work. We may also not
@ -2446,6 +2473,19 @@ impl OngoingCodegen {
drop(self.coordinator_send.send(Box::new(Message::CodegenComplete)));
}
/// Consume this context indicating that codegen was entirely aborted, and
/// we need to exit as quickly as possible.
///
/// This method blocks the current thread until all worker threads have
/// finished, and all worker threads should have exited or be real close to
/// exiting at this point.
pub fn codegen_aborted(self) {
// Signal to the coordinator it should spawn no more work and start
// shutdown.
drop(self.coordinator_send.send(Box::new(Message::CodegenAborted)));
drop(self.future.join());
}
pub fn check_for_errors(&self, sess: &Session) {
self.shared_emitter_main.check(sess, false);
}
@ -2464,6 +2504,11 @@ impl OngoingCodegen {
}
}
// impl Drop for OngoingCodegen {
// fn drop(&mut self) {
// }
// }
pub(crate) fn submit_codegened_module_to_llvm(tcx: TyCtxt,
module: ModuleCodegen,
cost: u64) {

View file

@ -76,12 +76,13 @@ use rustc_data_structures::small_c_str::SmallCStr;
use rustc_data_structures::sync::Lrc;
use std::any::Any;
use std::ffi::CString;
use std::sync::Arc;
use std::time::{Instant, Duration};
use std::i32;
use std::cmp;
use std::ffi::CString;
use std::i32;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use std::sync::mpsc;
use std::time::{Instant, Duration};
use syntax_pos::Span;
use syntax_pos::symbol::InternedString;
use syntax::attr;
@ -820,6 +821,7 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
metadata,
rx,
codegen_units.len());
let ongoing_codegen = AbortCodegenOnDrop(Some(ongoing_codegen));
// Codegen an allocator shim, if necessary.
//
@ -949,7 +951,54 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
ongoing_codegen.check_for_errors(tcx.sess);
assert_and_save_dep_graph(tcx);
ongoing_codegen
ongoing_codegen.into_inner()
}
/// A curious wrapper structure whose only purpose is to call `codegen_aborted`
/// when it's dropped abnormally.
///
/// In the process of working on rust-lang/rust#55238 a mysterious segfault was
/// stumbled upon. The segfault was never reproduced locally, but it was
/// suspected to be releated to the fact that codegen worker threads were
/// sticking around by the time the main thread was exiting, causing issues.
///
/// This structure is an attempt to fix that issue where the `codegen_aborted`
/// message will block until all workers have finished. This should ensure that
/// even if the main codegen thread panics we'll wait for pending work to
/// complete before returning from the main thread, hopefully avoiding
/// segfaults.
///
/// If you see this comment in the code, then it means that this workaround
/// worked! We may yet one day track down the mysterious cause of that
/// segfault...
struct AbortCodegenOnDrop(Option<OngoingCodegen>);
impl AbortCodegenOnDrop {
fn into_inner(mut self) -> OngoingCodegen {
self.0.take().unwrap()
}
}
impl Deref for AbortCodegenOnDrop {
type Target = OngoingCodegen;
fn deref(&self) -> &OngoingCodegen {
self.0.as_ref().unwrap()
}
}
impl DerefMut for AbortCodegenOnDrop {
fn deref_mut(&mut self) -> &mut OngoingCodegen {
self.0.as_mut().unwrap()
}
}
impl Drop for AbortCodegenOnDrop {
fn drop(&mut self) {
if let Some(codegen) = self.0.take() {
codegen.codegen_aborted();
}
}
}
fn assert_and_save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {