[1/4 for #2365, #2671] Fix create/kill race with schedulers and tasks during rust_kernel::fail

This commit is contained in:
Ben Blum 2012-07-20 18:06:17 -04:00
parent f55999fd7a
commit 5bb4a12900
10 changed files with 69 additions and 32 deletions

View file

@ -19,6 +19,7 @@ rust_kernel::rust_kernel(rust_env *env) :
max_port_id(1), max_port_id(1),
rval(0), rval(0),
max_sched_id(1), max_sched_id(1),
killed(false),
sched_reaper(this), sched_reaper(this),
osmain_driver(NULL), osmain_driver(NULL),
non_weak_tasks(0), non_weak_tasks(0),
@ -103,7 +104,8 @@ rust_kernel::create_scheduler(rust_sched_launcher_factory *launchfac,
id = max_sched_id++; id = max_sched_id++;
assert(id != INTPTR_MAX && "Hit the maximum scheduler id"); assert(id != INTPTR_MAX && "Hit the maximum scheduler id");
sched = new (this, "rust_scheduler") sched = new (this, "rust_scheduler")
rust_scheduler(this, num_threads, id, allow_exit, launchfac); rust_scheduler(this, num_threads, id, allow_exit, killed,
launchfac);
bool is_new = sched_table bool is_new = sched_table
.insert(std::pair<rust_sched_id, .insert(std::pair<rust_sched_id,
rust_scheduler*>(id, sched)).second; rust_scheduler*>(id, sched)).second;
@ -197,6 +199,10 @@ rust_kernel::fail() {
#endif #endif
// Copy the list of schedulers so that we don't hold the lock while // Copy the list of schedulers so that we don't hold the lock while
// running kill_all_tasks. // running kill_all_tasks.
// I think this only needs to be done by one task ever; as it is,
// multiple tasks invoking kill_all might get here. Currently libcore
// ensures only one task will ever invoke it, but this would really be
// fine either way, so I'm leaving it as it is. -- bblum
// FIXME (#2671): There's a lot that happens under kill_all_tasks, // FIXME (#2671): There's a lot that happens under kill_all_tasks,
// and I don't know that holding sched_lock here is ok, but we need // and I don't know that holding sched_lock here is ok, but we need
// to hold the sched lock to prevent the scheduler from being // to hold the sched lock to prevent the scheduler from being
@ -205,15 +211,13 @@ rust_kernel::fail() {
std::vector<rust_scheduler*> scheds; std::vector<rust_scheduler*> scheds;
{ {
scoped_lock with(sched_lock); scoped_lock with(sched_lock);
killed = true;
for (sched_map::iterator iter = sched_table.begin(); for (sched_map::iterator iter = sched_table.begin();
iter != sched_table.end(); iter++) { iter != sched_table.end(); iter++) {
scheds.push_back(iter->second); scheds.push_back(iter->second);
} }
} }
// FIXME (#2671): This is not a foolproof way to kill all tasks
// while ensuring that no new tasks or schedulers are created in the
// meantime that keep the scheduler alive.
for (std::vector<rust_scheduler*>::iterator iter = scheds.begin(); for (std::vector<rust_scheduler*>::iterator iter = scheds.begin();
iter != scheds.end(); iter++) { iter != scheds.end(); iter++) {
(*iter)->kill_all_tasks(); (*iter)->kill_all_tasks();

View file

@ -72,7 +72,7 @@ class rust_kernel {
lock_and_signal rval_lock; lock_and_signal rval_lock;
int rval; int rval;
// Protects max_sched_id and sched_table, join_list // Protects max_sched_id and sched_table, join_list, killed
lock_and_signal sched_lock; lock_and_signal sched_lock;
// The next scheduler id // The next scheduler id
rust_sched_id max_sched_id; rust_sched_id max_sched_id;
@ -81,6 +81,10 @@ class rust_kernel {
sched_map sched_table; sched_map sched_table;
// A list of scheduler ids that are ready to exit // A list of scheduler ids that are ready to exit
std::vector<rust_sched_id> join_list; std::vector<rust_sched_id> join_list;
// Whether or not the runtime has to die (triggered when the root/main
// task group fails). This propagates to all new schedulers and tasks
// created after it is set.
bool killed;
rust_sched_reaper sched_reaper; rust_sched_reaper sched_reaper;
// The single-threaded scheduler that uses the main thread // The single-threaded scheduler that uses the main thread

View file

@ -4,33 +4,36 @@
const size_t SCHED_STACK_SIZE = 1024*100; const size_t SCHED_STACK_SIZE = 1024*100;
rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id) rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id,
bool killed)
: kernel(sched->kernel), : kernel(sched->kernel),
sched_loop(sched, id), sched_loop(sched, id, killed),
driver(&sched_loop) { driver(&sched_loop) {
} }
rust_thread_sched_launcher::rust_thread_sched_launcher(rust_scheduler *sched, rust_thread_sched_launcher::rust_thread_sched_launcher(rust_scheduler *sched,
int id) int id, bool killed)
: rust_sched_launcher(sched, id), : rust_sched_launcher(sched, id, killed),
rust_thread(SCHED_STACK_SIZE) { rust_thread(SCHED_STACK_SIZE) {
} }
rust_manual_sched_launcher::rust_manual_sched_launcher(rust_scheduler *sched, rust_manual_sched_launcher::rust_manual_sched_launcher(rust_scheduler *sched,
int id) int id, bool killed)
: rust_sched_launcher(sched, id) { : rust_sched_launcher(sched, id, killed) {
} }
rust_sched_launcher * rust_sched_launcher *
rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id) { rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id,
bool killed) {
return new(sched->kernel, "rust_thread_sched_launcher") return new(sched->kernel, "rust_thread_sched_launcher")
rust_thread_sched_launcher(sched, id); rust_thread_sched_launcher(sched, id, killed);
} }
rust_sched_launcher * rust_sched_launcher *
rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id) { rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id,
bool killed) {
assert(launcher == NULL && "I can only track one sched_launcher"); assert(launcher == NULL && "I can only track one sched_launcher");
launcher = new(sched->kernel, "rust_manual_sched_launcher") launcher = new(sched->kernel, "rust_manual_sched_launcher")
rust_manual_sched_launcher(sched, id); rust_manual_sched_launcher(sched, id, killed);
return launcher; return launcher;
} }

View file

@ -17,7 +17,7 @@ protected:
rust_sched_driver driver; rust_sched_driver driver;
public: public:
rust_sched_launcher(rust_scheduler *sched, int id); rust_sched_launcher(rust_scheduler *sched, int id, bool killed);
virtual ~rust_sched_launcher() { } virtual ~rust_sched_launcher() { }
virtual void start() = 0; virtual void start() = 0;
@ -29,7 +29,7 @@ class rust_thread_sched_launcher
:public rust_sched_launcher, :public rust_sched_launcher,
private rust_thread { private rust_thread {
public: public:
rust_thread_sched_launcher(rust_scheduler *sched, int id); rust_thread_sched_launcher(rust_scheduler *sched, int id, bool killed);
virtual void start() { rust_thread::start(); } virtual void start() { rust_thread::start(); }
virtual void join() { rust_thread::join(); } virtual void join() { rust_thread::join(); }
virtual void run() { driver.start_main_loop(); } virtual void run() { driver.start_main_loop(); }
@ -37,7 +37,7 @@ public:
class rust_manual_sched_launcher : public rust_sched_launcher { class rust_manual_sched_launcher : public rust_sched_launcher {
public: public:
rust_manual_sched_launcher(rust_scheduler *sched, int id); rust_manual_sched_launcher(rust_scheduler *sched, int id, bool killed);
virtual void start() { } virtual void start() { }
virtual void join() { } virtual void join() { }
rust_sched_driver *get_driver() { return &driver; }; rust_sched_driver *get_driver() { return &driver; };
@ -47,13 +47,14 @@ class rust_sched_launcher_factory {
public: public:
virtual ~rust_sched_launcher_factory() { } virtual ~rust_sched_launcher_factory() { }
virtual rust_sched_launcher * virtual rust_sched_launcher *
create(rust_scheduler *sched, int id) = 0; create(rust_scheduler *sched, int id, bool killed) = 0;
}; };
class rust_thread_sched_launcher_factory class rust_thread_sched_launcher_factory
: public rust_sched_launcher_factory { : public rust_sched_launcher_factory {
public: public:
virtual rust_sched_launcher *create(rust_scheduler *sched, int id); virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
bool killed);
}; };
class rust_manual_sched_launcher_factory class rust_manual_sched_launcher_factory
@ -62,7 +63,8 @@ private:
rust_manual_sched_launcher *launcher; rust_manual_sched_launcher *launcher;
public: public:
rust_manual_sched_launcher_factory() : launcher(NULL) { } rust_manual_sched_launcher_factory() : launcher(NULL) { }
virtual rust_sched_launcher *create(rust_scheduler *sched, int id); virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
bool killed);
rust_sched_driver *get_driver() { rust_sched_driver *get_driver() {
assert(launcher != NULL); assert(launcher != NULL);
return launcher->get_driver(); return launcher->get_driver();

View file

@ -13,12 +13,13 @@ const size_t C_STACK_SIZE = 1024*1024;
bool rust_sched_loop::tls_initialized = false; bool rust_sched_loop::tls_initialized = false;
rust_sched_loop::rust_sched_loop(rust_scheduler *sched,int id) : rust_sched_loop::rust_sched_loop(rust_scheduler *sched, int id, bool killed) :
_log(this), _log(this),
id(id), id(id),
should_exit(false), should_exit(false),
cached_c_stack(NULL), cached_c_stack(NULL),
dead_task(NULL), dead_task(NULL),
killed(killed),
pump_signal(NULL), pump_signal(NULL),
kernel(sched->kernel), kernel(sched->kernel),
sched(sched), sched(sched),
@ -63,6 +64,8 @@ rust_sched_loop::kill_all_tasks() {
{ {
scoped_lock with(lock); scoped_lock with(lock);
// Any task created after this will be killed. See transition, below.
killed = true;
for (size_t i = 0; i < running_tasks.length(); i++) { for (size_t i = 0; i < running_tasks.length(); i++) {
all_tasks.push_back(running_tasks[i]); all_tasks.push_back(running_tasks[i]);
@ -319,6 +322,11 @@ rust_sched_loop::transition(rust_task *task,
} }
task->set_state(dst, cond, cond_name); task->set_state(dst, cond, cond_name);
// If the entire runtime is failing, newborn tasks must be doomed.
if (src == task_state_newborn && killed) {
task->kill_inner();
}
pump_loop(); pump_loop();
} }

View file

@ -60,6 +60,7 @@ private:
rust_task_list running_tasks; rust_task_list running_tasks;
rust_task_list blocked_tasks; rust_task_list blocked_tasks;
rust_task *dead_task; rust_task *dead_task;
bool killed;
rust_signal *pump_signal; rust_signal *pump_signal;
@ -91,7 +92,7 @@ public:
// Only a pointer to 'name' is kept, so it must live as long as this // Only a pointer to 'name' is kept, so it must live as long as this
// domain. // domain.
rust_sched_loop(rust_scheduler *sched, int id); rust_sched_loop(rust_scheduler *sched, int id, bool killed);
void activate(rust_task *task); void activate(rust_task *task);
rust_log & get_log(); rust_log & get_log();
void fail(); void fail();
@ -107,6 +108,7 @@ public:
void log_state(); void log_state();
void kill_all_tasks(); void kill_all_tasks();
bool doomed();
rust_task *create_task(rust_task *spawner, const char *name); rust_task *create_task(rust_task *spawner, const char *name);

View file

@ -9,6 +9,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
size_t num_threads, size_t num_threads,
rust_sched_id id, rust_sched_id id,
bool allow_exit, bool allow_exit,
bool killed,
rust_sched_launcher_factory *launchfac) : rust_sched_launcher_factory *launchfac) :
kernel(kernel), kernel(kernel),
live_threads(num_threads), live_threads(num_threads),
@ -18,7 +19,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
num_threads(num_threads), num_threads(num_threads),
id(id) id(id)
{ {
create_task_threads(launchfac); create_task_threads(launchfac, killed);
} }
rust_scheduler::~rust_scheduler() { rust_scheduler::~rust_scheduler() {
@ -27,8 +28,8 @@ rust_scheduler::~rust_scheduler() {
rust_sched_launcher * rust_sched_launcher *
rust_scheduler::create_task_thread(rust_sched_launcher_factory *launchfac, rust_scheduler::create_task_thread(rust_sched_launcher_factory *launchfac,
int id) { int id, bool killed) {
rust_sched_launcher *thread = launchfac->create(this, id); rust_sched_launcher *thread = launchfac->create(this, id, killed);
KLOG(kernel, kern, "created task thread: " PTR ", id: %d", KLOG(kernel, kern, "created task thread: " PTR ", id: %d",
thread, id); thread, id);
return thread; return thread;
@ -41,11 +42,12 @@ rust_scheduler::destroy_task_thread(rust_sched_launcher *thread) {
} }
void void
rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac) { rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac,
bool killed) {
KLOG(kernel, kern, "Using %d scheduler threads.", num_threads); KLOG(kernel, kern, "Using %d scheduler threads.", num_threads);
for(size_t i = 0; i < num_threads; ++i) { for(size_t i = 0; i < num_threads; ++i) {
threads.push(create_task_thread(launchfac, i)); threads.push(create_task_thread(launchfac, i, killed));
} }
} }

View file

@ -34,18 +34,20 @@ private:
rust_sched_id id; rust_sched_id id;
void create_task_threads(rust_sched_launcher_factory *launchfac); void create_task_threads(rust_sched_launcher_factory *launchfac,
bool killed);
void destroy_task_threads(); void destroy_task_threads();
rust_sched_launcher * rust_sched_launcher *
create_task_thread(rust_sched_launcher_factory *launchfac, int id); create_task_thread(rust_sched_launcher_factory *launchfac, int id,
bool killed);
void destroy_task_thread(rust_sched_launcher *thread); void destroy_task_thread(rust_sched_launcher *thread);
void exit(); void exit();
public: public:
rust_scheduler(rust_kernel *kernel, size_t num_threads, rust_scheduler(rust_kernel *kernel, size_t num_threads,
rust_sched_id id, bool allow_exit, rust_sched_id id, bool allow_exit, bool killed,
rust_sched_launcher_factory *launchfac); rust_sched_launcher_factory *launchfac);
~rust_scheduler(); ~rust_scheduler();

View file

@ -257,8 +257,17 @@ rust_task::yield(bool *killed) {
void void
rust_task::kill() { rust_task::kill() {
scoped_lock with(lifecycle_lock); scoped_lock with(lifecycle_lock);
kill_inner();
}
// XXX: bblum: kill/kill race void rust_task::kill_inner() {
lifecycle_lock.must_have_lock();
// Multiple kills should be able to safely race, but check anyway.
if (killed) {
LOG(this, task, "task %s @0x%" PRIxPTR " already killed", name, this);
return;
}
// Note the distinction here: kill() is when you're in an upcall // Note the distinction here: kill() is when you're in an upcall
// from task A and want to force-fail task B, you do B->kill(). // from task A and want to force-fail task B, you do B->kill().

View file

@ -264,6 +264,7 @@ public:
// Fail this task (assuming caller-on-stack is different task). // Fail this task (assuming caller-on-stack is different task).
void kill(); void kill();
void kill_inner();
// Indicates that we've been killed and now is an apropriate // Indicates that we've been killed and now is an apropriate
// time to fail as a result // time to fail as a result