Rewrite reap_dead_tasks to never grab the sched lock before a task lock

Doing so contradicts the locking order used everywhere else and causes deadlocks. Un-XFAIL task-perf-spawnalot Closes #854
2011-08-20 16:05:18 -07:00 · 2011-08-20 16:05:18 -07:00 · abdb6cd71b
commit abdb6cd71b
parent 25416bfae1
3 changed files with 47 additions and 9 deletions
--- a/src/rt/rust_scheduler.cpp
+++ b/src/rt/rust_scheduler.cpp
@ -100,25 +100,66 @@ rust_scheduler::number_of_live_tasks() {
 void
 rust_scheduler::reap_dead_tasks(int id) {
    I(this, lock.lock_held_by_current_thread());
-    for (size_t i = 0; i < dead_tasks.length(); ) {
+    if (dead_tasks.length() == 0) {
+        return;
+    }
+
+    // First make up copy of the dead_task list with the lock held
+    size_t dead_tasks_len = dead_tasks.length();
+    rust_task **dead_tasks_copy = (rust_task**)
+        srv->malloc(sizeof(rust_task*) * dead_tasks_len);
+    for (size_t i = 0; i < dead_tasks_len; ++i) {
        rust_task *task = dead_tasks[i];
+        dead_tasks_copy[i] = task;
+    }
+
+    // Now drop the lock and futz with the tasks. This avoids establishing
+    // a sched->lock then task->lock locking order, which would be devestating
+    // to performance.
+    lock.unlock();
+
+    for (size_t i = 0; i < dead_tasks_len; ++i) {
+        rust_task *task = dead_tasks_copy[i];
        task->lock.lock();
        // Make sure this task isn't still running somewhere else...
        if (task->can_schedule(id)) {
            I(this, task->tasks_waiting_to_join.is_empty());
-            dead_tasks.remove(task);
            DLOG(this, task,
                "deleting unreferenced dead task %s @0x%" PRIxPTR,
                task->name, task);
            task->lock.unlock();
+        } else {
+            task->lock.unlock();
+            dead_tasks_copy[i] = NULL;
+        }
+    }
+
+    // Now grab the lock again and remove the tasks that were truly dead
+    lock.lock();
+
+    for (size_t i = 0; i < dead_tasks_len; ++i) {
+        rust_task *task = dead_tasks_copy[i];
+        if (task) {
+            dead_tasks.remove(task);
+        }
+    }
+
+    // Now unlock again because we have to actually free the dead tasks,
+    // and that may end up wanting to do lock the task and sched locks
+    // again (via target->send)
+    lock.unlock();
+
+    for (size_t i = 0; i < dead_tasks_len; ++i) {
+        rust_task *task = dead_tasks_copy[i];
+        if (task) {
            task->deref();
            sync::decrement(kernel->live_tasks);
            kernel->wakeup_schedulers();
-            continue;
        }
-        task->lock.unlock();
-        ++i;
    }
+    srv->free(dead_tasks_copy);
+
+    lock.lock();
 }

 /**
--- a/src/rt/rust_task.cpp
+++ b/src/rt/rust_task.cpp
@ -90,6 +90,7 @@ rust_task::rust_task(rust_scheduler *sched, rust_task_list *state,

 rust_task::~rust_task()
 {
+    I(sched, !sched->lock.lock_held_by_current_thread());
    DLOG(sched, task, "~rust_task %s @0x%" PRIxPTR ", refcnt=%d",
         name, (uintptr_t)this, ref_count);

--- a/src/test/bench/task-perf-spawnalot.rs
+++ b/src/test/bench/task-perf-spawnalot.rs
@ -1,7 +1,3 @@
-// xfail-stage1
-// xfail-stage2
-// xfail-stage3
-
 use std;
 import std::vec;
 import std::task;