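Teach SLRU code to avoid replacing I/O-busy pages.

SlruSelectLRUPage now tracks the best replacement candidate separately
for valid pages and for pages that are not currently valid (I/O busy).
It prefers the least recently used valid page and waits on an I/O-busy
slot only when no valid candidate exists. With that in place,
SimpleLruReadPage no longer needs to mark a page as recently used before
releasing the control lock to do I/O.

Patch by me; review by Tom Lane and others.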
This commit is contained in:
Robert Haas 2012-04-08 23:04:07 -04:00
parent d5375491f8
commit 3ae5133b1c

View file

@ -403,12 +403,6 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
/* Acquire per-buffer lock (cannot deadlock, see notes at top) */
LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
/*
* Temporarily mark page as recently-used to discourage
* SlruSelectLRUPage from selecting it again for someone else.
*/
SlruRecentlyUsed(shared, slotno);
/* Release control lock while doing I/O */
LWLockRelease(shared->ControlLock);
@ -909,9 +903,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
{
int slotno;
int cur_count;
int bestslot;
int best_delta;
int best_page_number;
int bestvalidslot = 0; /* keep compiler quiet */
int best_valid_delta = -1;
int best_valid_page_number = 0; /* keep compiler quiet */
int bestinvalidslot = 0; /* keep compiler quiet */
int best_invalid_delta = -1;
int best_invalid_page_number = 0; /* keep compiler quiet */
/* See if page already has a buffer assigned */
for (slotno = 0; slotno < shared->num_slots; slotno++)
@ -922,8 +919,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
}
/*
* If we find any EMPTY slot, just select that one. Else locate the
* least-recently-used slot to replace.
* If we find any EMPTY slot, just select that one. Else choose a
* victim page to replace. We normally take the least recently used
* valid page, but we will never take the slot containing
* latest_page_number, even if it appears least recently used. We
* will select a slot that is already I/O busy only if there is no
* other choice: a read-busy slot will not be least recently used once
* the read finishes, and waiting for an I/O on a write-busy slot is
* inferior to just picking some other slot. Testing shows the slot
* we pick instead will often be clean, allowing us to begin a read
* at once.
*
* Normally the page_lru_count values will all be different and so
* there will be a well-defined LRU page. But since we allow
@ -932,9 +937,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
* acquire the same lru_count values. In that case we break ties by
* choosing the furthest-back page.
*
* In no case will we select the slot containing latest_page_number
* for replacement, even if it appears least recently used.
*
* Notice that this next line forcibly advances cur_lru_count to a
* value that is certainly beyond any value that will be in the
* page_lru_count array after the loop finishes. This ensures that
@ -944,9 +946,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
* multiple pages with the same lru_count.
*/
cur_count = (shared->cur_lru_count)++;
best_delta = -1;
bestslot = 0; /* no-op, just keeps compiler quiet */
best_page_number = 0; /* ditto */
for (slotno = 0; slotno < shared->num_slots; slotno++)
{
int this_delta;
@ -968,34 +967,57 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
this_delta = 0;
}
this_page_number = shared->page_number[slotno];
if ((this_delta > best_delta ||
(this_delta == best_delta &&
ctl->PagePrecedes(this_page_number, best_page_number))) &&
this_page_number != shared->latest_page_number)
if (this_page_number == shared->latest_page_number)
continue;
if (shared->page_status[slotno] == SLRU_PAGE_VALID)
{
bestslot = slotno;
best_delta = this_delta;
best_page_number = this_page_number;
if (this_delta > best_valid_delta ||
(this_delta == best_valid_delta &&
ctl->PagePrecedes(this_page_number,
best_valid_page_number)))
{
bestvalidslot = slotno;
best_valid_delta = this_delta;
best_valid_page_number = this_page_number;
}
}
else
{
if (this_delta > best_invalid_delta ||
(this_delta == best_invalid_delta &&
ctl->PagePrecedes(this_page_number,
best_invalid_page_number)))
{
bestinvalidslot = slotno;
best_invalid_delta = this_delta;
best_invalid_page_number = this_page_number;
}
}
}
/*
* If all pages (except possibly the latest one) are I/O busy, we'll
* have to wait for an I/O to complete and then retry. In that unhappy
* case, we choose to wait for the I/O on the least recently used slot,
* on the assumption that it was likely initiated first of all the I/Os
* in progress and may therefore finish first.
*/
if (best_valid_delta < 0)
{
SimpleLruWaitIO(ctl, bestinvalidslot);
continue;
}
/*
* If the selected page is clean, we're set.
*/
if (shared->page_status[bestslot] == SLRU_PAGE_VALID &&
!shared->page_dirty[bestslot])
return bestslot;
if (!shared->page_dirty[bestvalidslot])
return bestvalidslot;
/*
* We need to wait for I/O. Normal case is that it's dirty and we
* must initiate a write, but it's possible that the page is already
* write-busy, or in the worst case still read-busy. In those cases
* we wait for the existing I/O to complete.
* Write the page.
*/
if (shared->page_status[bestslot] == SLRU_PAGE_VALID)
SlruInternalWritePage(ctl, bestslot, NULL);
else
SimpleLruWaitIO(ctl, bestslot);
SlruInternalWritePage(ctl, bestvalidslot, NULL);
/*
* Now loop back and try again. This is the easiest way of dealing