Fix sloppiness about alignment requirements in findsplitloc() space

calculation, also make it stop when it has a 'good enough' split instead
of exhaustively trying all split points.
This commit is contained in:
Tom Lane 2000-07-21 19:21:00 +00:00
parent 2f011a9c72
commit 1ea912e16d
2 changed files with 101 additions and 38 deletions

View file

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.60 2000/07/21 06:42:32 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.61 2000/07/21 19:21:00 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -377,6 +377,10 @@ _bt_insertonpg(Relation rel,
/* /*
* Do we need to split the page to fit the item on it? * Do we need to split the page to fit the item on it?
*
* Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its
* result, so this comparison is correct even though we appear to
* be accounting only for the item and not for its line pointer.
*/ */
if (PageGetFreeSpace(page) < itemsz) if (PageGetFreeSpace(page) < itemsz)
{ {
@ -743,11 +747,14 @@ _bt_findsplitloc(Relation rel,
FindSplitData state; FindSplitData state;
int leftspace, int leftspace,
rightspace, rightspace,
goodenough,
dataitemtotal, dataitemtotal,
dataitemstoleft; dataitemstoleft;
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
newitemsz += sizeof(ItemIdData);
state.newitemsz = newitemsz; state.newitemsz = newitemsz;
state.non_leaf = ! P_ISLEAF(opaque); state.non_leaf = ! P_ISLEAF(opaque);
state.have_split = false; state.have_split = false;
@ -758,11 +765,23 @@ _bt_findsplitloc(Relation rel,
MAXALIGN(sizeof(BTPageOpaqueData)) MAXALIGN(sizeof(BTPageOpaqueData))
+ sizeof(ItemIdData); + sizeof(ItemIdData);
/*
* Finding the best possible split would require checking all the possible
* split points, because of the high-key and left-key special cases.
* That's probably more work than it's worth; instead, stop as soon as
* we find a "good-enough" split, where good-enough is defined as an
* imbalance in free space of no more than pagesize/16 (arbitrary...)
* This should let us stop near the middle on most pages, instead of
* plowing to the end.
*/
goodenough = leftspace / 16;
/* The right page will have the same high key as the old page */ /* The right page will have the same high key as the old page */
if (!P_RIGHTMOST(opaque)) if (!P_RIGHTMOST(opaque))
{ {
itemid = PageGetItemId(page, P_HIKEY); itemid = PageGetItemId(page, P_HIKEY);
rightspace -= (int) (ItemIdGetLength(itemid) + sizeof(ItemIdData)); rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
sizeof(ItemIdData));
} }
/* Count up total space in data items without actually scanning 'em */ /* Count up total space in data items without actually scanning 'em */
@ -770,8 +789,7 @@ _bt_findsplitloc(Relation rel,
/* /*
* Scan through the data items and calculate space usage for a split * Scan through the data items and calculate space usage for a split
* at each possible position. XXX we could probably stop somewhere * at each possible position.
* near the middle...
*/ */
dataitemstoleft = 0; dataitemstoleft = 0;
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
@ -785,44 +803,65 @@ _bt_findsplitloc(Relation rel,
rightfree; rightfree;
itemid = PageGetItemId(page, offnum); itemid = PageGetItemId(page, offnum);
itemsz = ItemIdGetLength(itemid) + sizeof(ItemIdData); itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
/* /*
* We have to allow for the current item becoming the high key of * We have to allow for the current item becoming the high key of
* the left page; therefore it counts against left space. * the left page; therefore it counts against left space as well
* as right space.
*/ */
leftfree = leftspace - dataitemstoleft - (int) itemsz; leftfree = leftspace - dataitemstoleft - (int) itemsz;
rightfree = rightspace - (dataitemtotal - dataitemstoleft); rightfree = rightspace - (dataitemtotal - dataitemstoleft);
if (offnum < newitemoff) /*
* Will the new item go to left or right of split?
*/
if (offnum > newitemoff)
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
true, itemsz);
else if (offnum < newitemoff)
_bt_checksplitloc(&state, offnum, leftfree, rightfree, _bt_checksplitloc(&state, offnum, leftfree, rightfree,
false, itemsz); false, itemsz);
else if (offnum > newitemoff)
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
true, itemsz);
else else
{ {
/* need to try it both ways!! */ /* need to try it both ways! */
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
false, newitemsz);
_bt_checksplitloc(&state, offnum, leftfree, rightfree, _bt_checksplitloc(&state, offnum, leftfree, rightfree,
true, itemsz); true, itemsz);
/* here we are contemplating newitem as first on right */
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
false, newitemsz);
} }
/* Abort scan once we find a good-enough choice */
if (state.have_split && state.best_delta <= goodenough)
break;
dataitemstoleft += itemsz; dataitemstoleft += itemsz;
} }
/*
* I believe it is not possible to fail to find a feasible split,
* but just in case ...
*/
if (! state.have_split) if (! state.have_split)
elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s", elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s",
RelationGetRelationName(rel)); RelationGetRelationName(rel));
*newitemonleft = state.newitemonleft; *newitemonleft = state.newitemonleft;
return state.firstright; return state.firstright;
} }
/*
* Subroutine to analyze a particular possible split choice (ie, firstright
* and newitemonleft settings), and record the best split so far in *state.
*/
static void static void
_bt_checksplitloc(FindSplitData *state, OffsetNumber firstright, _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
int leftfree, int rightfree, int leftfree, int rightfree,
bool newitemonleft, Size firstrightitemsz) bool newitemonleft, Size firstrightitemsz)
{ {
/*
* Account for the new item on whichever side it is to be put.
*/
if (newitemonleft) if (newitemonleft)
leftfree -= (int) state->newitemsz; leftfree -= (int) state->newitemsz;
else else
@ -833,7 +872,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
*/ */
if (state->non_leaf) if (state->non_leaf)
rightfree += (int) firstrightitemsz - rightfree += (int) firstrightitemsz -
(int) (sizeof(BTItemData) + sizeof(ItemIdData)); (int) (MAXALIGN(sizeof(BTItemData)) + sizeof(ItemIdData));
/* /*
* If feasible split point, remember best delta. * If feasible split point, remember best delta.
*/ */

View file

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.38 2000/07/21 06:42:33 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.39 2000/07/21 19:21:00 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -103,6 +103,9 @@ _bt_freeskey(ScanKey skey)
pfree(skey); pfree(skey);
} }
/*
* free a retracement stack made by _bt_search.
*/
void void
_bt_freestack(BTStack stack) _bt_freestack(BTStack stack)
{ {
@ -116,12 +119,38 @@ _bt_freestack(BTStack stack)
} }
} }
/*
* Construct a BTItem from a plain IndexTuple.
*
* This is now useless code, since a BTItem *is* an index tuple with
* no extra stuff. We hang onto it for the moment to preserve the
* notational distinction, in case we want to add some extra stuff
* again someday.
*/
BTItem
_bt_formitem(IndexTuple itup)
{
int nbytes_btitem;
BTItem btitem;
Size tuplen;
extern Oid newoid();
/* make a copy of the index tuple with room for extra stuff */
tuplen = IndexTupleSize(itup);
nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
btitem = (BTItem) palloc(nbytes_btitem);
memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
return btitem;
}
/* /*
* _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals. * _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
* *
* The order of the keys in the qual match the ordering imposed by * The order of the keys in the qual match the ordering imposed by
* the index. This routine only needs to be called if there are * the index. This routine only needs to be called if there is
* more than one qual clauses using this index. * more than one qual clause using this index.
*/ */
void void
_bt_orderkeys(Relation relation, BTScanOpaque so) _bt_orderkeys(Relation relation, BTScanOpaque so)
@ -189,7 +218,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
if (i == numberOfKeys || cur->sk_attno != attno) if (i == numberOfKeys || cur->sk_attno != attno)
{ {
if (cur->sk_attno != attno + 1 && i < numberOfKeys) if (cur->sk_attno != attno + 1 && i < numberOfKeys)
elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1); elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed",
attno + 1);
underEqualStrategy = (!equalStrategyEnd); underEqualStrategy = (!equalStrategyEnd);
@ -320,24 +350,18 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
pfree(xform); pfree(xform);
} }
BTItem /*
_bt_formitem(IndexTuple itup) * Test whether an indextuple satisfies all the scankey conditions
{ *
int nbytes_btitem; * If not ("false" return), the number of conditions satisfied is
BTItem btitem; * returned in *keysok. Given proper ordering of the scankey conditions,
Size tuplen; * we can use this to determine whether it's worth continuing the scan.
extern Oid newoid(); * See _bt_orderkeys().
*
/* make a copy of the index tuple with room for the sequence number */ * HACK: *keysok == (Size) -1 means we stopped evaluating because we found
tuplen = IndexTupleSize(itup); * a NULL value in the index tuple. It's not quite clear to me why this
nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData)); * case has to be treated specially --- tgl 7/00.
*/
btitem = (BTItem) palloc(nbytes_btitem);
memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
return btitem;
}
bool bool
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok) _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
{ {
@ -389,9 +413,9 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE)) if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
return false; return false;
keysz -= 1;
key++;
(*keysok)++; (*keysok)++;
key++;
keysz--;
} }
return true; return true;