In the executor, use an array of pointers to access the rangetable.

Instead of doing a lot of list_nth() accesses to es_range_table,
create a flattened pointer array during executor startup and index
into that to get at individual RangeTblEntrys.

This eliminates one source of O(N^2) behavior with lots of partitions.
(I'm not exactly convinced that it's the most important source, but
it's an easy one to fix.)

Amit Langote and David Rowley

Discussion: https://postgr.es/m/468c85d9-540e-66a2-1dde-fec2b741e688@lab.ntt.co.jp
This commit is contained in:
Tom Lane 2018-10-04 15:48:17 -04:00
parent 9ddef36278
commit d73f4c74dd
11 changed files with 87 additions and 52 deletions

View file

@ -1345,7 +1345,7 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags)
rtindex = fsplan->scan.scanrelid;
else
rtindex = bms_next_member(fsplan->fs_relids, -1);
rte = rt_fetch(rtindex, estate->es_range_table);
rte = exec_rt_fetch(rtindex, estate);
userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
/* Get info about foreign table. */
@ -1731,8 +1731,8 @@ postgresBeginForeignModify(ModifyTableState *mtstate,
FdwModifyPrivateRetrievedAttrs);
/* Find RTE. */
rte = rt_fetch(resultRelInfo->ri_RangeTableIndex,
mtstate->ps.state->es_range_table);
rte = exec_rt_fetch(resultRelInfo->ri_RangeTableIndex,
mtstate->ps.state);
/* Construct an execution state. */
fmstate = create_foreign_modify(mtstate->ps.state,
@ -2036,7 +2036,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate,
* correspond to this partition if it is one of the UPDATE subplan target
* rels; in that case, we can just use the existing RTE as-is.
*/
rte = list_nth(estate->es_range_table, resultRelation - 1);
rte = exec_rt_fetch(resultRelation, estate);
if (rte->relid != RelationGetRelid(rel))
{
rte = copyObject(rte);
@ -2396,7 +2396,7 @@ postgresBeginDirectModify(ForeignScanState *node, int eflags)
* ExecCheckRTEPerms() does.
*/
rtindex = estate->es_result_relation_info->ri_RangeTableIndex;
rte = rt_fetch(rtindex, estate->es_range_table);
rte = exec_rt_fetch(rtindex, estate);
userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
/* Get info about foreign table. */
@ -5752,7 +5752,7 @@ conversion_error_callback(void *arg)
RangeTblEntry *rte;
Var *var = (Var *) tle->expr;
rte = rt_fetch(var->varno, estate->es_range_table);
rte = exec_rt_fetch(var->varno, estate);
if (var->varattno == 0)
is_wholerow = true;

View file

@ -2484,9 +2484,8 @@ CopyFrom(CopyState cstate)
estate->es_result_relations = resultRelInfo;
estate->es_num_result_relations = 1;
estate->es_result_relation_info = resultRelInfo;
estate->es_range_table = cstate->range_table;
estate->es_relations = (Relation *) palloc0(list_length(cstate->range_table) *
sizeof(Relation));
ExecInitRangeTable(estate, cstate->range_table);
/* Set up a tuple slot too */
myslot = ExecInitExtraTupleSlot(estate, tupDesc);

View file

@ -75,7 +75,7 @@ static int MyTriggerDepth = 0;
* to be changed, however.
*/
#define GetUpdatedColumns(relinfo, estate) \
(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols)
(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols)
/* Local function prototypes */
static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);

View file

@ -3934,10 +3934,10 @@ ExecEvalWholeRowVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
* perhaps other places.)
*/
if (econtext->ecxt_estate &&
variable->varno <= list_length(econtext->ecxt_estate->es_range_table))
variable->varno <= econtext->ecxt_estate->es_range_table_size)
{
RangeTblEntry *rte = rt_fetch(variable->varno,
econtext->ecxt_estate->es_range_table);
RangeTblEntry *rte = exec_rt_fetch(variable->varno,
econtext->ecxt_estate);
if (rte->eref)
ExecTypeSetColNames(output_tupdesc, rte->eref->colnames);

View file

@ -109,9 +109,9 @@ static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
* to be changed, however.
*/
#define GetInsertedColumns(relinfo, estate) \
(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->insertedCols)
(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->insertedCols)
#define GetUpdatedColumns(relinfo, estate) \
(rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols)
(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols)
/* end of local decls */
@ -823,15 +823,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
/*
* initialize the node's execution state
*/
estate->es_range_table = rangeTable;
/*
* Allocate an array to store an open Relation corresponding to each
* rangeTable item, and initialize entries to NULL. Relations are opened
* and stored here as needed.
*/
estate->es_relations = (Relation *) palloc0(list_length(rangeTable) *
sizeof(Relation));
ExecInitRangeTable(estate, rangeTable);
estate->es_plannedstmt = plannedstmt;
@ -918,9 +910,9 @@ InitPlan(QueryDesc *queryDesc, int eflags)
resultRelIndex))
{
Relation resultRelDesc;
Oid reloid = exec_rt_fetch(resultRelIndex, estate)->relid;
resultRelDesc = heap_open(getrelid(resultRelIndex, rangeTable),
NoLock);
resultRelDesc = heap_open(reloid, NoLock);
Assert(CheckRelationLockedByMe(resultRelDesc, RowExclusiveLock, true));
heap_close(resultRelDesc, NoLock);
}
@ -962,7 +954,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
continue;
/* get relation's OID (will produce InvalidOid if subquery) */
relid = getrelid(rc->rti, rangeTable);
relid = exec_rt_fetch(rc->rti, estate)->relid;
switch (rc->markType)
{
@ -1619,8 +1611,8 @@ static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
ResultRelInfo *resultRelInfo;
int num_relations;
int i;
Index num_relations;
Index i;
ListCell *l;
/*
@ -1661,7 +1653,7 @@ ExecEndPlan(PlanState *planstate, EState *estate)
* close whatever rangetable Relations have been opened. We did not
* acquire locks in ExecGetRangeTableRelation, so don't release 'em here.
*/
num_relations = list_length(estate->es_range_table);
num_relations = estate->es_range_table_size;
for (i = 0; i < num_relations; i++)
{
if (estate->es_relations[i])
@ -3087,7 +3079,7 @@ EvalPlanQualBegin(EPQState *epqstate, EState *parentestate)
/*
* We already have a suitable child EPQ tree, so just reset it.
*/
int rtsize = list_length(parentestate->es_range_table);
Index rtsize = parentestate->es_range_table_size;
PlanState *planstate = epqstate->planstate;
MemSet(estate->es_epqScanDone, 0, rtsize * sizeof(bool));
@ -3136,11 +3128,11 @@ static void
EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
{
EState *estate;
int rtsize;
Index rtsize;
MemoryContext oldcontext;
ListCell *l;
rtsize = list_length(parentestate->es_range_table);
rtsize = parentestate->es_range_table_size;
epqstate->estate = estate = CreateExecutorState();
@ -3164,6 +3156,8 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
estate->es_snapshot = parentestate->es_snapshot;
estate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot;
estate->es_range_table = parentestate->es_range_table;
estate->es_range_table_array = parentestate->es_range_table_array;
estate->es_range_table_size = parentestate->es_range_table_size;
estate->es_relations = parentestate->es_relations;
estate->es_plannedstmt = parentestate->es_plannedstmt;
estate->es_junkFilter = parentestate->es_junkFilter;

View file

@ -26,6 +26,8 @@
*
* ExecOpenScanRelation Common code for scan node init routines.
*
* ExecInitRangeTable Set up executor's range-table-related data.
*
* ExecGetRangeTableRelation Fetch Relation for a rangetable entry.
*
* executor_errposition Report syntactic position of an error.
@ -108,6 +110,8 @@ CreateExecutorState(void)
estate->es_snapshot = InvalidSnapshot; /* caller must initialize this */
estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */
estate->es_range_table = NIL;
estate->es_range_table_array = NULL;
estate->es_range_table_size = 0;
estate->es_relations = NULL;
estate->es_plannedstmt = NULL;
@ -670,6 +674,43 @@ ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags)
return rel;
}
/*
* ExecInitRangeTable
* Set up executor's range-table-related data
*
* We build an array from the range table list to allow faster lookup by RTI.
* (The es_range_table field is now somewhat redundant, but we keep it to
* avoid breaking external code unnecessarily.)
* This is also a convenient place to set up the parallel es_relations array.
*/
void
ExecInitRangeTable(EState *estate, List *rangeTable)
{
Index rti;
ListCell *lc;
/* Remember the range table List as-is */
estate->es_range_table = rangeTable;
/* Set up the equivalent array representation */
estate->es_range_table_size = list_length(rangeTable);
estate->es_range_table_array = (RangeTblEntry **)
palloc(estate->es_range_table_size * sizeof(RangeTblEntry *));
rti = 0;
foreach(lc, rangeTable)
{
estate->es_range_table_array[rti++] = lfirst_node(RangeTblEntry, lc);
}
/*
* Allocate an array to store an open Relation corresponding to each
* rangetable entry, and initialize entries to NULL. Relations are opened
* and stored here as needed.
*/
estate->es_relations = (Relation *)
palloc0(estate->es_range_table_size * sizeof(Relation));
}
/*
* ExecGetRangeTableRelation
* Open the Relation for a range table entry, if not already done
@ -681,13 +722,13 @@ ExecGetRangeTableRelation(EState *estate, Index rti)
{
Relation rel;
Assert(rti > 0 && rti <= list_length(estate->es_range_table));
Assert(rti > 0 && rti <= estate->es_range_table_size);
rel = estate->es_relations[rti - 1];
if (rel == NULL)
{
/* First time through, so open the relation */
RangeTblEntry *rte = rt_fetch(rti, estate->es_range_table);
RangeTblEntry *rte = exec_rt_fetch(rti, estate);
Assert(rte->rtekind == RTE_RELATION);
@ -876,7 +917,7 @@ ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
ListCell *l;
Index rti = lfirst_int(lc);
bool is_result_rel = false;
Oid relid = getrelid(rti, estate->es_range_table);
Oid relid = exec_rt_fetch(rti, estate)->relid;
/* If this is a result relation, already locked in InitPlan */
foreach(l, stmt->nonleafResultRelations)
@ -911,7 +952,7 @@ ExecLockNonLeafAppendTables(List *partitioned_rels, EState *estate)
else
lockmode = AccessShareLock;
Assert(lockmode == rt_fetch(rti, estate->es_range_table)->rellockmode);
Assert(lockmode == exec_rt_fetch(rti, estate)->rellockmode);
LockRelationOid(relid, lockmode);
}

View file

@ -400,7 +400,7 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
/*
* Create workspace in which we can remember per-RTE locked tuples
*/
lrstate->lr_ntables = list_length(estate->es_range_table);
lrstate->lr_ntables = estate->es_range_table_size;
lrstate->lr_curtuples = (HeapTuple *)
palloc0(lrstate->lr_ntables * sizeof(HeapTuple));

View file

@ -200,8 +200,7 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel)
rte->relid = RelationGetRelid(rel->localrel);
rte->relkind = rel->localrel->rd_rel->relkind;
rte->rellockmode = AccessShareLock;
estate->es_range_table = list_make1(rte);
estate->es_relations = (Relation *) palloc0(1 * sizeof(Relation));
ExecInitRangeTable(estate, list_make1(rte));
resultRelInfo = makeNode(ResultRelInfo);
InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);

View file

@ -514,6 +514,15 @@ extern bool ExecRelationIsTargetRelation(EState *estate, Index scanrelid);
extern Relation ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags);
extern void ExecInitRangeTable(EState *estate, List *rangeTable);
static inline RangeTblEntry *
exec_rt_fetch(Index rti, EState *estate)
{
Assert(rti > 0 && rti <= estate->es_range_table_size);
return estate->es_range_table_array[rti - 1];
}
extern Relation ExecGetRangeTableRelation(EState *estate, Index rti);
extern int executor_errposition(EState *estate, int location);

View file

@ -36,6 +36,7 @@ struct PlanState; /* forward references in this file */
struct ParallelHashJoinState;
struct ExprState;
struct ExprContext;
struct RangeTblEntry; /* avoid including parsenodes.h here */
struct ExprEvalStep; /* avoid including execExpr.h everywhere */
@ -486,7 +487,9 @@ typedef struct EState
Snapshot es_snapshot; /* time qual to use */
Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */
List *es_range_table; /* List of RangeTblEntry */
Relation *es_relations; /* Array of per-es_range_table-entry Relation
struct RangeTblEntry **es_range_table_array; /* equivalent array */
Index es_range_table_size; /* size of the range table arrays */
Relation *es_relations; /* Array of per-range-table-entry Relation
* pointers, or NULL if not yet opened */
PlannedStmt *es_plannedstmt; /* link to top of plan tree */
const char *es_sourceText; /* Source text from QueryDesc */
@ -563,7 +566,7 @@ typedef struct EState
* return, or NULL if nothing to return; es_epqTupleSet[] is true if a
* particular array entry is valid; and es_epqScanDone[] is state to
* remember if the tuple has been returned already. Arrays are of size
* list_length(es_range_table) and are indexed by scan node scanrelid - 1.
* es_range_table_size and are indexed by scan node scanrelid - 1.
*/
HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */
bool *es_epqTupleSet; /* true if EPQ tuple is provided */

View file

@ -31,16 +31,6 @@
#define rt_fetch(rangetable_index, rangetable) \
((RangeTblEntry *) list_nth(rangetable, (rangetable_index)-1))
/*
* getrelid
*
* Given the range index of a relation, return the corresponding
* relation OID. Note that InvalidOid will be returned if the
* RTE is for a non-relation-type RTE.
*/
#define getrelid(rangeindex,rangetable) \
(rt_fetch(rangeindex, rangetable)->relid)
/*
* Given an RTE and an attribute number, return the appropriate
* variable name or alias for that attribute of that RTE.