diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 04e6c8cfdd..8d05b19934 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -58,11 +58,8 @@ */ typedef struct foreign_glob_cxt { - /* Input values */ - PlannerInfo *root; - RelOptInfo *foreignrel; - /* Result values */ - List *param_numbers; /* Param IDs of PARAM_EXTERN Params */ + PlannerInfo *root; /* global planner state */ + RelOptInfo *foreignrel; /* the foreign relation we are planning for */ } foreign_glob_cxt; /* @@ -82,12 +79,21 @@ typedef struct foreign_loc_cxt FDWCollateState state; /* state of current collation choice */ } foreign_loc_cxt; +/* + * Context for deparseExpr + */ +typedef struct deparse_expr_cxt +{ + PlannerInfo *root; /* global planner state */ + RelOptInfo *foreignrel; /* the foreign relation we are planning for */ + StringInfo buf; /* output buffer to append to */ + List **params_list; /* exprs that will become remote Params */ +} deparse_expr_cxt; + /* * Functions to determine whether an expression can be evaluated safely on * remote server. 
*/ -static bool is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, - Expr *expr, List **param_numbers); static bool foreign_expr_walker(Node *node, foreign_glob_cxt *glob_cxt, foreign_loc_cxt *outer_cxt); @@ -108,76 +114,46 @@ static void deparseColumnRef(StringInfo buf, int varno, int varattno, PlannerInfo *root); static void deparseRelation(StringInfo buf, Relation rel); static void deparseStringLiteral(StringInfo buf, const char *val); -static void deparseExpr(StringInfo buf, Expr *expr, PlannerInfo *root); -static void deparseVar(StringInfo buf, Var *node, PlannerInfo *root); -static void deparseConst(StringInfo buf, Const *node, PlannerInfo *root); -static void deparseParam(StringInfo buf, Param *node, PlannerInfo *root); -static void deparseArrayRef(StringInfo buf, ArrayRef *node, PlannerInfo *root); -static void deparseFuncExpr(StringInfo buf, FuncExpr *node, PlannerInfo *root); -static void deparseOpExpr(StringInfo buf, OpExpr *node, PlannerInfo *root); +static void deparseExpr(Expr *expr, deparse_expr_cxt *context); +static void deparseVar(Var *node, deparse_expr_cxt *context); +static void deparseConst(Const *node, deparse_expr_cxt *context); +static void deparseParam(Param *node, deparse_expr_cxt *context); +static void deparseArrayRef(ArrayRef *node, deparse_expr_cxt *context); +static void deparseFuncExpr(FuncExpr *node, deparse_expr_cxt *context); +static void deparseOpExpr(OpExpr *node, deparse_expr_cxt *context); static void deparseOperatorName(StringInfo buf, Form_pg_operator opform); -static void deparseDistinctExpr(StringInfo buf, DistinctExpr *node, - PlannerInfo *root); -static void deparseScalarArrayOpExpr(StringInfo buf, ScalarArrayOpExpr *node, - PlannerInfo *root); -static void deparseRelabelType(StringInfo buf, RelabelType *node, - PlannerInfo *root); -static void deparseBoolExpr(StringInfo buf, BoolExpr *node, PlannerInfo *root); -static void deparseNullTest(StringInfo buf, NullTest *node, PlannerInfo *root); -static void 
deparseArrayExpr(StringInfo buf, ArrayExpr *node, - PlannerInfo *root); +static void deparseDistinctExpr(DistinctExpr *node, deparse_expr_cxt *context); +static void deparseScalarArrayOpExpr(ScalarArrayOpExpr *node, + deparse_expr_cxt *context); +static void deparseRelabelType(RelabelType *node, deparse_expr_cxt *context); +static void deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context); +static void deparseNullTest(NullTest *node, deparse_expr_cxt *context); +static void deparseArrayExpr(ArrayExpr *node, deparse_expr_cxt *context); /* * Examine each restriction clause in baserel's baserestrictinfo list, - * and classify them into three groups, which are returned as three lists: - * - remote_conds contains expressions that can be evaluated remotely, - * and contain no PARAM_EXTERN Params - * - param_conds contains expressions that can be evaluated remotely, - * but contain one or more PARAM_EXTERN Params - * - local_conds contains all expressions that can't be evaluated remotely - * - * In addition, the fourth output parameter param_numbers receives an integer - * list of the param IDs of the PARAM_EXTERN Params used in param_conds. - * - * The reason for segregating param_conds is mainly that it's difficult to - * use such conditions in remote EXPLAIN. We could do it, but unless the - * planner has been given representative values for all the Params, we'd - * have to guess at representative values to use in EXPLAIN EXECUTE. - * So for now we don't include them when doing remote EXPLAIN. 
+ * and classify them into two groups, which are returned as two lists: + * - remote_conds contains expressions that can be evaluated remotely + * - local_conds contains expressions that can't be evaluated remotely */ void classifyConditions(PlannerInfo *root, RelOptInfo *baserel, List **remote_conds, - List **param_conds, - List **local_conds, - List **param_numbers) + List **local_conds) { ListCell *lc; *remote_conds = NIL; - *param_conds = NIL; *local_conds = NIL; - *param_numbers = NIL; foreach(lc, baserel->baserestrictinfo) { RestrictInfo *ri = (RestrictInfo *) lfirst(lc); - List *cur_param_numbers; - if (is_foreign_expr(root, baserel, ri->clause, &cur_param_numbers)) - { - if (cur_param_numbers == NIL) - *remote_conds = lappend(*remote_conds, ri); - else - { - *param_conds = lappend(*param_conds, ri); - /* Use list_concat_unique_int to get rid of duplicates */ - *param_numbers = list_concat_unique_int(*param_numbers, - cur_param_numbers); - } - } + if (is_foreign_expr(root, baserel, ri->clause)) + *remote_conds = lappend(*remote_conds, ri); else *local_conds = lappend(*local_conds, ri); } @@ -185,28 +161,21 @@ classifyConditions(PlannerInfo *root, /* * Returns true if given expr is safe to evaluate on the foreign server. - * - * If result is true, we also return a list of param IDs of PARAM_EXTERN - * Params appearing in the expr into *param_numbers. */ -static bool +bool is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, - Expr *expr, - List **param_numbers) + Expr *expr) { foreign_glob_cxt glob_cxt; foreign_loc_cxt loc_cxt; - *param_numbers = NIL; /* default result */ - /* * Check that the expression consists of nodes that are safe to execute * remotely. 
*/ glob_cxt.root = root; glob_cxt.foreignrel = baserel; - glob_cxt.param_numbers = NIL; loc_cxt.collation = InvalidOid; loc_cxt.state = FDW_COLLATE_NONE; if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt)) @@ -226,18 +195,14 @@ is_foreign_expr(PlannerInfo *root, if (contain_mutable_functions((Node *) expr)) return false; - /* - * OK, so return list of param IDs too. - */ - *param_numbers = glob_cxt.param_numbers; - + /* OK to evaluate on the remote server */ return true; } /* * Check if expression is safe to execute remotely, and return true if so. * - * In addition, glob_cxt->param_numbers and *outer_cxt are updated. + * In addition, *outer_cxt is updated with collation information. * * We must check that the expression contains only node types we can deparse, * that all types/functions/operators are safe to send (which we approximate @@ -271,19 +236,30 @@ foreign_expr_walker(Node *node, Var *var = (Var *) node; /* - * Var can be used if it is in the foreign table (we shouldn't - * really see anything else in baserestrict clauses, but let's - * check anyway). + * If the Var is from the foreign table, we consider its + * collation (if any) safe to use. If it is from another + * table, we treat its collation the same way as we would a + * Param's collation, ie it's not safe for it to have a + * non-default collation. */ - if (var->varno != glob_cxt->foreignrel->relid || - var->varlevelsup != 0) - return false; + if (var->varno == glob_cxt->foreignrel->relid && + var->varlevelsup == 0) + { + /* Var belongs to foreign table */ + collation = var->varcollid; + state = OidIsValid(collation) ? FDW_COLLATE_SAFE : FDW_COLLATE_NONE; + } + else + { + /* Var belongs to some other table */ + if (var->varcollid != InvalidOid && + var->varcollid != DEFAULT_COLLATION_OID) + return false; - /* - * If Var has a collation, consider that safe to use. - */ - collation = var->varcollid; - state = OidIsValid(collation) ? 
FDW_COLLATE_SAFE : FDW_COLLATE_NONE; + /* We can consider that it doesn't set collation */ + collation = InvalidOid; + state = FDW_COLLATE_NONE; + } } break; case T_Const: @@ -308,30 +284,15 @@ foreign_expr_walker(Node *node, { Param *p = (Param *) node; - /* - * Only external parameters can be sent to remote. (XXX This - * needs to be improved, but at the point where this code - * runs, we should only see PARAM_EXTERN Params anyway.) - */ - if (p->paramkind != PARAM_EXTERN) - return false; - /* * Collation handling is same as for Consts. */ if (p->paramcollid != InvalidOid && p->paramcollid != DEFAULT_COLLATION_OID) return false; + collation = InvalidOid; state = FDW_COLLATE_NONE; - - /* - * Report IDs of PARAM_EXTERN Params. We don't bother to - * eliminate duplicate list elements here; classifyConditions - * will do that. - */ - glob_cxt->param_numbers = lappend_int(glob_cxt->param_numbers, - p->paramid); } break; case T_ArrayRef: @@ -791,17 +752,38 @@ deparseTargetList(StringInfo buf, /* * Deparse WHERE clauses in given list of RestrictInfos and append them to buf. * + * baserel is the foreign table we're planning for. + * * If no WHERE clause already exists in the buffer, is_first should be true. + * + * If params is not NULL, it receives a list of Params and other-relation Vars + * used in the clauses; these values must be transmitted to the remote server + * as parameter values. + * + * If params is NULL, we're generating the query for EXPLAIN purposes, + * so Params and other-relation Vars should be replaced by dummy values. 
*/ void appendWhereClause(StringInfo buf, PlannerInfo *root, + RelOptInfo *baserel, List *exprs, - bool is_first) + bool is_first, + List **params) { + deparse_expr_cxt context; int nestlevel; ListCell *lc; + if (params) + *params = NIL; /* initialize result list to empty */ + + /* Set up context struct for recursion */ + context.root = root; + context.foreignrel = baserel; + context.buf = buf; + context.params_list = params; + /* Make sure any constants in the exprs are printed portably */ nestlevel = set_transmission_modes(); @@ -816,7 +798,7 @@ appendWhereClause(StringInfo buf, appendStringInfoString(buf, " AND "); appendStringInfoChar(buf, '('); - deparseExpr(buf, ri->clause, root); + deparseExpr(ri->clause, &context); appendStringInfoChar(buf, ')'); is_first = false; @@ -1145,7 +1127,7 @@ deparseStringLiteral(StringInfo buf, const char *val) } /* - * Deparse given expression into buf. + * Deparse given expression into context->buf. * * This function must support all the same node types that foreign_expr_walker * accepts. @@ -1155,7 +1137,7 @@ deparseStringLiteral(StringInfo buf, const char *val) * should be self-parenthesized. 
*/ static void -deparseExpr(StringInfo buf, Expr *node, PlannerInfo *root) +deparseExpr(Expr *node, deparse_expr_cxt *context) { if (node == NULL) return; @@ -1163,40 +1145,40 @@ deparseExpr(StringInfo buf, Expr *node, PlannerInfo *root) switch (nodeTag(node)) { case T_Var: - deparseVar(buf, (Var *) node, root); + deparseVar((Var *) node, context); break; case T_Const: - deparseConst(buf, (Const *) node, root); + deparseConst((Const *) node, context); break; case T_Param: - deparseParam(buf, (Param *) node, root); + deparseParam((Param *) node, context); break; case T_ArrayRef: - deparseArrayRef(buf, (ArrayRef *) node, root); + deparseArrayRef((ArrayRef *) node, context); break; case T_FuncExpr: - deparseFuncExpr(buf, (FuncExpr *) node, root); + deparseFuncExpr((FuncExpr *) node, context); break; case T_OpExpr: - deparseOpExpr(buf, (OpExpr *) node, root); + deparseOpExpr((OpExpr *) node, context); break; case T_DistinctExpr: - deparseDistinctExpr(buf, (DistinctExpr *) node, root); + deparseDistinctExpr((DistinctExpr *) node, context); break; case T_ScalarArrayOpExpr: - deparseScalarArrayOpExpr(buf, (ScalarArrayOpExpr *) node, root); + deparseScalarArrayOpExpr((ScalarArrayOpExpr *) node, context); break; case T_RelabelType: - deparseRelabelType(buf, (RelabelType *) node, root); + deparseRelabelType((RelabelType *) node, context); break; case T_BoolExpr: - deparseBoolExpr(buf, (BoolExpr *) node, root); + deparseBoolExpr((BoolExpr *) node, context); break; case T_NullTest: - deparseNullTest(buf, (NullTest *) node, root); + deparseNullTest((NullTest *) node, context); break; case T_ArrayExpr: - deparseArrayExpr(buf, (ArrayExpr *) node, root); + deparseArrayExpr((ArrayExpr *) node, context); break; default: elog(ERROR, "unsupported expression type for deparse: %d", @@ -1206,23 +1188,69 @@ deparseExpr(StringInfo buf, Expr *node, PlannerInfo *root) } /* - * Deparse given Var node into buf. + * Deparse given Var node into context->buf. 
+ * + * If the Var belongs to the foreign relation, just print its remote name. + * Otherwise, it's effectively a Param (and will in fact be a Param at + * run time). Handle it the same way we handle plain Params --- see + * deparseParam for comments. */ static void -deparseVar(StringInfo buf, Var *node, PlannerInfo *root) +deparseVar(Var *node, deparse_expr_cxt *context) { - Assert(node->varlevelsup == 0); - deparseColumnRef(buf, node->varno, node->varattno, root); + StringInfo buf = context->buf; + + if (node->varno == context->foreignrel->relid && + node->varlevelsup == 0) + { + /* Var belongs to foreign table */ + deparseColumnRef(buf, node->varno, node->varattno, context->root); + } + else + { + /* Treat like a Param */ + if (context->params_list) + { + int pindex = 0; + ListCell *lc; + + /* find its index in params_list */ + foreach(lc, *context->params_list) + { + pindex++; + if (equal(node, (Node *) lfirst(lc))) + break; + } + if (lc == NULL) + { + /* not in list, so add it */ + pindex++; + *context->params_list = lappend(*context->params_list, node); + } + + appendStringInfo(buf, "$%d", pindex); + appendStringInfo(buf, "::%s", + format_type_with_typemod(node->vartype, + node->vartypmod)); + } + else + { + appendStringInfo(buf, "(SELECT null::%s)", + format_type_with_typemod(node->vartype, + node->vartypmod)); + } + } } /* - * Deparse given constant value into buf. + * Deparse given constant value into context->buf. * * This function has to be kept in sync with ruleutils.c's get_const_expr. */ static void -deparseConst(StringInfo buf, Const *node, PlannerInfo *root) +deparseConst(Const *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; Oid typoutput; bool typIsVarlena; char *extval; @@ -1312,11 +1340,19 @@ deparseConst(StringInfo buf, Const *node, PlannerInfo *root) } /* - * Deparse given Param node into buf. + * Deparse given Param node. 
* - * We don't need to renumber the parameter ID, because the executor functions - * in postgres_fdw.c preserve the numbering of PARAM_EXTERN Params. - * (This might change soon.) + * If we're generating the query "for real", add the Param to + * context->params_list if it's not already present, and then use its index + * in that list as the remote parameter number. + * + * If we're just generating the query for EXPLAIN, replace the Param with + * a dummy expression "(SELECT null::<type>)". In all extant versions of + * Postgres, the planner will see that as an unknown constant value, which is + * what we want. (If we sent a Param, recent versions might try to use the + * value supplied for the Param as an estimated or even constant value, which + * we don't want.) This might need adjustment if we ever make the planner + * flatten scalar subqueries. * * Note: we label the Param's type explicitly rather than relying on * transmitting a numeric type OID in PQexecParams(). This allows us to @@ -1324,21 +1360,49 @@ deparseConst(StringInfo buf, Const *node, PlannerInfo *root) * do locally --- they need only have the same names. 
*/ static void -deparseParam(StringInfo buf, Param *node, PlannerInfo *root) +deparseParam(Param *node, deparse_expr_cxt *context) { - Assert(node->paramkind == PARAM_EXTERN); - appendStringInfo(buf, "$%d", node->paramid); - appendStringInfo(buf, "::%s", - format_type_with_typemod(node->paramtype, - node->paramtypmod)); + StringInfo buf = context->buf; + + if (context->params_list) + { + int pindex = 0; + ListCell *lc; + + /* find its index in params_list */ + foreach(lc, *context->params_list) + { + pindex++; + if (equal(node, (Node *) lfirst(lc))) + break; + } + if (lc == NULL) + { + /* not in list, so add it */ + pindex++; + *context->params_list = lappend(*context->params_list, node); + } + + appendStringInfo(buf, "$%d", pindex); + appendStringInfo(buf, "::%s", + format_type_with_typemod(node->paramtype, + node->paramtypmod)); + } + else + { + appendStringInfo(buf, "(SELECT null::%s)", + format_type_with_typemod(node->paramtype, + node->paramtypmod)); + } } /* * Deparse an array subscript expression. */ static void -deparseArrayRef(StringInfo buf, ArrayRef *node, PlannerInfo *root) +deparseArrayRef(ArrayRef *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; ListCell *lowlist_item; ListCell *uplist_item; @@ -1352,11 +1416,11 @@ deparseArrayRef(StringInfo buf, ArrayRef *node, PlannerInfo *root) * case of subscripting a Var, but otherwise do it. 
*/ if (IsA(node->refexpr, Var)) - deparseExpr(buf, node->refexpr, root); + deparseExpr(node->refexpr, context); else { appendStringInfoChar(buf, '('); - deparseExpr(buf, node->refexpr, root); + deparseExpr(node->refexpr, context); appendStringInfoChar(buf, ')'); } @@ -1367,11 +1431,11 @@ deparseArrayRef(StringInfo buf, ArrayRef *node, PlannerInfo *root) appendStringInfoChar(buf, '['); if (lowlist_item) { - deparseExpr(buf, lfirst(lowlist_item), root); + deparseExpr(lfirst(lowlist_item), context); appendStringInfoChar(buf, ':'); lowlist_item = lnext(lowlist_item); } - deparseExpr(buf, lfirst(uplist_item), root); + deparseExpr(lfirst(uplist_item), context); appendStringInfoChar(buf, ']'); } @@ -1379,11 +1443,12 @@ deparseArrayRef(StringInfo buf, ArrayRef *node, PlannerInfo *root) } /* - * Deparse given node which represents a function call into buf. + * Deparse a function call. */ static void -deparseFuncExpr(StringInfo buf, FuncExpr *node, PlannerInfo *root) +deparseFuncExpr(FuncExpr *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; HeapTuple proctup; Form_pg_proc procform; const char *proname; @@ -1397,7 +1462,7 @@ deparseFuncExpr(StringInfo buf, FuncExpr *node, PlannerInfo *root) */ if (node->funcformat == COERCE_IMPLICIT_CAST) { - deparseExpr(buf, (Expr *) linitial(node->args), root); + deparseExpr((Expr *) linitial(node->args), context); return; } @@ -1413,7 +1478,7 @@ deparseFuncExpr(StringInfo buf, FuncExpr *node, PlannerInfo *root) /* Get the typmod if this is a length-coercion function */ (void) exprIsLengthCoercion((Node *) node, &coercedTypmod); - deparseExpr(buf, (Expr *) linitial(node->args), root); + deparseExpr((Expr *) linitial(node->args), context); appendStringInfo(buf, "::%s", format_type_with_typemod(rettype, coercedTypmod)); return; @@ -1458,7 +1523,7 @@ deparseFuncExpr(StringInfo buf, FuncExpr *node, PlannerInfo *root) appendStringInfoString(buf, ", "); if (use_variadic && lnext(arg) == NULL) appendStringInfoString(buf, 
"VARIADIC "); - deparseExpr(buf, (Expr *) lfirst(arg), root); + deparseExpr((Expr *) lfirst(arg), context); first = false; } appendStringInfoChar(buf, ')'); @@ -1467,12 +1532,13 @@ deparseFuncExpr(StringInfo buf, FuncExpr *node, PlannerInfo *root) } /* - * Deparse given operator expression into buf. To avoid problems around + * Deparse given operator expression. To avoid problems around * priority of operations, we always parenthesize the arguments. */ static void -deparseOpExpr(StringInfo buf, OpExpr *node, PlannerInfo *root) +deparseOpExpr(OpExpr *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; HeapTuple tuple; Form_pg_operator form; char oprkind; @@ -1497,7 +1563,7 @@ deparseOpExpr(StringInfo buf, OpExpr *node, PlannerInfo *root) if (oprkind == 'r' || oprkind == 'b') { arg = list_head(node->args); - deparseExpr(buf, lfirst(arg), root); + deparseExpr(lfirst(arg), context); appendStringInfoChar(buf, ' '); } @@ -1509,7 +1575,7 @@ deparseOpExpr(StringInfo buf, OpExpr *node, PlannerInfo *root) { arg = list_tail(node->args); appendStringInfoChar(buf, ' '); - deparseExpr(buf, lfirst(arg), root); + deparseExpr(lfirst(arg), context); } appendStringInfoChar(buf, ')'); @@ -1549,26 +1615,27 @@ deparseOperatorName(StringInfo buf, Form_pg_operator opform) * Deparse IS DISTINCT FROM. */ static void -deparseDistinctExpr(StringInfo buf, DistinctExpr *node, PlannerInfo *root) +deparseDistinctExpr(DistinctExpr *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; + Assert(list_length(node->args) == 2); appendStringInfoChar(buf, '('); - deparseExpr(buf, linitial(node->args), root); + deparseExpr(linitial(node->args), context); appendStringInfoString(buf, " IS DISTINCT FROM "); - deparseExpr(buf, lsecond(node->args), root); + deparseExpr(lsecond(node->args), context); appendStringInfoChar(buf, ')'); } /* - * Deparse given ScalarArrayOpExpr expression into buf. To avoid problems + * Deparse given ScalarArrayOpExpr expression. 
To avoid problems * around priority of operations, we always parenthesize the arguments. */ static void -deparseScalarArrayOpExpr(StringInfo buf, - ScalarArrayOpExpr *node, - PlannerInfo *root) +deparseScalarArrayOpExpr(ScalarArrayOpExpr *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; HeapTuple tuple; Form_pg_operator form; Expr *arg1; @@ -1588,7 +1655,7 @@ deparseScalarArrayOpExpr(StringInfo buf, /* Deparse left operand. */ arg1 = linitial(node->args); - deparseExpr(buf, arg1, root); + deparseExpr(arg1, context); appendStringInfoChar(buf, ' '); /* Deparse operator name plus decoration. */ @@ -1597,7 +1664,7 @@ deparseScalarArrayOpExpr(StringInfo buf, /* Deparse right operand. */ arg2 = lsecond(node->args); - deparseExpr(buf, arg2, root); + deparseExpr(arg2, context); appendStringInfoChar(buf, ')'); @@ -1611,11 +1678,11 @@ deparseScalarArrayOpExpr(StringInfo buf, * Deparse a RelabelType (binary-compatible cast) node. */ static void -deparseRelabelType(StringInfo buf, RelabelType *node, PlannerInfo *root) +deparseRelabelType(RelabelType *node, deparse_expr_cxt *context) { - deparseExpr(buf, node->arg, root); + deparseExpr(node->arg, context); if (node->relabelformat != COERCE_IMPLICIT_CAST) - appendStringInfo(buf, "::%s", + appendStringInfo(context->buf, "::%s", format_type_with_typemod(node->resulttype, node->resulttypmod)); } @@ -1627,8 +1694,9 @@ deparseRelabelType(StringInfo buf, RelabelType *node, PlannerInfo *root) * into N-argument form, so we'd better be prepared to deal with that. 
*/ static void -deparseBoolExpr(StringInfo buf, BoolExpr *node, PlannerInfo *root) +deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; const char *op = NULL; /* keep compiler quiet */ bool first; ListCell *lc; @@ -1643,7 +1711,7 @@ deparseBoolExpr(StringInfo buf, BoolExpr *node, PlannerInfo *root) break; case NOT_EXPR: appendStringInfoString(buf, "(NOT "); - deparseExpr(buf, linitial(node->args), root); + deparseExpr(linitial(node->args), context); appendStringInfoChar(buf, ')'); return; } @@ -1654,7 +1722,7 @@ deparseBoolExpr(StringInfo buf, BoolExpr *node, PlannerInfo *root) { if (!first) appendStringInfo(buf, " %s ", op); - deparseExpr(buf, (Expr *) lfirst(lc), root); + deparseExpr((Expr *) lfirst(lc), context); first = false; } appendStringInfoChar(buf, ')'); @@ -1664,10 +1732,12 @@ deparseBoolExpr(StringInfo buf, BoolExpr *node, PlannerInfo *root) * Deparse IS [NOT] NULL expression. */ static void -deparseNullTest(StringInfo buf, NullTest *node, PlannerInfo *root) +deparseNullTest(NullTest *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; + appendStringInfoChar(buf, '('); - deparseExpr(buf, node->arg, root); + deparseExpr(node->arg, context); if (node->nulltesttype == IS_NULL) appendStringInfoString(buf, " IS NULL)"); else @@ -1678,8 +1748,9 @@ deparseNullTest(StringInfo buf, NullTest *node, PlannerInfo *root) * Deparse ARRAY[...] construct. 
*/ static void -deparseArrayExpr(StringInfo buf, ArrayExpr *node, PlannerInfo *root) +deparseArrayExpr(ArrayExpr *node, deparse_expr_cxt *context) { + StringInfo buf = context->buf; bool first = true; ListCell *lc; @@ -1688,7 +1759,7 @@ deparseArrayExpr(StringInfo buf, ArrayExpr *node, PlannerInfo *root) { if (!first) appendStringInfoString(buf, ", "); - deparseExpr(buf, lfirst(lc), root); + deparseExpr(lfirst(lc), context); first = false; } appendStringInfoChar(buf, ']'); diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 706a37c685..3909ef8b58 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -446,6 +446,27 @@ EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c8 = 'foo'; -- can't Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" (4 rows) +-- parameterized remote path +EXPLAIN (VERBOSE, COSTS false) + SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Nested Loop + Output: a.c1, a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8, b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8 + -> Foreign Scan on public.ft2 a + Output: a.c1, a.c2, a.c3, a.c4, a.c5, a.c6, a.c7, a.c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = 47)) + -> Foreign Scan on public.ft2 b + Output: b.c1, b.c2, b.c3, b.c4, b.c5, b.c6, b.c7, b.c8 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (($1::integer = "C 1")) +(8 rows) + +SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; + c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 
+----+----+-------+------------------------------+--------------------------+----+------------+-----+----+----+-------+------------------------------+--------------------------+----+------------+----- + 47 | 7 | 00047 | Tue Feb 17 00:00:00 1970 PST | Tue Feb 17 00:00:00 1970 | 7 | 7 | foo | 7 | 7 | 00007 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo +(1 row) + -- =================================================================== -- parameterized queries -- =================================================================== @@ -646,7 +667,7 @@ EXPLAIN (VERBOSE, COSTS false) EXECUTE st5('foo', 1); Foreign Scan on public.ft1 t1 Output: c1, c2, c3, c4, c5, c6, c7, c8 Filter: (t1.c8 = $1) - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = $2::integer)) + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = $1::integer)) (4 rows) EXECUTE st5('foo', 1); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 58dd12c575..982a8d9a61 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -23,10 +23,13 @@ #include "funcapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" +#include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "parser/parsetree.h" #include "utils/builtins.h" @@ -49,28 +52,41 @@ PG_MODULE_MAGIC; */ typedef struct PgFdwRelationInfo { - /* XXX underdocumented, but a lot of this shouldn't be here anyway */ - StringInfoData sql; + /* baserestrictinfo clauses, broken down into safe and unsafe subsets. */ + List *remote_conds; + List *local_conds; + + /* Bitmap of attr numbers we need to fetch from the remote server. */ + Bitmapset *attrs_used; + + /* Cost and selectivity of local_conds. 
*/ + QualCost local_conds_cost; + Selectivity local_conds_sel; + + /* Estimated size and cost for a scan with baserestrictinfo quals. */ + double rows; + int width; Cost startup_cost; Cost total_cost; - List *remote_conds; - List *param_conds; - List *local_conds; - List *param_numbers; + + /* Options extracted from catalogs. */ + bool use_remote_estimate; + Cost fdw_startup_cost; + Cost fdw_tuple_cost; /* Cached catalog information. */ ForeignTable *table; ForeignServer *server; + UserMapping *user; /* only set in use_remote_estimate mode */ } PgFdwRelationInfo; /* * Indexes of FDW-private information stored in fdw_private lists. * * We store various information in ForeignScan.fdw_private to pass it from - * planner to executor. Specifically there is: + * planner to executor. Currently there is just: * * 1) SELECT statement text to be sent to the remote server - * 2) IDs of PARAM_EXEC Params used in the SELECT statement * * These items are indexed with the enum FdwScanPrivateIndex, so an item * can be fetched with list_nth(). For example, to get the SELECT statement: @@ -79,9 +95,7 @@ typedef struct PgFdwRelationInfo enum FdwScanPrivateIndex { /* SQL statement to execute remotely (as a String node) */ - FdwScanPrivateSelectSql, - /* Integer list of param IDs of PARAM_EXEC Params used in SQL stmt */ - FdwScanPrivateExternParamIds + FdwScanPrivateSelectSql }; /* @@ -117,10 +131,10 @@ typedef struct PgFdwScanState PGconn *conn; /* connection for the scan */ unsigned int cursor_number; /* quasi-unique ID for my cursor */ bool cursor_exists; /* have we created the cursor? */ - bool extparams_done; /* have we converted PARAM_EXTERN params? 
*/ int numParams; /* number of parameters passed to query */ - Oid *param_types; /* array of types of query parameters */ - const char **param_values; /* array of values of query parameters */ + FmgrInfo *param_flinfo; /* output conversion functions for them */ + List *param_exprs; /* executable expressions for param values */ + const char **param_values; /* textual values of query parameters */ /* for storing result tuples */ HeapTuple *tuples; /* array of currently-retrieved tuples */ @@ -194,6 +208,13 @@ typedef struct ConversionLocation AttrNumber cur_attno; /* attribute number being processed, or 0 */ } ConversionLocation; +/* Callback argument for ec_member_matches_foreign */ +typedef struct +{ + Expr *current; /* current expr, or NULL if not yet found */ + List *already_used; /* expressions already dealt with */ +} ec_member_foreign_arg; + /* * SQL functions */ @@ -260,12 +281,20 @@ static bool postgresAnalyzeForeignTable(Relation relation, /* * Helper functions */ +static void estimate_path_cost_size(PlannerInfo *root, + RelOptInfo *baserel, + List *join_conds, + double *p_rows, int *p_width, + Cost *p_startup_cost, Cost *p_total_cost); static void get_remote_estimate(const char *sql, PGconn *conn, double *rows, int *width, Cost *startup_cost, Cost *total_cost); +static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg); static void create_cursor(ForeignScanState *node); static void fetch_more_data(ForeignScanState *node); static void close_cursor(PGconn *conn, unsigned int cursor_number); @@ -330,38 +359,15 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) * postgresGetForeignRelSize * Estimate # of rows and width of the result of the scan * - * Here we estimate number of rows returned by the scan in two steps. In the - * first step, we execute remote EXPLAIN command to obtain the number of rows - * returned from remote side. 
In the second step, we calculate the selectivity - * of the filtering done on local side, and modify first estimate. - * - * We have to get some catalog objects and generate remote query string here, - * so we store such expensive information in FDW private area of RelOptInfo and - * pass them to subsequent functions for reuse. + * We should consider the effect of all baserestrictinfo clauses here, but + * not any join clauses. */ static void postgresGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) { - bool use_remote_estimate = false; PgFdwRelationInfo *fpinfo; - StringInfo sql; - ForeignTable *table; - ForeignServer *server; - Selectivity sel; - double rows; - int width; - Cost startup_cost; - Cost total_cost; - Cost run_cost; - QualCost qpqual_cost; - Cost cpu_per_tuple; - List *remote_conds; - List *param_conds; - List *local_conds; - List *param_numbers; - Bitmapset *attrs_used; ListCell *lc; /* @@ -369,127 +375,122 @@ postgresGetForeignRelSize(PlannerInfo *root, * functions. */ fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo)); - initStringInfo(&fpinfo->sql); - sql = &fpinfo->sql; + baserel->fdw_private = (void *) fpinfo; + + /* Look up foreign-table catalog info. */ + fpinfo->table = GetForeignTable(foreigntableid); + fpinfo->server = GetForeignServer(fpinfo->table->serverid); /* - * Determine whether we use remote estimate or not. Note that per-table - * setting overrides per-server setting. + * Extract user-settable option values. Note that per-table setting of + * use_remote_estimate overrides per-server setting. 
*/ - table = GetForeignTable(foreigntableid); - server = GetForeignServer(table->serverid); - foreach(lc, server->options) + fpinfo->use_remote_estimate = false; + fpinfo->fdw_startup_cost = DEFAULT_FDW_STARTUP_COST; + fpinfo->fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST; + + foreach(lc, fpinfo->server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "use_remote_estimate") == 0) + fpinfo->use_remote_estimate = defGetBoolean(def); + else if (strcmp(def->defname, "fdw_startup_cost") == 0) + fpinfo->fdw_startup_cost = strtod(defGetString(def), NULL); + else if (strcmp(def->defname, "fdw_tuple_cost") == 0) + fpinfo->fdw_tuple_cost = strtod(defGetString(def), NULL); + } + foreach(lc, fpinfo->table->options) { DefElem *def = (DefElem *) lfirst(lc); if (strcmp(def->defname, "use_remote_estimate") == 0) { - use_remote_estimate = defGetBoolean(def); - break; - } - } - foreach(lc, table->options) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "use_remote_estimate") == 0) - { - use_remote_estimate = defGetBoolean(def); - break; + fpinfo->use_remote_estimate = defGetBoolean(def); + break; /* only need the one value */ } } /* - * Identify which restriction clauses can be sent to the remote server and - * which can't. Conditions that are remotely executable but contain - * PARAM_EXTERN Params have to be treated separately because we can't use - * placeholders in remote EXPLAIN. + * If the table or the server is configured to use remote estimates, + * identify which user to do remote access as during planning. This + * should match what ExecCheckRTEPerms() does. If we fail due to lack of + * permissions, the query would have failed at runtime anyway. */ - classifyConditions(root, baserel, &remote_conds, &param_conds, - &local_conds, &param_numbers); + if (fpinfo->use_remote_estimate) + { + RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root); + Oid userid = rte->checkAsUser ?
rte->checkAsUser : GetUserId(); + + fpinfo->user = GetUserMapping(userid, fpinfo->server->serverid); + } + else + fpinfo->user = NULL; + + /* + * Identify which baserestrictinfo clauses can be sent to the remote + * server and which can't. + */ + classifyConditions(root, baserel, + &fpinfo->remote_conds, &fpinfo->local_conds); /* * Identify which attributes will need to be retrieved from the remote - * server. These include all attrs needed for joins or final output, plus - * all attrs used in the local_conds. + * server. These include all attrs needed for joins or final output, plus + * all attrs used in the local_conds. (Note: if we end up using a + * parameterized scan, it's possible that some of the join clauses will be + * sent to the remote and thus we wouldn't really need to retrieve the + * columns used in them. Doesn't seem worth detecting that case though.) */ - attrs_used = NULL; + fpinfo->attrs_used = NULL; pull_varattnos((Node *) baserel->reltargetlist, baserel->relid, - &attrs_used); - foreach(lc, local_conds) + &fpinfo->attrs_used); + foreach(lc, fpinfo->local_conds) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); pull_varattnos((Node *) rinfo->clause, baserel->relid, - &attrs_used); + &fpinfo->attrs_used); } /* - * Construct remote query which consists of SELECT, FROM, and WHERE - * clauses. For now, leave out the param_conds. + * Compute the selectivity and cost of the local_conds, so we don't have + * to do it over again for each path. The best we can do for these + * conditions is to estimate selectivity on the basis of local statistics. 
*/ - deparseSelectSql(sql, root, baserel, attrs_used); - if (remote_conds) - appendWhereClause(sql, root, remote_conds, true); + fpinfo->local_conds_sel = clauselist_selectivity(root, + fpinfo->local_conds, + baserel->relid, + JOIN_INNER, + NULL); + + cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root); /* * If the table or the server is configured to use remote estimates, - * connect to the foreign server and execute EXPLAIN with the quals that - * don't contain any Param nodes. Otherwise, estimate rows using whatever - * statistics we have locally, in a way similar to ordinary tables. + * connect to the foreign server and execute EXPLAIN to estimate the + * number of rows selected by the restriction clauses, as well as the + * average row width. Otherwise, estimate using whatever statistics we + * have locally, in a way similar to ordinary tables. */ - if (use_remote_estimate) + if (fpinfo->use_remote_estimate) { - RangeTblEntry *rte; - Oid userid; - UserMapping *user; - PGconn *conn; - /* - * Identify which user to do the remote access as. This should match - * what ExecCheckRTEPerms() does. If we fail due to lack of - * permissions, the query would have failed at runtime anyway. + * Get cost/size estimates with help of remote server. Save the + * values in fpinfo so we don't need to do it again to generate the + * basic foreign path. */ - rte = planner_rt_fetch(baserel->relid, root); - userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); + estimate_path_cost_size(root, baserel, NIL, + &fpinfo->rows, &fpinfo->width, + &fpinfo->startup_cost, &fpinfo->total_cost); - user = GetUserMapping(userid, server->serverid); - conn = GetConnection(server, user, false); - get_remote_estimate(sql->data, conn, &rows, &width, - &startup_cost, &total_cost); - ReleaseConnection(conn); - - /* - * Estimate selectivity of conditions which were not used in remote - * EXPLAIN by calling clauselist_selectivity(). 
The best we can do - * for these conditions is to estimate selectivity on the basis of - * local statistics. - */ - sel = clauselist_selectivity(root, param_conds, - baserel->relid, JOIN_INNER, NULL); - sel *= clauselist_selectivity(root, local_conds, - baserel->relid, JOIN_INNER, NULL); - - /* - * Add in the eval cost of those conditions, too. - */ - cost_qual_eval(&qpqual_cost, param_conds, root); - startup_cost += qpqual_cost.startup; - total_cost += qpqual_cost.per_tuple * rows; - cost_qual_eval(&qpqual_cost, local_conds, root); - startup_cost += qpqual_cost.startup; - total_cost += qpqual_cost.per_tuple * rows; - - /* Report estimated numbers to planner. */ - baserel->rows = clamp_row_est(rows * sel); - baserel->width = width; + /* Report estimated baserel size to planner. */ + baserel->rows = fpinfo->rows; + baserel->width = fpinfo->width; } else { /* - * Estimate rows from the result of the last ANALYZE, using all - * conditions specified in original query. - * * If the foreign table has never been ANALYZEd, it will have relpages * and reltuples equal to zero, which most likely has nothing to do * with reality. We can't do a whole lot about that if we're not @@ -505,26 +506,281 @@ postgresGetForeignRelSize(PlannerInfo *root, (10 * BLCKSZ) / (baserel->width + sizeof(HeapTupleHeaderData)); } + /* Estimate baserel size as best we can with local statistics. */ set_baserel_size_estimates(root, baserel); - /* Cost as though this were a seqscan, which is pessimistic. */ - startup_cost = 0; - run_cost = 0; - run_cost += seq_page_cost * baserel->pages; + /* Fill in basically-bogus cost estimates for use later. 
*/ + estimate_path_cost_size(root, baserel, NIL, + &fpinfo->rows, &fpinfo->width, + &fpinfo->startup_cost, &fpinfo->total_cost); + } +} - startup_cost += baserel->baserestrictcost.startup; - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; - run_cost += cpu_per_tuple * baserel->tuples; +/* + * postgresGetForeignPaths + * Create possible scan paths for a scan on the foreign table + */ +static void +postgresGetForeignPaths(PlannerInfo *root, + RelOptInfo *baserel, + Oid foreigntableid) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; + ForeignPath *path; + Relids lateral_referencers; + List *join_quals; + Relids required_outer; + double rows; + int width; + Cost startup_cost; + Cost total_cost; + ListCell *lc; - total_cost = startup_cost + run_cost; + /* + * Create simplest ForeignScan path node and add it to baserel. This path + * corresponds to SeqScan path of regular tables (though depending on what + * baserestrict conditions we were able to send to remote, there might + * actually be an indexscan happening there). We already did all the work + * to estimate cost and size of this path. + */ + path = create_foreignscan_path(root, baserel, + fpinfo->rows, + fpinfo->startup_cost, + fpinfo->total_cost, + NIL, /* no pathkeys */ + NULL, /* no outer rel either */ + NIL); /* no fdw_private list */ + add_path(baserel, (Path *) path); + + /* + * If we're not using remote estimates, stop here. We have no way to + * estimate whether any join clauses would be worth sending across, so + * don't bother building parameterized paths. + */ + if (!fpinfo->use_remote_estimate) + return; + + /* + * As a crude first hack, we consider each available join clause and try + * to make a parameterized path using just that clause. Later we should + * consider combinations of clauses, probably. 
+ */ + + /* + * If there are any rels that have LATERAL references to this one, we + * cannot use join quals referencing them as remote quals for this one, + * since such rels would have to be on the inside not the outside of a + * nestloop join relative to this one. Create a Relids set listing all + * such rels, for use in checks of potential join clauses. + */ + lateral_referencers = NULL; + foreach(lc, root->lateral_info_list) + { + LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc); + + if (bms_is_member(baserel->relid, ljinfo->lateral_lhs)) + lateral_referencers = bms_add_member(lateral_referencers, + ljinfo->lateral_rhs); + } + + /* Scan the rel's join clauses */ + foreach(lc, baserel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Check if clause can be moved to this rel */ + if (!join_clause_is_movable_to(rinfo, baserel->relid)) + continue; + + /* Not useful if it conflicts with any LATERAL references */ + if (bms_overlap(rinfo->clause_relids, lateral_referencers)) + continue; + + /* See if it is safe to send to remote */ + if (!is_foreign_expr(root, baserel, rinfo->clause)) + continue; + + /* + * OK, get a cost estimate from the remote, and make a path. 
+ */ + join_quals = list_make1(rinfo); + estimate_path_cost_size(root, baserel, join_quals, + &rows, &width, + &startup_cost, &total_cost); + + /* Must calculate required outer rels for this path */ + required_outer = bms_union(rinfo->clause_relids, + baserel->lateral_relids); + /* We do not want the foreign rel itself listed in required_outer */ + required_outer = bms_del_member(required_outer, baserel->relid); + /* Enforce convention that required_outer is exactly NULL if empty */ + if (bms_is_empty(required_outer)) + required_outer = NULL; + + path = create_foreignscan_path(root, baserel, + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + required_outer, + NIL); /* no fdw_private list */ + add_path(baserel, (Path *) path); } /* - * Finish deparsing remote query by adding conditions which were unusable - * in remote EXPLAIN because they contain Param nodes. + * The above scan examined only "generic" join clauses, not those that + * were absorbed into EquivalenceClauses. See if we can make anything out + * of EquivalenceClauses. */ - if (param_conds) - appendWhereClause(sql, root, param_conds, (remote_conds == NIL)); + if (baserel->has_eclass_joins) + { + /* + * We repeatedly scan the eclass list looking for column references + * (or expressions) belonging to the foreign rel. Each time we find + * one, we generate a list of equivalence joinclauses for it, and then + * try to make those into foreign paths. Repeat till there are no + * more candidate EC members. 
+ */ + ec_member_foreign_arg arg; + + arg.already_used = NIL; + for (;;) + { + List *clauses; + + /* Make clauses, skipping any that join to lateral_referencers */ + arg.current = NULL; + clauses = generate_implied_equalities_for_column(root, + baserel, + ec_member_matches_foreign, + (void *) &arg, + lateral_referencers); + + /* Done if there are no more expressions in the foreign rel */ + if (arg.current == NULL) + { + Assert(clauses == NIL); + break; + } + + /* Scan the extracted join clauses */ + foreach(lc, clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* Check if clause can be moved to this rel */ + if (!join_clause_is_movable_to(rinfo, baserel->relid)) + continue; + + /* Shouldn't conflict with any LATERAL references */ + Assert(!bms_overlap(rinfo->clause_relids, lateral_referencers)); + + /* See if it is safe to send to remote */ + if (!is_foreign_expr(root, baserel, rinfo->clause)) + continue; + + /* + * OK, get a cost estimate from the remote, and make a path. 
+ */ + join_quals = list_make1(rinfo); + estimate_path_cost_size(root, baserel, join_quals, + &rows, &width, + &startup_cost, &total_cost); + + /* Must calculate required outer rels for this path */ + required_outer = bms_union(rinfo->clause_relids, + baserel->lateral_relids); + required_outer = bms_del_member(required_outer, baserel->relid); + if (bms_is_empty(required_outer)) + required_outer = NULL; + + path = create_foreignscan_path(root, baserel, + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + required_outer, + NIL); /* no fdw_private */ + add_path(baserel, (Path *) path); + } + + /* Try again, now ignoring the expression we found this time */ + arg.already_used = lappend(arg.already_used, arg.current); + } + } +} + +/* + * postgresGetForeignPlan + * Create ForeignScan plan node which implements selected best path + */ +static ForeignScan * +postgresGetForeignPlan(PlannerInfo *root, + RelOptInfo *baserel, + Oid foreigntableid, + ForeignPath *best_path, + List *tlist, + List *scan_clauses) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; + Index scan_relid = baserel->relid; + List *fdw_private; + List *remote_conds = NIL; + List *local_exprs = NIL; + List *params_list = NIL; + StringInfoData sql; + ListCell *lc; + + /* + * Separate the scan_clauses into those that can be executed remotely and + * those that can't. baserestrictinfo clauses that were previously + * determined to be safe or unsafe by classifyConditions are shown in + * fpinfo->remote_conds and fpinfo->local_conds. Anything else in the + * scan_clauses list should be a join clause that was found safe by + * postgresGetForeignPaths. + * + * Note: for clauses extracted from EquivalenceClasses, it's possible that + * what we get here is a different representation of the clause than what + * postgresGetForeignPaths saw; for example we might get a commuted + * version of the clause.
So we can't insist on simple equality as we do + * for the baserestrictinfo clauses. + * + * This code must match "extract_actual_clauses(scan_clauses, false)" + * except for the additional decision about remote versus local execution. + * Note however that we only strip the RestrictInfo nodes from the + * local_exprs list, since appendWhereClause expects a list of + * RestrictInfos. + */ + foreach(lc, scan_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + Assert(IsA(rinfo, RestrictInfo)); + + /* Ignore any pseudoconstants, they're dealt with elsewhere */ + if (rinfo->pseudoconstant) + continue; + + if (list_member_ptr(fpinfo->remote_conds, rinfo)) + remote_conds = lappend(remote_conds, rinfo); + else if (list_member_ptr(fpinfo->local_conds, rinfo)) + local_exprs = lappend(local_exprs, rinfo->clause); + else + { + Assert(is_foreign_expr(root, baserel, rinfo->clause)); + remote_conds = lappend(remote_conds, rinfo); + } + } + + /* + * Build the query string to be sent for execution, and identify + * expressions to be sent as parameters. + */ + initStringInfo(&sql); + deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used); + if (remote_conds) + appendWhereClause(&sql, root, baserel, remote_conds, + true, &params_list); /* * Add FOR UPDATE/SHARE if appropriate.
We apply locking during the @@ -538,7 +794,7 @@ postgresGetForeignRelSize(PlannerInfo *root, root->parse->commandType == CMD_DELETE)) { /* Relation is UPDATE/DELETE target, so use FOR UPDATE */ - appendStringInfo(sql, " FOR UPDATE"); + appendStringInfo(&sql, " FOR UPDATE"); } else { @@ -559,177 +815,34 @@ postgresGetForeignRelSize(PlannerInfo *root, { case LCS_FORKEYSHARE: case LCS_FORSHARE: - appendStringInfo(sql, " FOR SHARE"); + appendStringInfo(&sql, " FOR SHARE"); break; case LCS_FORNOKEYUPDATE: case LCS_FORUPDATE: - appendStringInfo(sql, " FOR UPDATE"); + appendStringInfo(&sql, " FOR UPDATE"); break; } } } - /* - * Store obtained information into FDW-private area of RelOptInfo so it's - * available to subsequent functions. - */ - fpinfo->startup_cost = startup_cost; - fpinfo->total_cost = total_cost; - fpinfo->remote_conds = remote_conds; - fpinfo->param_conds = param_conds; - fpinfo->local_conds = local_conds; - fpinfo->param_numbers = param_numbers; - fpinfo->table = table; - fpinfo->server = server; - baserel->fdw_private = (void *) fpinfo; -} - -/* - * postgresGetForeignPaths - * Create possible scan paths for a scan on the foreign table - */ -static void -postgresGetForeignPaths(PlannerInfo *root, - RelOptInfo *baserel, - Oid foreigntableid) -{ - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; - ForeignPath *path; - ListCell *lc; - double fdw_startup_cost = DEFAULT_FDW_STARTUP_COST; - double fdw_tuple_cost = DEFAULT_FDW_TUPLE_COST; - Cost startup_cost; - Cost total_cost; - List *fdw_private; - - /* - * Check for user override of fdw_startup_cost, fdw_tuple_cost values - */ - foreach(lc, fpinfo->server->options) - { - DefElem *d = (DefElem *) lfirst(lc); - - if (strcmp(d->defname, "fdw_startup_cost") == 0) - fdw_startup_cost = strtod(defGetString(d), NULL); - else if (strcmp(d->defname, "fdw_tuple_cost") == 0) - fdw_tuple_cost = strtod(defGetString(d), NULL); - } - - /* - * We have cost values which are estimated on remote side, so 
adjust them - * for better estimate which respect various stuffs to complete the scan, - * such as sending query, transferring result, and local filtering. - */ - startup_cost = fpinfo->startup_cost; - total_cost = fpinfo->total_cost; - - /*---------- - * Adjust costs with factors of the corresponding foreign server: - * - add cost to establish connection to both startup and total - * - add cost to manipulate on remote, and transfer result to total - * - add cost to manipulate tuples on local side to total - *---------- - */ - startup_cost += fdw_startup_cost; - total_cost += fdw_startup_cost; - total_cost += fdw_tuple_cost * baserel->rows; - total_cost += cpu_tuple_cost * baserel->rows; - /* * Build the fdw_private list that will be available to the executor. * Items in the list must match enum FdwScanPrivateIndex, above. */ - fdw_private = list_make2(makeString(fpinfo->sql.data), - fpinfo->param_numbers); - - /* - * Create simplest ForeignScan path node and add it to baserel. This path - * corresponds to SeqScan path of regular tables (though depending on what - * baserestrict conditions we were able to send to remote, there might - * actually be an indexscan happening there). - */ - path = create_foreignscan_path(root, baserel, - baserel->rows, - startup_cost, - total_cost, - NIL, /* no pathkeys */ - NULL, /* no outer rel either */ - fdw_private); - add_path(baserel, (Path *) path); - - /* - * XXX We can consider sorted path or parameterized path here if we know - * that foreign table is indexed on remote end. For this purpose, we - * might have to support FOREIGN INDEX to represent possible sets of sort - * keys and/or filtering. Or we could just try some join conditions and - * see if remote side estimates using them as markedly cheaper. Note that - * executor functions need work to support internal Params before we can - * try generating any parameterized paths, though. 
- */ -} - -/* - * postgresGetForeignPlan - * Create ForeignScan plan node which implements selected best path - */ -static ForeignScan * -postgresGetForeignPlan(PlannerInfo *root, - RelOptInfo *baserel, - Oid foreigntableid, - ForeignPath *best_path, - List *tlist, - List *scan_clauses) -{ - PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; - Index scan_relid = baserel->relid; - List *fdw_private = best_path->fdw_private; - List *remote_exprs = NIL; - List *local_exprs = NIL; - ListCell *lc; - - /* - * Separate the scan_clauses into those that can be executed remotely and - * those that can't. For now, we accept only remote clauses that were - * previously determined to be safe by classifyClauses (so, only - * baserestrictinfo clauses can be used that way). - * - * This code must match "extract_actual_clauses(scan_clauses, false)" - * except for the additional decision about remote versus local execution. - */ - foreach(lc, scan_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); - - Assert(IsA(rinfo, RestrictInfo)); - - /* Ignore any pseudoconstants, they're dealt with elsewhere */ - if (rinfo->pseudoconstant) - continue; - - /* Either simple or parameterized remote clauses are OK now */ - if (list_member_ptr(fpinfo->remote_conds, rinfo) || - list_member_ptr(fpinfo->param_conds, rinfo)) - remote_exprs = lappend(remote_exprs, rinfo->clause); - else - local_exprs = lappend(local_exprs, rinfo->clause); - } + fdw_private = list_make1(makeString(sql.data)); /* * Create the ForeignScan node from target list, local filtering - * expressions, remote filtering expressions, and FDW private information. + * expressions, remote parameter expressions, and FDW private information. * - * Note that the remote_exprs are stored in the fdw_exprs field of the - * finished plan node; we can't keep them in private state because then - * they wouldn't be subject to later planner processing. 
- * - * XXX Currently, the remote_exprs aren't actually used at runtime, so we - * don't need to store them at all. But we'll keep this behavior for a - * little while for debugging reasons. + * Note that the remote parameter expressions are stored in the fdw_exprs + * field of the finished plan node; we can't keep them in private state + * because then they wouldn't be subject to later planner processing. */ return make_foreignscan(tlist, local_exprs, scan_relid, - remote_exprs, + params_list, fdw_private); } @@ -748,9 +861,9 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) ForeignTable *table; ForeignServer *server; UserMapping *user; - List *param_numbers; int numParams; int i; + ListCell *lc; /* * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL. @@ -805,40 +918,42 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) /* Get info we'll need for data conversion. */ fsstate->attinmeta = TupleDescGetAttInMetadata(RelationGetDescr(fsstate->rel)); - /* - * Allocate buffer for query parameters, if the remote conditions use any. - * - * We use a parameter slot for each PARAM_EXTERN parameter, even though - * not all of them may get sent to the remote server. This allows us to - * refer to Params by their original number rather than remapping, and it - * doesn't cost much. Slots that are not actually used get filled with - * null values that are arbitrarily marked as being of type int4. - */ - param_numbers = (List *) - list_nth(fsstate->fdw_private, FdwScanPrivateExternParamIds); - if (param_numbers != NIL) - { - ParamListInfo params = estate->es_param_list_info; - - numParams = params ? params->numParams : 0; - } - else - numParams = 0; + /* Prepare for output conversion of parameters used in remote query. 
*/ + numParams = list_length(fsplan->fdw_exprs); fsstate->numParams = numParams; + fsstate->param_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * numParams); + + i = 0; + foreach(lc, fsplan->fdw_exprs) + { + Node *param_expr = (Node *) lfirst(lc); + Oid typefnoid; + bool isvarlena; + + getTypeOutputInfo(exprType(param_expr), &typefnoid, &isvarlena); + fmgr_info(typefnoid, &fsstate->param_flinfo[i]); + i++; + } + + /* + * Prepare remote-parameter expressions for evaluation. (Note: in + * practice, we expect that all these expressions will be just Params, so + * we could possibly do something more efficient than using the full + * expression-eval machinery for this. But probably there would be little + * benefit, and it'd require postgres_fdw to know more than is desirable + * about Param evaluation.) + */ + fsstate->param_exprs = (List *) + ExecInitExpr((Expr *) fsplan->fdw_exprs, + (PlanState *) node); + + /* + * Allocate buffer for text form of query parameters, if any. + */ if (numParams > 0) - { - /* we initially fill all slots with value = NULL, type = int4 */ - fsstate->param_types = (Oid *) palloc(numParams * sizeof(Oid)); fsstate->param_values = (const char **) palloc0(numParams * sizeof(char *)); - for (i = 0; i < numParams; i++) - fsstate->param_types[i] = INT4OID; - } else - { - fsstate->param_types = NULL; fsstate->param_values = NULL; - } - fsstate->extparams_done = false; } /* @@ -894,11 +1009,6 @@ postgresReScanForeignScan(ForeignScanState *node) char sql[64]; PGresult *res; - /* - * Note: we assume that PARAM_EXTERN params don't change over the life of - * the query, so no need to reset extparams_done. - */ - /* If we haven't created the cursor yet, nothing to do. */ if (!fsstate->cursor_exists) return; @@ -1497,8 +1607,128 @@ postgresExplainForeignModify(ModifyTableState *mtstate, } } + /* - * Estimate costs of executing given SQL statement. 
+ * estimate_path_cost_size + * Get cost and size estimates for a foreign scan + * + * We assume that all the baserestrictinfo clauses will be applied, plus + * any join clauses listed in join_conds. + */ +static void +estimate_path_cost_size(PlannerInfo *root, + RelOptInfo *baserel, + List *join_conds, + double *p_rows, int *p_width, + Cost *p_startup_cost, Cost *p_total_cost) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) baserel->fdw_private; + double rows; + double retrieved_rows; + int width; + Cost startup_cost; + Cost total_cost; + Cost run_cost; + Cost cpu_per_tuple; + + /* + * If the table or the server is configured to use remote estimates, + * connect to the foreign server and execute EXPLAIN to estimate the + * number of rows selected by the restriction+join clauses. Otherwise, + * estimate rows using whatever statistics we have locally, in a way + * similar to ordinary tables. + */ + if (fpinfo->use_remote_estimate) + { + StringInfoData sql; + PGconn *conn; + + /* + * Construct EXPLAIN query including the desired SELECT, FROM, and + * WHERE clauses. Params and other-relation Vars are replaced by + * dummy values. 
+ */ + initStringInfo(&sql); + appendStringInfoString(&sql, "EXPLAIN "); + deparseSelectSql(&sql, root, baserel, fpinfo->attrs_used); + if (fpinfo->remote_conds) + appendWhereClause(&sql, root, baserel, fpinfo->remote_conds, + true, NULL); + if (join_conds) + appendWhereClause(&sql, root, baserel, join_conds, + (fpinfo->remote_conds == NIL), NULL); + + /* Get the remote estimate */ + conn = GetConnection(fpinfo->server, fpinfo->user, false); + get_remote_estimate(sql.data, conn, &rows, &width, + &startup_cost, &total_cost); + ReleaseConnection(conn); + + retrieved_rows = rows; + + /* Factor in the selectivity of the local_conds */ + rows = clamp_row_est(rows * fpinfo->local_conds_sel); + + /* Add in the eval cost of the local_conds */ + startup_cost += fpinfo->local_conds_cost.startup; + total_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows; + } + else + { + /* + * We don't support join conditions in this mode (hence, no + * parameterized paths can be made). + */ + Assert(join_conds == NIL); + + /* Use rows/width estimates made by set_baserel_size_estimates. */ + rows = baserel->rows; + width = baserel->width; + + /* + * Back into an estimate of the number of retrieved rows. Just in + * case this is nuts, clamp to at most baserel->tuples. + */ + retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel); + retrieved_rows = Min(retrieved_rows, baserel->tuples); + + /* + * Cost as though this were a seqscan, which is pessimistic. We + * effectively imagine the local_conds are being evaluated remotely, + * too. 
+ */ + startup_cost = 0; + run_cost = 0; + run_cost += seq_page_cost * baserel->pages; + + startup_cost += baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + total_cost = startup_cost + run_cost; + } + + /* + * Add some additional cost factors to account for connection overhead + * (fdw_startup_cost), transferring data across the network + * (fdw_tuple_cost per retrieved row), and local manipulation of the data + * (cpu_tuple_cost per retrieved row). + */ + startup_cost += fpinfo->fdw_startup_cost; + total_cost += fpinfo->fdw_startup_cost; + total_cost += fpinfo->fdw_tuple_cost * retrieved_rows; + total_cost += cpu_tuple_cost * retrieved_rows; + + /* Return results. */ + *p_rows = rows; + *p_width = width; + *p_startup_cost = startup_cost; + *p_total_cost = total_cost; +} + +/* + * Estimate costs of executing a SQL statement remotely. + * The given "sql" must be an EXPLAIN command. */ static void get_remote_estimate(const char *sql, PGconn *conn, @@ -1510,19 +1740,16 @@ get_remote_estimate(const char *sql, PGconn *conn, /* PGresult must be released before leaving this function. */ PG_TRY(); { - StringInfoData buf; char *line; char *p; int n; /* - * Execute EXPLAIN remotely on given SQL statement. + * Execute EXPLAIN remotely. */ - initStringInfo(&buf); - appendStringInfo(&buf, "EXPLAIN %s", sql); - res = PQexec(conn, buf.data); + res = PQexec(conn, sql); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, false, buf.data); + pgfdw_report_error(ERROR, res, false, sql); /* * Extract cost numbers for topmost plan node. Note we search for a @@ -1550,6 +1777,37 @@ get_remote_estimate(const char *sql, PGconn *conn, PG_END_TRY(); } +/* + * Detect whether we want to process an EquivalenceClass member. + * + * This is a callback for use by generate_implied_equalities_for_column. 
+ */ +static bool +ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg) +{ + ec_member_foreign_arg *state = (ec_member_foreign_arg *) arg; + Expr *expr = em->em_expr; + + /* + * If we've identified what we're processing in the current scan, we only + * want to match that expression. + */ + if (state->current != NULL) + return equal(expr, state->current); + + /* + * Otherwise, ignore anything we've already processed. + */ + if (list_member(state->already_used, expr)) + return false; + + /* This is the new target to process. */ + state->current = expr; + return true; +} + /* * Create cursor for node's query with current parameter values. */ @@ -1557,8 +1815,8 @@ static void create_cursor(ForeignScanState *node) { PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + ExprContext *econtext = node->ss.ps.ps_ExprContext; int numParams = fsstate->numParams; - Oid *types = fsstate->param_types; const char **values = fsstate->param_values; PGconn *conn = fsstate->conn; char *sql; @@ -1566,69 +1824,46 @@ create_cursor(ForeignScanState *node) PGresult *res; /* - * Construct array of external parameter values in text format. Since - * there might be random unconvertible stuff in the ParamExternData array, - * take care to convert only values we actually need. - * - * Note that we leak the memory for the value strings until end of query; - * this doesn't seem like a big problem, and in any case we might need to - * recreate the cursor after a rescan, so we could need to re-use the - * values anyway. + * Construct array of query parameter values in text format. We do the + * conversions in the short-lived per-tuple context, so as not to cause a + * memory leak over repeated scans. 
*/ - if (numParams > 0 && !fsstate->extparams_done) + if (numParams > 0) { - ParamListInfo params = node->ss.ps.state->es_param_list_info; int nestlevel; - List *param_numbers; + MemoryContext oldcontext; + int i; ListCell *lc; + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + nestlevel = set_transmission_modes(); - param_numbers = (List *) - list_nth(fsstate->fdw_private, FdwScanPrivateExternParamIds); - foreach(lc, param_numbers) + i = 0; + foreach(lc, fsstate->param_exprs) { - int paramno = lfirst_int(lc); - ParamExternData *prm = &params->params[paramno - 1]; + ExprState *expr_state = (ExprState *) lfirst(lc); + Datum expr_value; + bool isNull; - /* give hook a chance in case parameter is dynamic */ - if (!OidIsValid(prm->ptype) && params->paramFetch != NULL) - params->paramFetch(params, paramno); - - /* - * Force the remote server to infer a type for this parameter. - * Since we explicitly cast every parameter (see deparse.c), the - * "inference" is trivial and will produce the desired result. - * This allows us to avoid assuming that the remote server has the - * same OIDs we do for the parameters' types. - * - * We'd not need to pass a type array to PQexecParams at all, - * except that there may be unused holes in the array, which will - * have to be filled with something or the remote server will - * complain. We arbitrarily set them to INT4OID earlier. - */ - types[paramno - 1] = InvalidOid; + /* Evaluate the parameter expression */ + expr_value = ExecEvalExpr(expr_state, econtext, &isNull, NULL); /* * Get string representation of each parameter value by invoking * type-specific output function, unless the value is null.
*/ - if (prm->isnull) - values[paramno - 1] = NULL; + if (isNull) + values[i] = NULL; else - { - Oid out_func; - bool isvarlena; - - getTypeOutputInfo(prm->ptype, &out_func, &isvarlena); - values[paramno - 1] = OidOutputFunctionCall(out_func, - prm->value); - } + values[i] = OutputFunctionCall(&fsstate->param_flinfo[i], + expr_value); + i++; } reset_transmission_modes(nestlevel); - fsstate->extparams_done = true; + MemoryContextSwitchTo(oldcontext); } /* Construct the DECLARE CURSOR command */ @@ -1638,10 +1873,16 @@ create_cursor(ForeignScanState *node) fsstate->cursor_number, sql); /* + * Notice that we pass NULL for paramTypes, thus forcing the remote server + * to infer types for all parameters. Since we explicitly cast every + * parameter (see deparse.c), the "inference" is trivial and will produce + * the desired result. This allows us to avoid assuming that the remote + * server has the same OIDs we do for the parameters' types. + * * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. 
*/ - res = PQexecParams(conn, buf.data, numParams, types, values, + res = PQexecParams(conn, buf.data, numParams, NULL, values, NULL, NULL, 0); if (PQresultStatus(res) != PGRES_COMMAND_OK) pgfdw_report_error(ERROR, res, true, sql); diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index fc0e7b61fd..78a57ea057 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -42,17 +42,20 @@ extern int ExtractConnectionOptions(List *defelems, extern void classifyConditions(PlannerInfo *root, RelOptInfo *baserel, List **remote_conds, - List **param_conds, - List **local_conds, - List **param_numbers); + List **local_conds); +extern bool is_foreign_expr(PlannerInfo *root, + RelOptInfo *baserel, + Expr *expr); extern void deparseSelectSql(StringInfo buf, PlannerInfo *root, RelOptInfo *baserel, Bitmapset *attrs_used); extern void appendWhereClause(StringInfo buf, PlannerInfo *root, + RelOptInfo *baserel, List *exprs, - bool is_first); + bool is_first, + List **params); extern void deparseInsertSql(StringInfo buf, PlannerInfo *root, Index rtindex, Relation rel, List *targetAttrs, List *returningList); diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 6dc50e4a2a..1d5989e8dd 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -189,6 +189,10 @@ EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- ArrayRef EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 WHERE c8 = 'foo'; -- can't be sent to remote +-- parameterized remote path +EXPLAIN (VERBOSE, COSTS false) + SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; +SELECT * FROM ft2 a, ft2 b WHERE a.c1 = 47 AND b.c1 = a.c2; -- 
=================================================================== -- parameterized queries diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index e2df448db6..5c4ac066a5 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -512,7 +512,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, * be more than one EC that matches the expression; if so it's order-dependent * which one you get. This is annoying but it only happens in corner cases, * so for now we live with just reporting the first match. See also - * generate_implied_equalities_for_indexcol and match_pathkeys_to_index.) + * generate_implied_equalities_for_column and match_pathkeys_to_index.) * * If create_it is TRUE, we'll build a new EquivalenceClass when there is no * match. If create_it is FALSE, we just return NULL when no match. @@ -2013,15 +2013,21 @@ mutate_eclass_expressions(PlannerInfo *root, /* - * generate_implied_equalities_for_indexcol - * Create EC-derived joinclauses usable with a specific index column. + * generate_implied_equalities_for_column + * Create EC-derived joinclauses usable with a specific column. * - * We assume that any given index column could appear in only one EC. + * This is used by indxpath.c to extract potentially indexable joinclauses + * from ECs, and can be used by foreign data wrappers for similar purposes. + * We assume that only expressions in Vars of a single table are of interest, + * but the caller provides a callback function to identify exactly which + * such expressions it would like to know about. + * + * We assume that any given table/index column could appear in only one EC. * (This should be true in all but the most pathological cases, and if it * isn't, we stop on the first match anyway.) 
Therefore, what we return - * is a redundant list of clauses equating the index column to each of + * is a redundant list of clauses equating the table/index column to each of * the other-relation values it is known to be equal to. Any one of - * these clauses can be used to create a parameterized indexscan, and there + * these clauses can be used to create a parameterized path, and there * is no value in using more than one. (But it *is* worthwhile to create * a separate parameterized path for each one, since that leads to different * join orders.) @@ -2030,13 +2036,13 @@ mutate_eclass_expressions(PlannerInfo *root, * to, so as to save the work of creating useless clauses. */ List * -generate_implied_equalities_for_indexcol(PlannerInfo *root, - IndexOptInfo *index, - int indexcol, - Relids prohibited_rels) +generate_implied_equalities_for_column(PlannerInfo *root, + RelOptInfo *rel, + ec_matches_callback_type callback, + void *callback_arg, + Relids prohibited_rels) { List *result = NIL; - RelOptInfo *rel = index->rel; bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); Index parent_relid; ListCell *lc1; @@ -2069,11 +2075,11 @@ generate_implied_equalities_for_indexcol(PlannerInfo *root, continue; /* - * Scan members, looking for a match to the indexable column. Note + * Scan members, looking for a match to the target column. Note * that child EC members are considered, but only when they belong to * the target relation. (Unlike regular members, the same expression * could be a child member of more than one EC. Therefore, it's - * potentially order-dependent which EC a child relation's index + * potentially order-dependent which EC a child relation's target * column gets matched to. This is annoying but it only happens in * corner cases, so for now we live with just reporting the first * match. See also get_eclass_for_sort_expr.) 
@@ -2083,8 +2089,7 @@ generate_implied_equalities_for_indexcol(PlannerInfo *root, { cur_em = (EquivalenceMember *) lfirst(lc2); if (bms_equal(cur_em->em_relids, rel->relids) && - eclass_member_matches_indexcol(cur_ec, cur_em, - index, indexcol)) + callback(root, rel, cur_ec, cur_em, callback_arg)) break; cur_em = NULL; } diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 8e13c1fddc..d74603983b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -78,6 +78,13 @@ typedef struct Bitmapset *clauseids; /* quals+preds represented as a bitmapset */ } PathClauseUsage; +/* Callback argument for ec_member_matches_indexcol */ +typedef struct +{ + IndexOptInfo *index; /* index we're considering */ + int indexcol; /* index column we want to match to */ +} ec_member_matches_arg; + static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index, @@ -162,6 +169,9 @@ static void match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys, List **clause_columns_p); static Expr *match_clause_to_ordering_op(IndexOptInfo *index, int indexcol, Expr *clause, Oid pk_opfamily); +static bool ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg); static bool match_boolean_index_clause(Node *clause, int indexcol, IndexOptInfo *index); static bool match_special_index_operator(Expr *clause, @@ -645,7 +655,7 @@ get_join_index_paths(PlannerInfo *root, RelOptInfo *rel, /* * Add applicable eclass join clauses. The clauses generated for each - * column are redundant (cf generate_implied_equalities_for_indexcol), + * column are redundant (cf generate_implied_equalities_for_column), * so we need at most one. This is the only exception to the general * rule of using all available index clauses. 
*/ @@ -1992,18 +2002,22 @@ match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index, for (indexcol = 0; indexcol < index->ncolumns; indexcol++) { + ec_member_matches_arg arg; List *clauses; /* Generate clauses, skipping any that join to lateral_referencers */ - clauses = generate_implied_equalities_for_indexcol(root, - index, - indexcol, - lateral_referencers); + arg.index = index; + arg.indexcol = indexcol; + clauses = generate_implied_equalities_for_column(root, + index->rel, + ec_member_matches_indexcol, + (void *) &arg, + lateral_referencers); /* * We have to check whether the results actually do match the index, * since for non-btree indexes the EC's equality operators might not - * be in the index opclass (cf eclass_member_matches_indexcol). + * be in the index opclass (cf ec_member_matches_indexcol). */ match_clauses_to_index(index, clauses, clauseset); } @@ -2682,15 +2696,18 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) ****************************************************************************/ /* - * eclass_member_matches_indexcol + * ec_member_matches_indexcol * Test whether an EquivalenceClass member matches an index column. * - * This is exported for use by generate_implied_equalities_for_indexcol. + * This is a callback for use by generate_implied_equalities_for_column. 
*/ -bool -eclass_member_matches_indexcol(EquivalenceClass *ec, EquivalenceMember *em, - IndexOptInfo *index, int indexcol) +static bool +ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg) { + IndexOptInfo *index = ((ec_member_matches_arg *) arg)->index; + int indexcol = ((ec_member_matches_arg *) arg)->indexcol; Oid curFamily = index->opfamily[indexcol]; Oid curCollation = index->indexcollations[indexcol]; @@ -2701,7 +2718,7 @@ eclass_member_matches_indexcol(EquivalenceClass *ec, EquivalenceMember *em, * whether clauses generated from the EC could be used with the index, so * don't check the opfamily. This might mean we return "true" for a * useless EC, so we have to recheck the results of - * generate_implied_equalities_for_indexcol; see + * generate_implied_equalities_for_column; see * match_eclass_clauses_to_index. */ if (index->relam == BTREE_AM_OID && diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 8091b08234..88ab4630fe 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -49,9 +49,6 @@ extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist); -extern bool eclass_member_matches_indexcol(EquivalenceClass *ec, - EquivalenceMember *em, - IndexOptInfo *index, int indexcol); extern bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index); extern void expand_indexqual_conditions(IndexOptInfo *index, @@ -99,6 +96,12 @@ extern bool have_join_order_restriction(PlannerInfo *root, * equivclass.c * routines for managing EquivalenceClasses */ +typedef bool (*ec_matches_callback_type) (PlannerInfo *root, + RelOptInfo *rel, + EquivalenceClass *ec, + EquivalenceMember *em, + void *arg); + extern bool process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, bool 
below_outer_join); extern Expr *canonicalize_ec_expression(Expr *expr, @@ -126,10 +129,11 @@ extern void mutate_eclass_expressions(PlannerInfo *root, Node *(*mutator) (), void *context, bool include_child_exprs); -extern List *generate_implied_equalities_for_indexcol(PlannerInfo *root, - IndexOptInfo *index, - int indexcol, - Relids prohibited_rels); +extern List *generate_implied_equalities_for_column(PlannerInfo *root, + RelOptInfo *rel, + ec_matches_callback_type callback, + void *callback_arg, + Relids prohibited_rels); extern bool have_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); extern bool has_relevant_eclass_joinclause(PlannerInfo *root,