From 332bec1e6096679b04ec9717beb44a858495260f Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 24 Apr 2017 22:50:07 -0400 Subject: [PATCH] postgres_fdw: Fix join push down with extensions Objects in an extension are shippable to a foreign server if the extension is part of the foreign server definition's shippable extensions list. But this was not properly considered in some cases when checking whether a join condition can be pushed to a foreign server and the join condition uses an object from a shippable extension. So the join would never be pushed down in those cases. So, the list of extensions needs to be made available in fpinfo of the relation being considered to be pushed down before any expressions are assessed for being shippable. Fix foreign_join_ok() to do that for a join relation. The code to save FDW options in fpinfo is scattered at multiple places. Bring all of that together into functions apply_server_options(), apply_table_options(), and merge_fdw_options(). David Rowley and Ashutosh Bapat, per report from David Rowley --- .../postgres_fdw/expected/postgres_fdw.out | 29 +++ contrib/postgres_fdw/postgres_fdw.c | 195 +++++++++++------- contrib/postgres_fdw/sql/postgres_fdw.sql | 8 + 3 files changed, 160 insertions(+), 72 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index b29549a28f..d1bc5b0660 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1620,6 +1620,35 @@ SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 | 21 (10 rows) +-- full outer join + WHERE clause with shippable extensions set +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c2, t1.c3 FROM ft1 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE postgres_fdw_abs(t1.c1) > 0 OFFSET 10 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c2, t1.c3 + -> Foreign Scan + Output: t1.c1, t2.c2, t1.c3 + Relations: (public.ft1 t1) FULL JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, r2.c2 FROM ("S 1"."T 1" r1 FULL JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) WHERE ((public.postgres_fdw_abs(r1."C 1") > 0)) +(6 rows) + +ALTER SERVER loopback OPTIONS (DROP extensions); +-- full outer join + WHERE clause with shippable extensions not set +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c2, t1.c3 FROM ft1 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE postgres_fdw_abs(t1.c1) > 0 OFFSET 10 LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: t1.c1, t2.c2, t1.c3 + -> Foreign Scan + Output: t1.c1, t2.c2, t1.c3 + Filter: (postgres_fdw_abs(t1.c1) > 0) + Relations: (public.ft1 t1) FULL JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r1.c3, r2.c2 FROM ("S 1"."T 1" r1 FULL JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) +(7 rows) + +ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw'); -- join two tables with FOR UPDATE clause -- tests whole-row reference for row marks EXPLAIN (VERBOSE, COSTS OFF) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 8d022437be..18b4b01cfa 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -414,6 +414,11 @@ static void add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, static void add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel); +static void apply_server_options(PgFdwRelationInfo *fpinfo); +static void apply_table_options(PgFdwRelationInfo *fpinfo); +static void merge_fdw_options(PgFdwRelationInfo *fpinfo, + const PgFdwRelationInfo *fpinfo_o, + const PgFdwRelationInfo *fpinfo_i); /* @@ -513,31 +518,8 @@ postgresGetForeignRelSize(PlannerInfo *root, fpinfo->shippable_extensions = NIL; fpinfo->fetch_size = 100; - foreach(lc, fpinfo->server->options) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "use_remote_estimate") == 0) - fpinfo->use_remote_estimate = defGetBoolean(def); - else if (strcmp(def->defname, "fdw_startup_cost") == 0) - fpinfo->fdw_startup_cost = strtod(defGetString(def), NULL); - else if (strcmp(def->defname, "fdw_tuple_cost") == 0) - fpinfo->fdw_tuple_cost = strtod(defGetString(def), NULL); - else if (strcmp(def->defname, "extensions") == 0) - fpinfo->shippable_extensions = - ExtractExtensionList(defGetString(def), false); - else if (strcmp(def->defname, "fetch_size") == 0) - fpinfo->fetch_size = strtol(defGetString(def), NULL, 10); - } - foreach(lc, fpinfo->table->options) - { - DefElem *def = (DefElem *) lfirst(lc); - - if (strcmp(def->defname, "use_remote_estimate") == 0) - fpinfo->use_remote_estimate = defGetBoolean(def); - else if (strcmp(def->defname, "fetch_size") == 0) - fpinfo->fetch_size = strtol(defGetString(def), NULL, 10); - } + apply_server_options(fpinfo); + apply_table_options(fpinfo); /* * If the table or the server is configured to use remote estimates, @@ -4114,6 +4096,15 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, if (fpinfo_o->local_conds || fpinfo_i->local_conds) return false; + /* + * Merge FDW options. We might be tempted to do this after we have deemed + * the foreign join to be OK. But we must do this beforehand so that we + * know which quals can be evaluated on the foreign server, which might + * depend on shippable_extensions. + */ + fpinfo->server = fpinfo_o->server; + merge_fdw_options(fpinfo, fpinfo_o, fpinfo_i); + /* * Separate restrict list into join quals and pushed-down (other) quals. * @@ -4279,15 +4270,6 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, /* Mark that this join can be pushed down safely */ fpinfo->pushdown_safe = true; - /* - * If user is willing to estimate cost for a scan of either of the joining - * relations using EXPLAIN, he intends to estimate scans on that relation - * more accurately. Then, it makes sense to estimate the cost of the join - * with that relation more accurately using EXPLAIN. - */ - fpinfo->use_remote_estimate = fpinfo_o->use_remote_estimate || - fpinfo_i->use_remote_estimate; - /* Get user mapping */ if (fpinfo->use_remote_estimate) { @@ -4299,17 +4281,6 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, else fpinfo->user = NULL; - /* Get foreign server */ - fpinfo->server = fpinfo_o->server; - - /* - * Since both the joining relations come from the same server, the server - * level options should have same value for both the relations. Pick from - * any side. - */ - fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; - fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; - /* * Set cached relation costs to some negative value, so that we can detect * when they are set to some sensible costs, during one (usually the @@ -4318,15 +4289,6 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, fpinfo->rel_startup_cost = -1; fpinfo->rel_total_cost = -1; - /* - * Set fetch size to maximum of the joining sides, since we are expecting - * the rows returned by the join to be proportional to the relation sizes. - */ - if (fpinfo_o->fetch_size > fpinfo_i->fetch_size) - fpinfo->fetch_size = fpinfo_o->fetch_size; - else - fpinfo->fetch_size = fpinfo_i->fetch_size; - /* * Set the string describing this join relation to be used in EXPLAIN * output of corresponding ForeignScan. @@ -4384,6 +4346,110 @@ add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, } } +/* + * Parse options from foreign server and apply them to fpinfo. + * + * New options might also require tweaking merge_fdw_options(). + */ +static void +apply_server_options(PgFdwRelationInfo *fpinfo) +{ + ListCell *lc; + + foreach(lc, fpinfo->server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "use_remote_estimate") == 0) + fpinfo->use_remote_estimate = defGetBoolean(def); + else if (strcmp(def->defname, "fdw_startup_cost") == 0) + fpinfo->fdw_startup_cost = strtod(defGetString(def), NULL); + else if (strcmp(def->defname, "fdw_tuple_cost") == 0) + fpinfo->fdw_tuple_cost = strtod(defGetString(def), NULL); + else if (strcmp(def->defname, "extensions") == 0) + fpinfo->shippable_extensions = + ExtractExtensionList(defGetString(def), false); + else if (strcmp(def->defname, "fetch_size") == 0) + fpinfo->fetch_size = strtol(defGetString(def), NULL, 10); + } +} + +/* + * Parse options from foreign table and apply them to fpinfo. + * + * New options might also require tweaking merge_fdw_options(). + */ +static void +apply_table_options(PgFdwRelationInfo *fpinfo) +{ + ListCell *lc; + + foreach(lc, fpinfo->table->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "use_remote_estimate") == 0) + fpinfo->use_remote_estimate = defGetBoolean(def); + else if (strcmp(def->defname, "fetch_size") == 0) + fpinfo->fetch_size = strtol(defGetString(def), NULL, 10); + } +} + +/* + * Merge FDW options from input relations into a new set of options for a join + * or an upper rel. + * + * For a join relation, FDW-specific information about the inner and outer + * relations is provided using fpinfo_i and fpinfo_o. For an upper relation, + * fpinfo_o provides the information for the input relation; fpinfo_i is + * expected to NULL. + */ +static void +merge_fdw_options(PgFdwRelationInfo *fpinfo, + const PgFdwRelationInfo *fpinfo_o, + const PgFdwRelationInfo *fpinfo_i) +{ + /* We must always have fpinfo_o. */ + Assert(fpinfo_o); + + /* fpinfo_i may be NULL, but if present the servers must both match. */ + Assert(!fpinfo_i || + fpinfo_i->server->serverid == fpinfo_o->server->serverid); + + /* + * Copy the server specific FDW options. (For a join, both relations come + * from the same server, so the server options should have the same value + * for both relations.) + */ + fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; + fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; + fpinfo->shippable_extensions = fpinfo_o->shippable_extensions; + fpinfo->use_remote_estimate = fpinfo_o->use_remote_estimate; + fpinfo->fetch_size = fpinfo_o->fetch_size; + + /* Merge the table level options from either side of the join. */ + if (fpinfo_i) + { + /* + * We'll prefer to use remote estimates for this join if any table + * from either side of the join is using remote estimates. This is + * most likely going to be preferred since they're already willing to + * pay the price of a round trip to get the remote EXPLAIN. In any + * case it's not entirely clear how we might otherwise handle this + * best. + */ + fpinfo->use_remote_estimate = fpinfo_o->use_remote_estimate || + fpinfo_i->use_remote_estimate; + + /* + * Set fetch size to maximum of the joining sides, since we are + * expecting the rows returned by the join to be proportional to the + * relation sizes. + */ + fpinfo->fetch_size = Max(fpinfo_o->fetch_size, fpinfo_i->fetch_size); + } +} + /* * postgresGetForeignJoinPaths * Add possible ForeignPath to joinrel, if join is safe to push down. @@ -4714,18 +4780,6 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel) /* Safe to pushdown */ fpinfo->pushdown_safe = true; - /* - * If user is willing to estimate cost for a scan using EXPLAIN, he - * intends to estimate scans on that relation more accurately. Then, it - * makes sense to estimate the cost of the grouping on that relation more - * accurately using EXPLAIN. - */ - fpinfo->use_remote_estimate = ofpinfo->use_remote_estimate; - - /* Copy startup and tuple cost as is from underneath input rel's fpinfo */ - fpinfo->fdw_startup_cost = ofpinfo->fdw_startup_cost; - fpinfo->fdw_tuple_cost = ofpinfo->fdw_tuple_cost; - /* * Set cached relation costs to some negative value, so that we can detect * when they are set to some sensible costs, during one (usually the @@ -4734,9 +4788,6 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel) fpinfo->rel_startup_cost = -1; fpinfo->rel_total_cost = -1; - /* Set fetch size same as that of underneath input rel's fpinfo */ - fpinfo->fetch_size = ofpinfo->fetch_size; - /* * Set the string describing this grouped relation to be used in EXPLAIN * output of corresponding ForeignScan. @@ -4812,13 +4863,13 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, fpinfo->outerrel = input_rel; /* - * Copy foreign table, foreign server, user mapping, shippable extensions - * etc. details from the input relation's fpinfo. + * Copy foreign table, foreign server, user mapping, FDW options etc. + * details from the input relation's fpinfo. */ fpinfo->table = ifpinfo->table; fpinfo->server = ifpinfo->server; fpinfo->user = ifpinfo->user; - fpinfo->shippable_extensions = ifpinfo->shippable_extensions; + merge_fdw_options(fpinfo, ifpinfo , NULL); /* Assess if it is safe to push down aggregation and grouping. */ if (!foreign_grouping_ok(root, grouped_rel)) diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 423eb024bb..509bb547c6 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -447,6 +447,14 @@ SELECT t1.c1, t2.c2, t3.c3 FROM ft2 t1 LEFT JOIN ft2 t2 ON (t1.c1 = t2.c1) RIGHT EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) WHERE (t1.c1 = t2.c1 OR t1.c1 IS NULL) ORDER BY t1.c1, t2.c1 OFFSET 10 LIMIT 10; +-- full outer join + WHERE clause with shippable extensions set +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c2, t1.c3 FROM ft1 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE postgres_fdw_abs(t1.c1) > 0 OFFSET 10 LIMIT 10; +ALTER SERVER loopback OPTIONS (DROP extensions); +-- full outer join + WHERE clause with shippable extensions not set +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, t2.c2, t1.c3 FROM ft1 t1 FULL JOIN ft2 t2 ON (t1.c1 = t2.c1) WHERE postgres_fdw_abs(t1.c1) > 0 OFFSET 10 LIMIT 10; +ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw'); -- join two tables with FOR UPDATE clause -- tests whole-row reference for row marks EXPLAIN (VERBOSE, COSTS OFF)