Improve the planner's simplification of NOT constructs.

This patch merges the responsibility for NOT-flattening into eval_const_expressions' processing. It wasn't done that way originally because prepqual.c is far older than eval_const_expressions. But putting this work into eval_const_expressions saves one pass over the qual trees, and in fact saves even more than that because we can exploit the knowledge that the subexpressions have already been recursively simplified. Doing it this way also lets us do it uniformly over all expressions, whereas prepqual.c formerly just did it at top level to save cycles. That should improve the planner's ability to recognize logically-equivalent constructs.

While at it, also add the ability to fold a NOT into BooleanTest and NullTest constructs (the latter only for the scalar-datatype case).

Per discussion of bug #5702.
2010-10-10 23:19:50 -04:00 · 2010-10-10 23:19:50 -04:00 · 220e45bf32
parent b48b9cb3a4
commit 220e45bf32
3 changed files with 236 additions and 180 deletions
--- a/src/backend/optimizer/prep/prepqual.c
+++ b/src/backend/optimizer/prep/prepqual.c
@@ -32,6 +32,7 @@

 #include "postgres.h"

+#include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/prep.h"
 #include "utils/lsyscache.h"
@@ -39,12 +40,226 @@

 static List *pull_ands(List *andlist);
 static List *pull_ors(List *orlist);
-static Expr *find_nots(Expr *qual);
-static Expr *push_nots(Expr *qual);
 static Expr *find_duplicate_ors(Expr *qual);
 static Expr *process_duplicate_ors(List *orlist);


+/*
+ * negate_clause
+ *	  Negate a Boolean expression.
+ *
+ * Input is a clause to be negated (e.g., the argument of a NOT clause).
+ * Returns a new clause equivalent to the negation of the given clause.
+ *
+ * Although this can be invoked on its own, it's mainly intended as a helper
+ * for eval_const_expressions(), and that context drives several design
+ * decisions.  In particular, if the input is already AND/OR flat, we must
+ * preserve that property.  We also don't bother to recurse in situations
+ * where we can assume that lower-level executions of eval_const_expressions
+ * would already have simplified sub-clauses of the input.
+ *
+ * The difference between this and a simple make_notclause() is that this
+ * tries to get rid of the NOT node by logical simplification.  It's clearly
+ * always a win if the NOT node can be eliminated altogether.  However, our
+ * use of DeMorgan's laws could result in having more NOT nodes rather than
+ * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
+ * important to expose as much top-level AND/OR structure as possible.
+ * Also, eliminating an intermediate NOT may allow us to flatten two levels
+ * of AND or OR together that we couldn't have otherwise.  Finally, one of
+ * the motivations for doing this is to ensure that logically equivalent
+ * expressions will be seen as physically equal(), so we should always apply
+ * the same transformations.
+ */
+Node *
+negate_clause(Node *node)
+{
+	if (node == NULL)			/* should not happen */
+		elog(ERROR, "can't negate an empty subexpression");
+	switch (nodeTag(node))
+	{
+		case T_Const:
+			{
+				Const	   *c = (Const *) node;
+
+				/* NOT NULL is still NULL */
+				if (c->constisnull)
+					return makeBoolConst(false, true);
+				/* otherwise pretty easy */
+				return makeBoolConst(!DatumGetBool(c->constvalue), false);
+			}
+			break;
+		case T_OpExpr:
+			{
+				/*
+				 * Negate operator if possible: (NOT (< A B)) => (>= A B)
+				 */
+				OpExpr	   *opexpr = (OpExpr *) node;
+				Oid			negator = get_negator(opexpr->opno);
+
+				if (negator)
+				{
+					OpExpr	   *newopexpr = makeNode(OpExpr);
+
+					newopexpr->opno = negator;
+					newopexpr->opfuncid = InvalidOid;
+					newopexpr->opresulttype = opexpr->opresulttype;
+					newopexpr->opretset = opexpr->opretset;
+					newopexpr->args = opexpr->args;
+					newopexpr->location = opexpr->location;
+					return (Node *) newopexpr;
+				}
+			}
+			break;
+		case T_ScalarArrayOpExpr:
+			{
+				/*
+				 * Negate a ScalarArrayOpExpr if its operator has a negator;
+				 * for example x = ANY (list) becomes x <> ALL (list)
+				 */
+				ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
+				Oid			negator = get_negator(saopexpr->opno);
+
+				if (negator)
+				{
+					ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
+
+					newopexpr->opno = negator;
+					newopexpr->opfuncid = InvalidOid;
+					newopexpr->useOr = !saopexpr->useOr;
+					newopexpr->args = saopexpr->args;
+					newopexpr->location = saopexpr->location;
+					return (Node *) newopexpr;
+				}
+			}
+			break;
+		case T_BoolExpr:
+			{
+				BoolExpr   *expr = (BoolExpr *) node;
+
+				switch (expr->boolop)
+				{
+					/*--------------------
+					 * Apply DeMorgan's Laws:
+					 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
+					 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
+					 * i.e., swap AND for OR and negate each subclause.
+					 *
+					 * If the input is already AND/OR flat and has no NOT
+					 * directly above AND or OR, this transformation preserves
+					 * those properties.  For example, if no direct child of
+					 * the given AND clause is an AND or a NOT-above-OR, then
+					 * the recursive calls of negate_clause() can't return any
+					 * OR clauses.  So we needn't call pull_ors() before
+					 * building a new OR clause.  Similarly for the OR case.
+					 *--------------------
+					 */
+					case AND_EXPR:
+						{
+							List	   *nargs = NIL;
+							ListCell   *lc;
+
+							foreach(lc, expr->args)
+							{
+								nargs = lappend(nargs,
+												negate_clause(lfirst(lc)));
+							}
+							return (Node *) make_orclause(nargs);
+						}
+						break;
+					case OR_EXPR:
+						{
+							List	   *nargs = NIL;
+							ListCell   *lc;
+
+							foreach(lc, expr->args)
+							{
+								nargs = lappend(nargs,
+												negate_clause(lfirst(lc)));
+							}
+							return (Node *) make_andclause(nargs);
+						}
+						break;
+					case NOT_EXPR:
+						/*
+						 * NOT underneath NOT: they cancel.  We assume the
+						 * input is already simplified, so no need to recurse.
+						 */
+						return (Node *) linitial(expr->args);
+					default:
+						elog(ERROR, "unrecognized boolop: %d",
+							 (int) expr->boolop);
+						break;
+				}
+			}
+			break;
+		case T_NullTest:
+			{
+				NullTest   *expr = (NullTest *) node;
+
+				/*
+				 * In the rowtype case, the two flavors of NullTest are *not*
+				 * logical inverses, so we can't simplify.  But it does work
+				 * for scalar datatypes.
+				 */
+				if (!expr->argisrow)
+				{
+					NullTest   *newexpr = makeNode(NullTest);
+
+					newexpr->arg = expr->arg;
+					newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
+											 IS_NOT_NULL : IS_NULL);
+					newexpr->argisrow = expr->argisrow;
+					return (Node *) newexpr;
+				}
+			}
+			break;
+		case T_BooleanTest:
+			{
+				BooleanTest   *expr = (BooleanTest *) node;
+				BooleanTest   *newexpr = makeNode(BooleanTest);
+
+				newexpr->arg = expr->arg;
+				switch (expr->booltesttype)
+				{
+					case IS_TRUE:
+						newexpr->booltesttype = IS_NOT_TRUE;
+						break;
+					case IS_NOT_TRUE:
+						newexpr->booltesttype = IS_TRUE;
+						break;
+					case IS_FALSE:
+						newexpr->booltesttype = IS_NOT_FALSE;
+						break;
+					case IS_NOT_FALSE:
+						newexpr->booltesttype = IS_FALSE;
+						break;
+					case IS_UNKNOWN:
+						newexpr->booltesttype = IS_NOT_UNKNOWN;
+						break;
+					case IS_NOT_UNKNOWN:
+						newexpr->booltesttype = IS_UNKNOWN;
+						break;
+					default:
+						elog(ERROR, "unrecognized booltesttype: %d",
+							 (int) expr->booltesttype);
+						break;
+				}
+				return (Node *) newexpr;
+			}
+			break;
+		default:
+			/* else fall through */
+			break;
+	}
+
+	/*
+	 * Otherwise we don't know how to simplify this, so just tack on an
+	 * explicit NOT node.
+	 */
+	return (Node *) make_notclause((Expr *) node);
+}
+
+
 /*
 * canonicalize_qual
 *	  Convert a qualification expression to the most useful form.
@@ -72,18 +287,11 @@ canonicalize_qual(Expr *qual)
 		return NULL;

 	/*
-	 * Push down NOTs.	We do this only in the top-level boolean expression,
-	 * without examining arguments of operators/functions. The main reason for
-	 * doing this is to expose as much top-level AND/OR structure as we can,
-	 * so there's no point in descending further.
+	 * Pull up redundant subclauses in OR-of-AND trees.  We do this only
+	 * within the top-level AND/OR structure; there's no point in looking
+	 * deeper.
 	 */
-	newqual = find_nots(qual);
-
-	/*
-	 * Pull up redundant subclauses in OR-of-AND trees.  Again, we do this
-	 * only within the top-level AND/OR structure.
-	 */
-	newqual = find_duplicate_ors(newqual);
+	newqual = find_duplicate_ors(qual);

 	return newqual;
 }
@@ -154,147 +362,6 @@ pull_ors(List *orlist)
 }


-/*
- * find_nots
- *	  Traverse the qualification, looking for NOTs to take care of.
- *	  For NOT clauses, apply push_nots() to try to push down the NOT.
- *	  For AND and OR clause types, simply recurse.	Otherwise stop
- *	  recursing (we do not worry about structure below the top AND/OR tree).
- *
- * Returns the modified qualification.	AND/OR flatness is preserved.
- */
-static Expr *
-find_nots(Expr *qual)
-{
-	if (and_clause((Node *) qual))
-	{
-		List	   *t_list = NIL;
-		ListCell   *temp;
-
-		foreach(temp, ((BoolExpr *) qual)->args)
-			t_list = lappend(t_list, find_nots(lfirst(temp)));
-		return make_andclause(pull_ands(t_list));
-	}
-	else if (or_clause((Node *) qual))
-	{
-		List	   *t_list = NIL;
-		ListCell   *temp;
-
-		foreach(temp, ((BoolExpr *) qual)->args)
-			t_list = lappend(t_list, find_nots(lfirst(temp)));
-		return make_orclause(pull_ors(t_list));
-	}
-	else if (not_clause((Node *) qual))
-		return push_nots(get_notclausearg(qual));
-	else
-		return qual;
-}
-
-/*
- * push_nots
- *	  Push down a NOT as far as possible.
- *
- * Input is an expression to be negated (e.g., the argument of a NOT clause).
- * Returns a new qual equivalent to the negation of the given qual.
- */
-static Expr *
-push_nots(Expr *qual)
-{
-	if (is_opclause(qual))
-	{
-		/*
-		 * Negate an operator clause if possible: (NOT (< A B)) => (>= A B)
-		 * Otherwise, retain the clause as it is (the NOT can't be pushed down
-		 * any farther).
-		 */
-		OpExpr	   *opexpr = (OpExpr *) qual;
-		Oid			negator = get_negator(opexpr->opno);
-
-		if (negator)
-		{
-			OpExpr	   *newopexpr = makeNode(OpExpr);
-
-			newopexpr->opno = negator;
-			newopexpr->opfuncid = InvalidOid;
-			newopexpr->opresulttype = opexpr->opresulttype;
-			newopexpr->opretset = opexpr->opretset;
-			newopexpr->args = opexpr->args;
-			newopexpr->location = opexpr->location;
-			return (Expr *) newopexpr;
-		}
-		else
-			return make_notclause(qual);
-	}
-	else if (qual && IsA(qual, ScalarArrayOpExpr))
-	{
-		/*
-		 * Negate a ScalarArrayOpExpr if there is a negator for its operator;
-		 * for example x = ANY (list) becomes x <> ALL (list). Otherwise,
-		 * retain the clause as it is (the NOT can't be pushed down any
-		 * farther).
-		 */
-		ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) qual;
-		Oid			negator = get_negator(saopexpr->opno);
-
-		if (negator)
-		{
-			ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
-
-			newopexpr->opno = negator;
-			newopexpr->opfuncid = InvalidOid;
-			newopexpr->useOr = !saopexpr->useOr;
-			newopexpr->args = saopexpr->args;
-			newopexpr->location = saopexpr->location;
-			return (Expr *) newopexpr;
-		}
-		else
-			return make_notclause(qual);
-	}
-	else if (and_clause((Node *) qual))
-	{
-		/*--------------------
-		 * Apply DeMorgan's Laws:
-		 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
-		 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
-		 * i.e., swap AND for OR and negate all the subclauses.
-		 *--------------------
-		 */
-		List	   *t_list = NIL;
-		ListCell   *temp;
-
-		foreach(temp, ((BoolExpr *) qual)->args)
-			t_list = lappend(t_list, push_nots(lfirst(temp)));
-		return make_orclause(pull_ors(t_list));
-	}
-	else if (or_clause((Node *) qual))
-	{
-		List	   *t_list = NIL;
-		ListCell   *temp;
-
-		foreach(temp, ((BoolExpr *) qual)->args)
-			t_list = lappend(t_list, push_nots(lfirst(temp)));
-		return make_andclause(pull_ands(t_list));
-	}
-	else if (not_clause((Node *) qual))
-	{
-		/*
-		 * Another NOT cancels this NOT, so eliminate the NOT and stop
-		 * negating this branch.  But search the subexpression for more NOTs
-		 * to simplify.
-		 */
-		return find_nots(get_notclausearg(qual));
-	}
-	else
-	{
-		/*
-		 * We don't know how to negate anything else, place a NOT at this
-		 * level.  No point in recursing deeper, either.
-		 */
-		return make_notclause(qual);
-	}
-}
-
-
 /*--------------------
 * The following code attempts to apply the inverse OR distributive law:
 *		((A AND B) OR (A AND C))  =>  (A AND (B OR C))
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -98,7 +98,7 @@ static List *simplify_or_arguments(List *args,
 static List *simplify_and_arguments(List *args,
 					   eval_const_expressions_context *context,
 					   bool *haveNull, bool *forceFalse);
-static Expr *simplify_boolean_equality(Oid opno, List *args);
+static Node *simplify_boolean_equality(Oid opno, List *args);
 static Expr *simplify_function(Oid funcid,
 				  Oid result_type, int32 result_typmod, List **args,
 				  bool has_named_args,
@@ -2229,7 +2229,7 @@ eval_const_expressions_mutator(Node *node,
 		if (expr->opno == BooleanEqualOperator ||
 			expr->opno == BooleanNotEqualOperator)
 		{
-			simple = simplify_boolean_equality(expr->opno, args);
+			simple = (Expr *) simplify_boolean_equality(expr->opno, args);
 			if (simple)			/* successfully simplified it */
 				return (Node *) simple;
 		}
@@ -2395,24 +2395,12 @@ eval_const_expressions_mutator(Node *node,
 					Assert(list_length(expr->args) == 1);
 					arg = eval_const_expressions_mutator(linitial(expr->args),
 														 context);
-					if (IsA(arg, Const))
-					{
-						Const	   *const_input = (Const *) arg;

-						/* NOT NULL => NULL */
-						if (const_input->constisnull)
-							return makeBoolConst(false, true);
-						/* otherwise pretty easy */
-						return makeBoolConst(!DatumGetBool(const_input->constvalue),
-											 false);
-					}
-					else if (not_clause(arg))
-					{
-						/* Cancel NOT/NOT */
-						return (Node *) get_notclausearg((Expr *) arg);
-					}
-					/* Else we still need a NOT node */
-					return (Node *) make_notclause((Expr *) arg);
+					/*
+					 * Use negate_clause() to see if we can simplify away
+					 * the NOT.
+					 */
+					return negate_clause(arg);
 				}
 			default:
 				elog(ERROR, "unrecognized boolop: %d",
@@ -3222,11 +3210,11 @@ simplify_and_arguments(List *args,
 * We come here only if simplify_function has failed; therefore we cannot
 * see two constant inputs, nor a constant-NULL input.
 */
-static Expr *
+static Node *
 simplify_boolean_equality(Oid opno, List *args)
 {
-	Expr	   *leftop;
-	Expr	   *rightop;
+	Node	   *leftop;
+	Node	   *rightop;

 	Assert(list_length(args) == 2);
 	leftop = linitial(args);
@@ -3239,12 +3227,12 @@ simplify_boolean_equality(Oid opno, List *args)
 			if (DatumGetBool(((Const *) leftop)->constvalue))
 				return rightop; /* true = foo */
 			else
-				return make_notclause(rightop); /* false = foo */
+				return negate_clause(rightop); /* false = foo */
 		}
 		else
 		{
 			if (DatumGetBool(((Const *) leftop)->constvalue))
-				return make_notclause(rightop); /* true <> foo */
+				return negate_clause(rightop); /* true <> foo */
 			else
 				return rightop; /* false <> foo */
 		}
@@ -3257,12 +3245,12 @@ simplify_boolean_equality(Oid opno, List *args)
 			if (DatumGetBool(((Const *) rightop)->constvalue))
 				return leftop;	/* foo = true */
 			else
-				return make_notclause(leftop);	/* foo = false */
+				return negate_clause(leftop);	/* foo = false */
 		}
 		else
 		{
 			if (DatumGetBool(((Const *) rightop)->constvalue))
-				return make_notclause(leftop);	/* foo <> true */
+				return negate_clause(leftop);	/* foo <> true */
 			else
 				return leftop;	/* foo <> false */
 		}
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -33,6 +33,7 @@ extern Relids get_relids_for_join(PlannerInfo *root, int joinrelid);
 /*
 * prototypes for prepqual.c
 */
+extern Node *negate_clause(Node *node);
 extern Expr *canonicalize_qual(Expr *qual);

 /*