greenplumn cdbsubselect 源码

2022-08-18
浏览 (363)

greenplumn cdbsubselect 代码

文件路径：/src/backend/cdb/cdbsubselect.c

/*-------------------------------------------------------------------------
 *
 * cdbsubselect.c
 *	  Flattens subqueries, transforms them to joins.
 *
 * Portions Copyright (c) 2005-2008, Greenplum inc
 * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
 *
 *
 * IDENTIFICATION
 *	    src/backend/cdb/cdbsubselect.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/skey.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "optimizer/subselect.h"	/* convert_testexpr() */
#include "optimizer/tlist.h"
#include "optimizer/prep.h"		/* canonicalize_qual() */
#include "parser/parse_oper.h"	/* make_op() */
#include "parser/parse_expr.h"
#include "parser/parse_relation.h"	/* addRangeTableEntryForSubquery() */
#include "parser/parsetree.h"	/* rt_fetch() */
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"	/* get_op_btree_interpretation() */
#include "utils/syscache.h"
#include "cdb/cdbsubselect.h"	/* me */
#include "lib/stringinfo.h"
#include "cdb/cdbpullup.h"

static int	add_expr_subquery_rte(Query *parse, Query *subselect);

static JoinExpr *make_join_expr(Node *larg, int r_rtindex, int join_type);

static Node *make_lasj_quals(PlannerInfo *root, SubLink *sublink, int subquery_indx);

static Node *add_null_match_clause(Node *clause);

typedef struct NonNullableVarsContext
{
	Query	   *query;              /* Query in question. */
	List       *varsToCheck;        /* Vars to check when walking to RTE */
	List	   *nonNullableVars;	/* Known non-nullable vars */
} NonNullableVarsContext;

typedef struct FindAllVarsContext
{
	List       *rtable;
	List       *vars;
} FindAllVarsContext;


/**
 * Walker that performs the following tasks:
 * - It checks if a given expr is "safe" to be pulled up to be a join
 * - Extracts out the vars from the outer query in the qual in order
 * - Extracts out the vars from the inner query in the qual in order
 */
typedef struct ConvertSubqueryToJoinContext
{
	bool		safeToConvert;	/* Can correlated expression subquery be
								 * pulled up? */
	Node	   *joinQual;		/* Qual to employ to join subquery */
	Node	   *innerQual;		/* Qual to leave behind in subquery */
	List	   *targetList;		/* targetlist for subquery */
	List	   *groupClause;	/* grouping clause for subquery */
} ConvertSubqueryToJoinContext;

static void ProcessSubqueryToJoin(Query *subselect, ConvertSubqueryToJoinContext *context);
static void ProcessSubqueryToJoin_walker(Node *jtree, ConvertSubqueryToJoinContext *context);
static void SubqueryToJoinWalker(Node *node, ConvertSubqueryToJoinContext *context);
static void RemoveInnerJoinQuals(Query *subselect);
static void RemoveInnerJoinQuals_walker(Node *jtree);

static bool cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context);
static bool is_attribute_nonnullable(Oid relationOid, AttrNumber attrNumber);
static List *fetch_targetlist_exprs(List *targetlist);
static List *fetch_outer_exprs(Node *testexpr);
static bool  is_exprs_nullable(Node *exprs, Query *query);
static bool  is_exprs_nullable_internal(Node *exprs, List *nonnullable_vars, List *rtable);
static List *cdb_find_all_vars(Node *exprs, List *rtable);
static bool  cdb_find_all_vars_walker(Node *node, FindAllVarsContext *context);
static Var *cdb_map_to_base_var(Var *var, List *rtable);

#define DUMMY_COLUMN_NAME "zero"

/**
 * Initialize context.
 */
static void
InitConvertSubqueryToJoinContext(ConvertSubqueryToJoinContext *ctx)
{
	Assert(ctx);
	ctx->safeToConvert = true;
	ctx->joinQual = NULL;
	ctx->innerQual = NULL;
	ctx->groupClause = NIL;
	ctx->targetList = NIL;
}

/**
 * Process correlated opexpr of the form foo(outer.var) OP bar(inner.var). Extracts
 * bar(inner.var) as innerExpr.
 * Returns true, if this is not a compatible correlated opexpr.
 */
static bool
IsCorrelatedOpExpr(OpExpr *opexp, Expr **innerExpr)
{
	Expr	   *e1;
	Expr	   *e2;

	if (list_length(opexp->args) != 2)
		return false;

	e1 = (Expr *) list_nth(opexp->args, 0);
	e2 = (Expr *) list_nth(opexp->args, 1);

	/*
	 * One of the vars must be outer, and other must be inner.
	 */
	if (contain_vars_of_level((Node *) e1, 1) &&
			!contain_vars_of_level((Node *) e1, 0) &&
			contain_vars_of_level((Node *) e2, 0) &&
			!contain_vars_of_level((Node *) e2, 1))
	{
		*innerExpr = (Expr *) copyObject(e2);

		return true;
	}

	if (contain_vars_of_level((Node *) e1, 0) &&
			!contain_vars_of_level((Node *) e1, 1) &&
			contain_vars_of_level((Node *) e2, 1) &&
			!contain_vars_of_level((Node *) e2, 0))
	{
		*innerExpr = (Expr *) copyObject(e1);

		return true;
	}

	return false;
}

/**
 * Checks if an opexpression is of the form (foo(outervar) = bar(innervar))
 * Input:
 *	opexp - op expression
 * Output:
 *	returns true if correlated equality condition
 *	*innerExpr - points to the inner expr i.e. bar(innervar) in the condition
 *	*eqOp and *sortOp - equality and < operators, to implement the condition as a mergejoin.
 */
static bool
IsCorrelatedEqualityOpExpr(OpExpr *opexp, Expr **innerExpr, Oid *eqOp, Oid *sortOp, bool *hashable)
{
	Oid			opfamily;
	Oid			ltype;
	Oid			rtype;
	List	   *l;

	Assert(opexp);
	Assert(list_length(opexp->args) > 1);
	Assert(innerExpr);
	Assert(eqOp);
	Assert(sortOp);

	/*
	 * If this is an expression of the form a = b, then we want to know about
	 * the vars involved.
	 */
	if (!op_mergejoinable(opexp->opno, exprType(linitial(opexp->args))))
		return false;

	/*
	 * Arbitrarily use the first operator family containing the operator that
	 * we can find.
	 */
	l = get_mergejoin_opfamilies(opexp->opno);
	if (l == NIL)
		return false;

	opfamily = linitial_oid(l);
	list_free(l);

	/*
	 * Look up the correct sort operator from the chosen opfamily.
	 */
	ltype = exprType(linitial(opexp->args));
	rtype = exprType(lsecond(opexp->args));
	*eqOp = get_opfamily_member(opfamily, ltype, rtype, BTEqualStrategyNumber);
	if (!OidIsValid(*eqOp))	/* should not happen */
		elog(ERROR, "could not find member %d(%u,%u) of opfamily %u",
			 BTEqualStrategyNumber, ltype, rtype, opfamily);

	*sortOp = get_opfamily_member(opfamily, ltype, rtype, BTLessStrategyNumber);
	if (!OidIsValid(*sortOp))	/* should not happen */
		elog(ERROR, "could not find member %d(%u,%u) of opfamily %u",
			 BTLessStrategyNumber, ltype, rtype, opfamily);

	*hashable = op_hashjoinable(*eqOp, ltype);

	if (!IsCorrelatedOpExpr(opexp, innerExpr))
		return false;

	return true;
}

/**
 * Process subquery to extract useful information to be able to convert it to
 * a join.
 *
 * This scans the join tree, and verifies that it consists entirely of inner
 * joins. The inner joins can be represented as explicit JOIN_INNER JoinExprs*
 * or as FromExprs. All the join quals are collected in context->innerQual.
 *
 * context->safeToConvert must be 'true' on entry. This sets it to false if
 * there are any non-inner joins in the tree.
 */
static void
ProcessSubqueryToJoin(Query *subselect, ConvertSubqueryToJoinContext *context)
{
	Assert(context);
	Assert(context->safeToConvert);
	Assert(subselect);

	ProcessSubqueryToJoin_walker((Node *) subselect->jointree, context);
}

static void
ProcessSubqueryToJoin_walker(Node *jtree, ConvertSubqueryToJoinContext *context)
{
	if (IsA(jtree, JoinExpr))
	{
		JoinExpr   *je = (JoinExpr *) jtree;

		/*
		 * If subselect's join tree is not a plain relation or an inner join,
		 * we refuse to convert.
		 */
		if (je->jointype != JOIN_INNER)
		{
			context->safeToConvert = false;
			return;
		}

		ProcessSubqueryToJoin_walker(je->larg, context);
		if (!context->safeToConvert)
			return;
		ProcessSubqueryToJoin_walker(je->rarg, context);
		if (!context->safeToConvert)
			return;

		SubqueryToJoinWalker(je->quals, context);
	}
	else if (IsA(jtree, FromExpr))
	{
		FromExpr   *fe = (FromExpr *) jtree;
		ListCell   *lc;

		foreach(lc, fe->fromlist)
		{
			ProcessSubqueryToJoin_walker(lfirst(lc), context);
			if (!context->safeToConvert)
				return;
		}

		SubqueryToJoinWalker(fe->quals, context);
	}
	else if (IsA(jtree, RangeTblRef))
	{
		/* nothing to do */
	}
	else
	{
		elog(ERROR, "unexpected node of type %d in join tree", jtree->type);
	}
}

/**
 * Wipe out join quals i.e. top-level where clause and any quals in the top-level inner join.
 */
static void
RemoveInnerJoinQuals(Query *subselect)
{
	RemoveInnerJoinQuals_walker((Node *) subselect->jointree);
}

static void
RemoveInnerJoinQuals_walker(Node *jtree)
{
	if (IsA(jtree, JoinExpr))
	{
		JoinExpr   *je = (JoinExpr *) jtree;

		/*
		 * We already checked in ProcessSubqueryToJoin() that there
		 * are no outer joins, but doesn't hurt to check again.
		 */
		if (je->jointype != JOIN_INNER)
		{
			elog(ERROR, "unexpected join type encountered while converting subquery to join");
		}

		RemoveInnerJoinQuals_walker(je->larg);
		RemoveInnerJoinQuals_walker(je->rarg);

		je->quals = NULL;
	}
	else if (IsA(jtree, FromExpr))
	{
		FromExpr   *fe = (FromExpr *) jtree;
		ListCell   *lc;

		foreach(lc, fe->fromlist)
		{
			RemoveInnerJoinQuals_walker(lfirst(lc));
		}

		fe->quals = NULL;
	}
	else if (IsA(jtree, RangeTblRef))
	{
		/* nothing to do */
	}
	else
	{
		elog(ERROR, "unexpected node of type %d in join tree", jtree->type);
	}
}

/**
 * This method recursively walks down the quals of an expression subquery to see if it can be pulled up to a join
 * and constructs the pieces necessary to perform the pullup.
 * E.g. SELECT * FROM outer o WHERE o.a < (SELECT avg(i.x) FROM inner i WHERE o.b = i.y)
 * This extracts interesting pieces of the subquery so as to create SELECT i.y, avg(i.x) from inner i GROUP by i.y
 */
static void
SubqueryToJoinWalker(Node *node, ConvertSubqueryToJoinContext *context)
{
	Assert(context);
	Assert(context->safeToConvert);

	if (node == NULL)
	{
		return;
	}

	if (IsA(node, BoolExpr))
	{

		/**
		 * Be extremely conservative. If there are any outer vars under an or or a not expression, then give up.
		 */
		if (is_notclause(node)
			|| is_orclause(node))
		{
			if (contain_vars_of_level_or_above(node, 1))
			{
				context->safeToConvert = false;
				return;
			}
			context->innerQual = make_and_qual(context->innerQual, node);
			return;
		}

		Assert(is_andclause(node));

		BoolExpr   *bexp = (BoolExpr *) node;
		ListCell   *lc = NULL;

		foreach(lc, bexp->args)
		{
			Node	   *arg = (Node *) lfirst(lc);

			/**
			 * If there is an outer var anywhere in the boolean expression, walk recursively.
			 */
			if (contain_vars_of_level_or_above(arg, 1))
			{
				SubqueryToJoinWalker(arg, context);

				if (!context->safeToConvert)
				{
					return;
				}
			}
			else
			{
				/**
				 * This qual should be part of the subquery's inner qual.
				 */
				context->innerQual = make_and_qual(context->innerQual, arg);
			}
		}
		return;
	}

	/**
	 * If this is a correlated opexpression, we'd need to look inside.
	 */
	else if (contain_vars_of_level_or_above(node, 1) && IsA(node, OpExpr))
	{
		OpExpr	   *opexp = (OpExpr *) node;

		/**
		 * If this is an expression of the form foo(outervar) = bar(innervar), then we want to know about the inner expression.
		 */
		Oid			eqOp = InvalidOid;
		Oid			sortOp = InvalidOid;
		bool		hashable = false;
		Expr	   *innerExpr = NULL;
		bool		considerOpExpr = false;

		considerOpExpr = IsCorrelatedEqualityOpExpr(opexp, &innerExpr, &eqOp, &sortOp, &hashable);

		if (considerOpExpr)
		{
			TargetEntry *tle;
			SortGroupClause *gc;

			tle = makeTargetEntry(innerExpr,
											   list_length(context->targetList) + 1,
											   NULL,
											   false);
			tle->ressortgroupref = list_length(context->targetList) + 1;
			context->targetList = lappend(context->targetList, tle);

			gc = makeNode(SortGroupClause);
			gc->tleSortGroupRef = list_length(context->groupClause) + 1;
			gc->eqop = eqOp;
			gc->sortop = sortOp;
			gc->hashable = hashable;
			context->groupClause = lappend(context->groupClause, gc);

			Assert(list_length(context->groupClause) == list_length(context->targetList));

			context->joinQual = make_and_qual(context->joinQual, (Node *) opexp);

			return;
		}

		/**
		 * Correlated join expression contains incompatible operators. Not safe to convert.
		 */
		context->safeToConvert = false;
	}
	else if (contain_vars_of_level_or_above(node, 1))
	{
		/*
		 * This is a correlated expression, but we don't know how to deal with
		 * it. Give up.
		 */
		context->safeToConvert = false;
	}

	return;
}



/**
 * Safe to convert expr sublink to a join
 */
static bool
safe_to_convert_EXPR(SubLink *sublink, ConvertSubqueryToJoinContext *ctx1)
{
	Assert(ctx1);

	Query	   *subselect = (Query *) sublink->subselect;

	if (subselect->jointree->fromlist == NULL)
		return false;

	if (expression_returns_set((Node *) subselect->targetList))
		return false;

	/* No set operations in the subquery */
	if (subselect->setOperations)
		return false;

	/**
	 * If there are no correlations in the WHERE clause, then don't bother.
	 */
	if (!IsSubqueryCorrelated(subselect))
		return false;

	/**
	 * If deeply correlated, don't bother.
	 */
	if (IsSubqueryMultiLevelCorrelated(subselect))
		return false;

	/**
	 * If there are correlations in a func expr in the from clause, then don't bother.
	 */
	if (has_correlation_in_funcexpr_rte(subselect->rtable))
	{
		return false;
	}

	/**
	 * If there is a having qual, then don't bother.
	 */
	if (subselect->havingQual != NULL)
		return false;

	/**
	 * If it does not have aggs, then don't bother. This could result in a run-time error.
	 */
	if (!subselect->hasAggs)
		return false;

	/**
	 * A LIMIT or OFFSET could interfere with the transformation of the
	 * correlated qual to GROUP BY. (LIMIT >0 in a subquery that contains a
	 * plain aggregate is actually a no-op, so we could try to remove it,
	 * but it doesn't seem worth the trouble to optimize queries with
	 * pointless limits like that.)
	 */
	if (subselect->limitOffset || subselect->limitCount)
		return false;

	/**
	 * Cannot support grouping clause in subselect.
	 */
	if (subselect->groupClause)
		return false;

	/**
	 * If targetlist of the subquery does not contain exactly one element, don't bother.
	 */
	if (list_length(subselect->targetList) != 1)
		return false;


	/**
	 * Walk the quals of the subquery to do a more fine grained check as to whether this subquery
	 * may be pulled up. Identify useful fragments to construct join condition if possible to pullup.
	 */
	ProcessSubqueryToJoin(subselect, ctx1);

	/**
	 * There should be no outer vars in innerQual
	 */
	Assert(!contain_vars_of_level_or_above(ctx1->innerQual, 1));

	return ctx1->safeToConvert;
}


/**
 * convert_EXPR_to_join
 *
 * Method attempts to convert an EXPR_SUBLINK of the form select * from T where a > (select 10*avg(x) from R where T.b=R.y)
 */
JoinExpr *
convert_EXPR_to_join(PlannerInfo *root, OpExpr *opexp)
{
	Assert(root);
	Assert(list_length(opexp->args) == 2);
	Node	   *rarg = list_nth(opexp->args, 1);

	Assert(IsA(rarg, SubLink));
	SubLink    *sublink = (SubLink *) rarg;

	ConvertSubqueryToJoinContext ctx1;

	InitConvertSubqueryToJoinContext(&ctx1);

	if (safe_to_convert_EXPR(sublink, &ctx1))
	{
		Query	   *subselect = (Query *) copyObject(sublink->subselect);

		Assert(IsA(subselect, Query));

		/**
		 * Original subselect must have a single output column (e.g. 10*avg(x) )
		 */
		Assert(list_length(subselect->targetList) == 1);

		/**
		 * To pull up the subquery, we need to construct a new "Query" object that has grouping
		 * columns extracted from the correlated join predicate and the extra column from the subquery's
		 * targetlist.
		 */
		TargetEntry *origSubqueryTLE = (TargetEntry *) list_nth(subselect->targetList, 0);

		List	   *subselectTargetList = (List *) copyObject(ctx1.targetList);

		subselectTargetList = add_to_flat_tlist(subselectTargetList, list_make1(origSubqueryTLE->expr));
		subselect->targetList = subselectTargetList;
		subselect->groupClause = ctx1.groupClause;

		RemoveInnerJoinQuals(subselect);

		subselect->jointree->quals = ctx1.innerQual;

		/**
		 * Construct a new range table entry for the new pulled up subquery.
		 */
		int			rteIndex = add_expr_subquery_rte(root->parse, subselect);

		Assert(rteIndex > 0);

		/**
		 * Construct the join expression involving the new pulled up subselect.
		 */
		JoinExpr   *join_expr = make_join_expr(NULL, rteIndex, JOIN_INNER);
		Node	   *joinQual = ctx1.joinQual;

		/**
		 * Make outer ones regular and regular ones correspond to rteIndex
		 */
		joinQual = (Node *) cdbpullup_expr((Expr *) joinQual, subselect->targetList, NULL, rteIndex);
		IncrementVarSublevelsUp(joinQual, -1, 1);

		join_expr->quals = joinQual;

		TargetEntry *subselectAggTLE = (TargetEntry *) list_nth(subselect->targetList, list_length(subselect->targetList) - 1);

		/**
		 *	modify the op expr to involve the column that has the computed aggregate that needs to compared.
		 */
		Var		   *aggVar = (Var *) makeVar(rteIndex,
											 subselectAggTLE->resno,
											 exprType((Node *) subselectAggTLE->expr),
											 exprTypmod((Node *) subselectAggTLE->expr),
											 exprCollation((Node *) subselectAggTLE->expr),
											 0);

		list_nth_replace(opexp->args, 1, aggVar);

		return join_expr;
	}

	return NULL;
}

/* NOTIN subquery transformation -start */

/* check if NOT IN conversion to antijoin is possible */
static bool
safe_to_convert_NOTIN(SubLink *sublink, Relids available_rels)
{
	Query	   *subselect = (Query *) sublink->subselect;
	Relids		left_varnos;

	/* cases we don't currently handle are listed below. */

	/* ARRAY sublinks have empty test expressions */
	if (sublink->testexpr == NULL)
		return false;

	/* No volatile functions in the subquery */
	if (contain_volatile_functions(sublink->testexpr))
		return false;

	/**
	 * If there are correlations in a func expr in the from clause, then don't bother.
	 */
	if (has_correlation_in_funcexpr_rte(subselect->rtable))
	{
		return false;
	}

	/* Left-hand expressions must contain some Vars of the current */
	left_varnos = pull_varnos(sublink->testexpr);
	if (bms_is_empty(left_varnos))
		return false;

	/*
	 * However, it can't refer to anything outside available_rels.
	 */
	if (!bms_is_subset(left_varnos, available_rels))
		return false;

	/* Correlation - subquery referencing Vars of parent not handled */
	if (contain_vars_of_level((Node *) subselect, 1))
		return false;

	/* No set operations in the subquery */
	if (subselect->setOperations)
		return false;

	return true;
}

/*
 * Find if the supplied targetlist has any resjunk
 * entries. We only have to check the tail since
 * resjunks (if any) can only appear in the end.
 */
inline static bool
has_resjunk(List *tlist)
{
	bool		resjunk = false;
	Node	   *tlnode = (Node *) (lfirst(tlist->tail));

	if (IsA(tlnode, TargetEntry))
	{
		TargetEntry *te = (TargetEntry *) tlnode;

		if (te->resjunk)
			resjunk = true;
	}
	return resjunk;
}

/* add a dummy constant var to the end of the supplied list */
static List *
add_dummy_const(List *tlist)
{
	TargetEntry *dummy;
	Const	   *zconst;
	int			resno;

	zconst = makeConst(INT4OID, -1, InvalidOid, sizeof(int32), (Datum) 0,
					   false, true);	/* isnull, byval */
	resno = list_length(tlist) + 1;
	dummy = makeTargetEntry((Expr *) zconst,
							resno,
							DUMMY_COLUMN_NAME,
							false /* resjunk */ );

	if (tlist == NIL)
		tlist = list_make1(dummy);
	else
		tlist = lappend(tlist, dummy);

	return tlist;
}

/* Add a dummy variable to the supplied target list. The
 * variable is added to end of targetlist but before all
 * resjunk vars (if any). The caller should make use of
 * the returned targetlist since this code might modify
 * the list in-place.
 */
static List *
mutate_targetlist(List *tlist)
{
	List	   *new_list = NIL;

	if (has_resjunk(tlist))
	{
		ListCell   *curr = NULL;
		bool		junk = false;

		foreach(curr, tlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(curr);

			if (tle->resjunk)
			{
				tle->resno = tle->resno + 1;
				if (!junk)
				{
					junk = true;
					new_list = add_dummy_const(new_list);
				}
			}
			new_list = lappend(new_list, tle);
		}
	}
	else
	{
		new_list = tlist;
		new_list = add_dummy_const(new_list);
	}
	return new_list;
}

/* Pulls up the subquery into the top-level range table.
 * Before that add a dummy column zero to the target list
 * of the subquery.
 */
static int
add_notin_subquery_rte(Query *parse, Query *subselect)
{
	RangeTblEntry *subq_rte;
	int			subq_indx;

	/*
	 * Create a RTE entry in the parent query for the subquery.
	 * It is marked as lateral, because any correlation quals will
	 * refer to other RTEs in the parent query.
	 */
	subselect->targetList = mutate_targetlist(subselect->targetList);
	subq_rte = addRangeTableEntryForSubquery(NULL,	/* pstate */
											 subselect,
											 makeAlias("NotIn_SUBQUERY", NIL),
											 false, /* not lateral */
											 false /* inFromClause */ );
	parse->rtable = lappend(parse->rtable, subq_rte);

	/* assume new rte is at end */
	subq_indx = list_length(parse->rtable);
	Assert(subq_rte == rt_fetch(subq_indx, parse->rtable));

	return subq_indx;
}

/*
 * Pulls up the expr sublink subquery into the top-level range table.
 */
static int
add_expr_subquery_rte(Query *parse, Query *subselect)
{
	RangeTblEntry *subq_rte;
	int			subq_indx;

	/**
	 * Generate column names.
	 * TODO: improve this to keep old names around
	 */
	ListCell   *lc = NULL;
	int			teNum = 0;

	foreach(lc, subselect->targetList)
	{
		TargetEntry *te = (TargetEntry *) lfirst(lc);

		te->resname = psprintf("csq_c%d", teNum);
		teNum++;
	}

	/*
	 * Create a RTE entry in the parent query for the subquery.
	 * It is marked as lateral, because any correlation quals will
	 * refer to other RTEs in the parent query.
	 */
	subq_rte = addRangeTableEntryForSubquery(NULL,	/* pstate */
											 subselect,
											 makeAlias("Expr_SUBQUERY", NIL),
											 true, /* lateral */
											 false /* inFromClause */ );
	parse->rtable = lappend(parse->rtable, subq_rte);

	/* assume new rte is at end */
	subq_indx = list_length(parse->rtable);
	Assert(subq_rte == rt_fetch(subq_indx, parse->rtable));

	return subq_indx;
}


/* Create a join expression linking the supplied larg node
 * with the pulled up NOT IN subquery located at r_rtindex
 * in the range table. The appropriate JOIN_RTE has already
 * been created by the caller and can be located at j_rtindex
 */
static JoinExpr *
make_join_expr(Node *larg, int r_rtindex, int join_type)
{
	JoinExpr   *jexpr;
	RangeTblRef *rhs;

	rhs = makeNode(RangeTblRef);
	rhs->rtindex = r_rtindex;

	jexpr = makeNode(JoinExpr);
	jexpr->jointype = join_type;
	jexpr->isNatural = false;
	jexpr->larg = larg;
	jexpr->rarg = (Node *) rhs;
	jexpr->rtindex = 0;

	return jexpr;
}

/*
 * Convert subquery's test expr to a suitable predicate.
 * If we wanted to add correlated subquery support, this would be the place to do it.
 */
static Node *
make_lasj_quals(PlannerInfo *root, SubLink *sublink, int subquery_indx)
{
	Query	   *subselect = (Query *) sublink->subselect;
	Expr	   *join_pred;
	List	   *subquery_vars;

	Assert(sublink->subLinkType == ALL_SUBLINK);

	/*
	 * Build a list of Vars representing the subselect outputs.
	 */
	subquery_vars = generate_subquery_vars(root,
										   subselect->targetList,
										   subquery_indx);

	/*
	 * Build the result qual expression, replacing Params with these Vars.
	 */
	join_pred = (Expr *) convert_testexpr(root,
										  sublink->testexpr,
										  subquery_vars);

	join_pred = canonicalize_qual(make_notclause(join_pred), false);

	Assert(join_pred != NULL);
	return (Node *) join_pred;
}

/* add IS NOT FALSE clause on top of the clause */
static Node *
add_null_match_clause(Node *clause)
{
	BooleanTest *btest;

	Assert(clause != NULL);
	btest = makeNode(BooleanTest);
	btest->arg = (Expr *) clause;
	btest->booltesttype = IS_NOT_FALSE;
	return (Node *) btest;
}


/**
 * Is the attribute of a base relation non-nullable?
 * Input:
 *	relationOid
 *	attributeNumber
 * Output:
 *	true if the attribute is non-nullable
 */
static bool
is_attribute_nonnullable(Oid relationOid, AttrNumber attrNumber)
{
	HeapTuple	attributeTuple;
	Form_pg_attribute attribute;
	bool		result = true;

	attributeTuple = SearchSysCache2(ATTNUM,
									 ObjectIdGetDatum(relationOid),
									 Int16GetDatum(attrNumber));
	if (!HeapTupleIsValid(attributeTuple))
		return false;

	attribute = (Form_pg_attribute) GETSTRUCT(attributeTuple);

	if (attribute->attisdropped)
		result = false;

	if (!attribute->attnotnull)
		result = false;

	ReleaseSysCache(attributeTuple);

	return result;
}


/**
 * This walker goes through a query's join-tree to determine the set of non-nullable
 * vars. E.g.
 * select x from t1, t2 where x=y .. the walker determines that x and y are involved in an inner join
 * and therefore are non-nullable.
 * select x from t1 where x > 20 .. the walker determines that the quals ensures that x is non-nullable
 *
 */
static bool
cdb_find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context)
{
	Assert(context);
	Assert(context->query);

	if (node == NULL)
		return false;

	switch (nodeTag(node))
	{
		case T_Var:
			{
				Var		   *var = (Var *) node;

				if (var->varlevelsup == 0)
				{
					context->nonNullableVars = list_append_unique(context->nonNullableVars, var);
				}
				return false;
			}
		case T_FuncExpr:
			{
				FuncExpr   *expr = (FuncExpr *) node;

				if (!func_strict(expr->funcid))
				{
					/*
					 * If a function is not strict, it can return non-null
					 * values for null inputs. Thus, input vars can be null
					 * and sneak through. Therefore, ignore all vars
					 * underneath.
					 */
					return false;
				}
				break;
			}
		case T_OpExpr:
			{
				OpExpr	   *expr = (OpExpr *) node;

				if (!op_strict(expr->opno))
				{
					/*
					 * If an op is not strict, it can return non-null values
					 * for null inputs. Ignore all vars underneath.
					 */
					return false;
				}

				break;
			}
		case T_BoolExpr:
			{
				BoolExpr   *expr = (BoolExpr *) node;

				if (expr->boolop == NOT_EXPR)
				{
					/**
					 * Not negates all conditions underneath. We choose to not handle
					 * this situation.
					 */
					return false;
				}
				else if (expr->boolop == OR_EXPR)
				{
					/**
					 * We add the intersection of variables determined to be
					 * non-nullable by each arg to the OR expression.
					 */
					NonNullableVarsContext c1;

					c1.query = context->query;
					c1.varsToCheck = context->varsToCheck;
					c1.nonNullableVars = NIL;
					ListCell   *lc = NULL;
					int			orArgNum = 0;

					foreach(lc, expr->args)
					{
						Node	   *orArg = lfirst(lc);

						NonNullableVarsContext c2;

						c2.query = context->query;
						c2.varsToCheck = context->varsToCheck;
						c2.nonNullableVars = NIL;
						expression_tree_walker(orArg, cdb_find_nonnullable_vars_walker, &c2);

						if (orArgNum == 0)
						{
							Assert(c1.nonNullableVars == NIL);
							c1.nonNullableVars = c2.nonNullableVars;
						}
						else
						{
							c1.nonNullableVars = list_intersection(c1.nonNullableVars, c2.nonNullableVars);
						}
						orArgNum++;
					}

					context->nonNullableVars = list_concat_unique(context->nonNullableVars, c1.nonNullableVars);
					return false;
				}

				Assert(expr->boolop == AND_EXPR);

				/**
				 * AND_EXPR is automatically handled by the walking algorithm.
				 */
				break;
			}
		case T_NullTest:
			{
				NullTest   *expr = (NullTest *) node;

				if (expr->nulltesttype != IS_NOT_NULL)
				{
					return false;
				}

				break;
			}
		case T_BooleanTest:
			{
				BooleanTest *expr = (BooleanTest *) node;

				if (!(expr->booltesttype == IS_NOT_UNKNOWN
					  || expr->booltesttype == IS_TRUE
					  || expr->booltesttype == IS_FALSE))
				{
					/* Other tests may allow a null value to pass through. */
					return false;
				}
				break;
			}
		case T_JoinExpr:
			{
				JoinExpr   *expr = (JoinExpr *) node;

				if (expr->jointype != JOIN_INNER)
				{
					/* Do not descend below any other join type */
					return false;
				}
				break;
			}
		case T_FromExpr:
		case T_List:
			{
				/*
				 * Top-level where clause is fine -- equivalent to an inner
				 * join
				 */
				break;
			}
		case T_RangeTblRef:
			{
				/*
				 * If we've gotten this far, then we can look for non-null
				 * constraints on the vars in the query's targetlist.
				 */
				RangeTblRef *rtf = (RangeTblRef *) node;
				RangeTblEntry *rte = rt_fetch(rtf->rtindex, context->query->rtable);

				if (rte->rtekind == RTE_RELATION)
				{
					/*
					 * Find all vars in the query's targetlist that are from
					 * this relation and check if the attribute is
					 * non-nullable by base table constraint.
					 */

					ListCell   *lc = NULL;

					foreach(lc, context->varsToCheck)
					{
						Var		   *var = (Var *) lfirst(lc);

						if (var->varno == rtf->rtindex)
						{
							int			attNum = var->varattno;
							int			relOid = rte->relid;

							Assert(relOid != InvalidOid);

							if (is_attribute_nonnullable(relOid, attNum))
							{
								/*
								 * Base table constraint on the var. Add
								 * it to the list!
								 */
								context->nonNullableVars = list_append_unique(context->nonNullableVars, var);
							}
						}
					}
				}
				else if (rte->rtekind == RTE_SUBQUERY)
				{
					/*
					 * When the RTE is a subquery, the algorithm to extend non-nullable Vars is:
					 *   1. based on the interested VarsToCheck, find those pointing to this RTE,
					 *      and build a list of exprs containing the corresponding target entry's
					 *      expr.
					 *   2. recusrively invoke is_exprs_nullable for The new list of exprs and the
					 *      subquery:
					 *        a. if they are non-nullable, let's expand the knowledage databse
					 *           using the corresponding Vars in original VarsToCheck
					 *        b. otherwise, we know the result is nullable, terminate the walk.
					 */
					ListCell *lc = NULL;
					List     *exprs = NIL;
					foreach(lc, context->varsToCheck)
					{
						Var *var = (Var *) lfirst(lc);
						if (var->varno == rtf->rtindex)
						{
							TargetEntry *tle;
							tle = list_nth(rte->subquery->targetList, var->varattno-1);
							exprs = lappend(exprs, tle->expr);
						}
					}

					/*
					 * The vars fetched from special RTEs (RTE_JOIN ) are mapped to those in base RTEs,
					 * so when the walker reach these special RTEs, the exprs can be NIL.
					 * Empty exprs means no check is needed in this step and the walker should continue
					 * scanning other parts of the jointree.
					 * NOTE: break is also needed to avoid call the function 'is_exprs_nullable' on NIL exprs,
					 * which reports NIL as nullable and terminates the walker
					 */
					if (exprs == NIL)
						break;

					if (is_exprs_nullable((Node *) exprs, rte->subquery))
					{
						/*
						 * The VarsTocheck must be nullable, terminate here.
						 * Since we are sure the nullable check will fail, so
						 * set the knowledage database to NIL here.
						 * */
						context->nonNullableVars = NIL;
						return true;
					}

					foreach(lc, context->varsToCheck)
					{
						Var *var = (Var *) lfirst(lc);
						if (var->varno == rtf->rtindex)
						{
							context->nonNullableVars = list_append_unique(
											context->nonNullableVars, var);
						}
					}

				}
				else if (rte->rtekind == RTE_VALUES)
				{
					/*
					 * TODO: make this work for values scan someday.
					 */
				}
				return false;
			}
		case T_PlaceHolderVar:
			{
				/*
				 * GPDB_84_MERGE_FIXME: Confirm if we need to do special
				 * handling for PlaceHolderVar. Currently we are just fall
				 * through the mutator.
				 */
				break;
			}
		default:
			{
				/* Do not descend beyond any other node */
				return false;
			}
	}
	return expression_tree_walker(node, cdb_find_nonnullable_vars_walker, context);
}

static List *
fetch_targetlist_exprs(List *targetlist)
{
	List        *exprs = NIL;
	ListCell    *lc    = NULL;

	foreach(lc, targetlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

		if (tle->resjunk)
		{
			/*
			 * Previously, we take it nullable when we
			 * see any resjunk target entry. I think it
			 * is safe to just ignore them.
			 */
			continue;
		}

		exprs = lappend(exprs, tle->expr);
	}

	return exprs;
}

/*
 * fetch_outer_exprs
 *   @param testexpr: the NOT-IN sublink's test exprs
 *
 *   For a two-col NOT_IN query: select * from t1 where (a,b) not in (select a,b from t2)
 *   this testexpr should be:
 *   BoolExpr [boolop=NOT_EXPR]
 *      BoolExpr [boolop=AND_EXPR]
 *        OpExpr [opno=96 opfuncid=65 opresulttype=16 opretset=false]
 *                Var [varno=1 varattno=1 vartype=23 varnoold=1 varoattno=1]
 *                Param [paramkind=PARAM_SUBLINK paramid=1 paramtype=23]
 *        OpExpr [opno=96 opfuncid=65 opresulttype=16 opretset=false]
 *                Var [varno=1 varattno=2 vartype=23 varnoold=1 varoattno=2]
 *                Param [paramkind=PARAM_SUBLINK paramid=2 paramtype=23]
 *
 *  For a two-col <> ALL query: select * from t1 where (a,b) <> (select a,b from t2)
 *  this testexpr should be:
 *  BoolExpr [boolop=OR_EXPR]
 *     OpExpr [opno=518 opfuncid=144 opresulttype=16 opretset=false]
 *              Var [varno=1 varattno=1 vartype=23 varnoold=1 varoattno=1]
 *              Param [paramkind=PARAM_SUBLINK paramid=1 paramtype=23]
 *      OpExpr [opno=518 opfuncid=144 opresulttype=16 opretset=false]
 *              Var [varno=1 varattno=2 vartype=23 varnoold=1 varoattno=2]
 *              Param [paramkind=PARAM_SUBLINK paramid=2 paramtype=23]
 *
 * This function fetches all the outer parts and put them in a list as the
 * result.
 *
 * NOTE: we want to be conservative for cases we are not interested or
 * we are not sure. Returning a NIL is conservative policy here since
 * is_exprs_nullable will return true for NULL input.
 */
static List *
fetch_outer_exprs(Node *testexpr)
{
	if (testexpr == NULL)
		return NIL;

	if (IsA(testexpr, BoolExpr))
	{
		BoolExpr *be = (BoolExpr *) testexpr;
		bool      seen_not_atop;
		Node     *expr;
		seen_not_atop = be->boolop == NOT_EXPR;

		/* strip off the top NOT */
		if (seen_not_atop)
			expr = linitial(be->args);
		else
			expr = (Node *) be;

		/*
		 * The above expr should be a single OpExpr when single-column not-in,
		 * or a BoolExpr of AND when multi-column not-in. We are not interested
		 * in other cases.
		 */
		if (IsA(expr, BoolExpr))
		{
			BoolExpr *be    = (BoolExpr *) expr;
			List     *exprs = NIL;
			ListCell *lc;

			/*
			 * The following cases should not happen, instead of
			 * erroring out, let's be conservative by returning NIL.
			 */
			if (be->boolop == AND_EXPR && !seen_not_atop)
				return NIL;
			if (be->boolop == OR_EXPR && seen_not_atop)
				return NIL;
			if (be->boolop != OR_EXPR && be->boolop != AND_EXPR)
				return NIL;

			foreach(lc, be->args)
			{
				OpExpr *op_expr = (OpExpr *) lfirst(lc);
				if (!IsA(op_expr, OpExpr))
					return NIL;
				exprs = lappend(exprs, linitial(op_expr->args));
			}
			return exprs;
		}
		else if (IsA(expr, OpExpr))
			return list_make1(linitial(((OpExpr *)expr)->args));
		else
			return NIL;
	}
	else if (IsA(testexpr, OpExpr))
		return list_make1(linitial(((OpExpr *)testexpr)->args));
	else
		return NIL;
}

/*
 * is_exprs_nullable
 *   Return true if any of the exprs might be null, otherwise false.
 *   We want to be conservative for those cases either we are not
 *   interested or not sure.
 */
static bool
is_exprs_nullable(Node *exprs, Query *query)
{
	NonNullableVarsContext context;
	context.query           = query;
	context.varsToCheck     = cdb_find_all_vars(exprs, query->rtable);
	context.nonNullableVars = NIL;

	/* Find nullable vars in the jointree */
	(void) expression_tree_walker((Node *) query->jointree,
								  cdb_find_nonnullable_vars_walker, &context);

	return is_exprs_nullable_internal(exprs, context.nonNullableVars, query->rtable);
}

static bool
is_exprs_nullable_internal(Node *exprs, List *nonnullable_vars, List *rtable)
{
	if (exprs == NULL)
	{
		/*
		 * Be conservative when input is Empty. Keep consistent
		 * with fetch_outer_exprs and fetch_targetlist_exprs.
		 */
		return true;
	}

	if (IsA(exprs, Var))
	{
		Var		   *var = cdb_map_to_base_var((Var *) exprs, rtable);
		return !list_member(nonnullable_vars, var);
	}
	else if (IsA(exprs, List))
	{
		ListCell *lc;
		foreach(lc, (List *) exprs)
		{
			if (is_exprs_nullable_internal((Node *) lfirst(lc),
										   nonnullable_vars, rtable))
				return true;
		}
		return false;
	}
	else if (IsA(exprs, Const))
	{
		Const	   *constant = (Const *) exprs;
		return constant->constisnull;
	}
	else if (IsA(exprs, RelabelType))
	{
		RelabelType    *rt = (RelabelType *) exprs;
		return is_exprs_nullable_internal((Node *) rt->arg, nonnullable_vars, rtable);
	}
	else if (IsA(exprs, OpExpr))
	{
		OpExpr   *op_expr = (OpExpr *) exprs;
		ListCell *lc;
		foreach(lc, op_expr->args)
		{
			if (is_exprs_nullable_internal((Node *) lfirst(lc),
										   nonnullable_vars, rtable))
				return true;
		}
		return false;
	}
	else
	{
		/* Be conservative here */
		return true;
	}
}

/*
 * convert_IN_to_antijoin: can we convert an ALL SubLink to join style?
 * If not appropriate to process this SubLink, return it as it is.
 * Side effects of a successful conversion include adding the SubLink's
 * subselect to the top-level rangetable, adding a JOIN RTE linking the outer
 * query with the subselect and setting up the qualifiers correctly.
 *
 * The transformation is to rewrite a query of the form:
 *		select c1 from t1 where c1 NOT IN (select c2 from t2);
 *						(to)
 *		select c1 from t1 left anti semi join (select 0 as zero, c2 from t2) foo
 *						  ON (c1 = c2) IS NOT FALSE where zero is NULL;
 *
 * The pseudoconstant column zero is needed to correctly pipe in the NULLs
 * from the subselect upstream.
 *
 * The current implementation assumes that the sublink expression occurs
 * in a top-level where clause (or through a series of inner joins).
 */
JoinExpr *
convert_IN_to_antijoin(PlannerInfo *root, SubLink *sublink,
					   Relids available_rels)
{
	Query	   *parse = root->parse;
	Query	   *subselect = (Query *) sublink->subselect;

	if (safe_to_convert_NOTIN(sublink, available_rels))
	{
		int			subq_indx      = add_notin_subquery_rte(parse, subselect);
		List       *inner_exprs    = NIL;
		List       *outer_exprs    = NIL;
		bool        inner_nullable = true;
		bool        outer_nullable = true;

		inner_exprs = fetch_targetlist_exprs(subselect->targetList);
		outer_exprs = fetch_outer_exprs(sublink->testexpr);
		inner_nullable = is_exprs_nullable((Node *) inner_exprs, subselect);
		outer_nullable = is_exprs_nullable((Node *) outer_exprs, parse);

		JoinExpr   *join_expr = make_join_expr(NULL, subq_indx, JOIN_LASJ_NOTIN);

		join_expr->quals = make_lasj_quals(root, sublink, subq_indx);
		if (inner_nullable || outer_nullable)
			join_expr->quals = add_null_match_clause(join_expr->quals);

		return join_expr;
	}
	/* Not safe to perform transformation. */
	return NULL;
}

/*
 * Check if there is a range table entry of type func expr whose arguments
 * are correlated
 */
bool
has_correlation_in_funcexpr_rte(List *rtable)
{
	/*
	 * check if correlation occurs in a func expr in the from clause of the
	 * subselect
	 */
	ListCell   *lc_rte;

	foreach(lc_rte, rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc_rte);

		if (rte->functions  && contain_vars_of_level_or_above((Node *) rte->functions, 1))
		{
			return true;
		}
	}
	return false;
}

static List *
cdb_find_all_vars(Node *exprs, List *rtable)
{
	FindAllVarsContext    context;

	context.rtable = rtable;
	context.vars = NIL;

	expression_tree_walker(exprs, cdb_find_all_vars_walker, (void *) &context);

	return context.vars;
}

static bool
cdb_find_all_vars_walker(Node *node, FindAllVarsContext *context)
{
	if (node == NULL)
		return false;

	if (IsA(node, Var))
	{
		Var     *var;

		/*
		 * The vars fetched from targetList/testexpr.. can be from virtual range table (RTE_JOIN),
		 * which do not directly match base vars fetched by cdb_find_nonnullable_vars_walker, aligning
		 * them to base vars is needed before check nullable.
		 */
		var = cdb_map_to_base_var((Var *) node, context->rtable);
		context->vars = list_append_unique(context->vars, var);
		return false;
	}

	return expression_tree_walker(node, cdb_find_all_vars_walker, context);
}

static Var *
cdb_map_to_base_var(Var *var, List *rtable)
{
	RangeTblEntry *rte    = rt_fetch(var->varno, rtable);

	while(rte->rtekind == RTE_JOIN && rte->joinaliasvars)
	{
		var = (Var *) list_nth(rte->joinaliasvars, var->varattno-1);
		rte = rt_fetch(var->varno, rtable);
	}

	return var;
}

相关信息

greenplumn 源码目录

相关文章

greenplumn cdbappendonlystorageformat 源码

greenplumn cdbappendonlystorageread 源码

greenplumn cdbappendonlystoragewrite 源码

greenplumn cdbappendonlyxlog 源码

greenplumn cdbbufferedappend 源码

greenplumn cdbbufferedread 源码

greenplumn cdbcat 源码

greenplumn cdbcopy 源码

greenplumn cdbdistributedsnapshot 源码

greenplumn cdbdistributedxacts 源码

0 赞

所属分类： 大数据
本文标签： greenplum
版权声明： 原创文章如转载，请注明本文链接: https://www.seaxiang.com/blog/f04cc349e4d24a66902c8129cfc05729

热门推荐

1、直接访问google.com
2、 - 优质文章
3、 gate.io
4、 harmony 鸿蒙hdc使用指导
5、 harmony 鸿蒙ArkUI组件（ArkTS）开发常见问题
6、 harmony 鸿蒙初识ArkTS语言
7、 flink kafka connector scan.startup.mode 的几个选项
8、 golang
9、 openharmony
10、 Vue中input框自动聚焦

Loading...