greenplumn CExpressionPreprocessor 源码
greenplumn CExpressionPreprocessor 代码
文件路径:/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright 2012 EMC Corp.
//
// @filename:
// CExpressionPreprocessor.cpp
//
// @doc:
// Expression tree preprocessing routines, needed to prepare an input
// logical expression to be optimized
//---------------------------------------------------------------------------
#include "gpopt/operators/CExpressionPreprocessor.h"
#include "gpos/base.h"
#include "gpos/common/CAutoRef.h"
#include "gpos/common/CAutoTimer.h"
#include "gpopt/base/CColRefSetIter.h"
#include "gpopt/base/CColRefTable.h"
#include "gpopt/base/CConstraintInterval.h"
#include "gpopt/base/COptCtxt.h"
#include "gpopt/base/CUtils.h"
#include "gpopt/exception.h"
#include "gpopt/mdcache/CMDAccessor.h"
#include "gpopt/operators/CExpressionFactorizer.h"
#include "gpopt/operators/CExpressionUtils.h"
#include "gpopt/operators/CLogicalCTEAnchor.h"
#include "gpopt/operators/CLogicalCTEConsumer.h"
#include "gpopt/operators/CLogicalCTEProducer.h"
#include "gpopt/operators/CLogicalConstTableGet.h"
#include "gpopt/operators/CLogicalDynamicGet.h"
#include "gpopt/operators/CLogicalGbAgg.h"
#include "gpopt/operators/CLogicalInnerJoin.h"
#include "gpopt/operators/CLogicalLimit.h"
#include "gpopt/operators/CLogicalNAryJoin.h"
#include "gpopt/operators/CLogicalProject.h"
#include "gpopt/operators/CLogicalSelect.h"
#include "gpopt/operators/CLogicalSequenceProject.h"
#include "gpopt/operators/CLogicalSetOp.h"
#include "gpopt/operators/CLogicalUnion.h"
#include "gpopt/operators/CLogicalUnionAll.h"
#include "gpopt/operators/CLogicalUpdate.h"
#include "gpopt/operators/CNormalizer.h"
#include "gpopt/operators/COrderedAggPreprocessor.h"
#include "gpopt/operators/CPredicateUtils.h"
#include "gpopt/operators/CScalarCmp.h"
#include "gpopt/operators/CScalarIdent.h"
#include "gpopt/operators/CScalarNAryJoinPredList.h"
#include "gpopt/operators/CScalarProjectElement.h"
#include "gpopt/operators/CScalarProjectList.h"
#include "gpopt/operators/CScalarSubquery.h"
#include "gpopt/operators/CScalarSubqueryAny.h"
#include "gpopt/operators/CScalarSubqueryExists.h"
#include "gpopt/operators/CScalarSubqueryQuantified.h"
#include "gpopt/operators/CWindowPreprocessor.h"
#include "gpopt/optimizer/COptimizerConfig.h"
#include "gpopt/translate/CTranslatorDXLToExpr.h"
#include "gpopt/xforms/CXform.h"
#include "naucrates/md/IMDScalarOp.h"
#include "naucrates/md/IMDType.h"
#include "naucrates/statistics/CStatistics.h"
#include "naucrates/traceflags/traceflags.h"
using namespace gpopt;
// maximum number of equality predicates to be derived from existing equalities
#define GPOPT_MAX_DERIVED_PREDS 50
// eliminate self comparisons in the given expression
CExpression *
CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
if (CUtils::FScalarCmp(pexpr))
{
return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr);
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprEliminateSelfComparison(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
COperator *pop = pexpr->Pop();
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// remove superfluous equality operations
CExpression *
CExpressionPreprocessor::PexprPruneSuperfluousEquality(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
if (pexpr->Pop()->FScalar())
{
return CPredicateUtils::PexprPruneSuperfluosEquality(mp, pexpr);
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprPruneSuperfluousEquality(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
COperator *pop = pexpr->Pop();
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// an existential subquery whose inner expression is a GbAgg
// with no grouping columns is replaced with a Boolean constant
//
// Example:
//
// exists(select sum(i) from X) --> True
// not exists(select sum(i) from X) --> False
CExpression *
CExpressionPreprocessor::PexprTrimExistentialSubqueries(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (CUtils::FExistentialSubquery(pop))
{
CExpression *pexprInner = (*pexpr)[0];
if (COperator::EopLogicalGbAgg == pexprInner->Pop()->Eopid() &&
0 ==
CLogicalGbAgg::PopConvert(pexprInner->Pop())->Pdrgpcr()->Size())
{
GPOS_ASSERT(0 < (*pexprInner)[1]->Arity() &&
"Project list of GbAgg is expected to be non-empty");
BOOL fValue = true;
if (COperator::EopScalarSubqueryNotExists == pop->Eopid())
{
fValue = false;
}
return CUtils::PexprScalarConstBool(mp, fValue);
}
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprTrimExistentialSubqueries(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
if (CPredicateUtils::FAnd(pexpr))
{
return CPredicateUtils::PexprConjunction(mp, pdrgpexprChildren);
}
if (CPredicateUtils::FOr(pexpr))
{
return CPredicateUtils::PexprDisjunction(mp, pdrgpexprChildren);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// a quantified subquery with maxcard 1 is simplified as a scalar subquery
//
// Example:
// a = ANY (select sum(i) from X) --> a = (select sum(i) from X)
// a <> ALL (select sum(i) from X) --> a <> (select sum(i) from X)
CExpression *
CExpressionPreprocessor::PexprSimplifyQuantifiedSubqueries(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (CUtils::FQuantifiedSubquery(pop) &&
1 == (*pexpr)[0]->DeriveMaxCard().Ull())
{
CExpression *pexprInner = (*pexpr)[0];
// skip intermediate unary nodes
CExpression *pexprChild = pexprInner;
COperator *popChild = pexprChild->Pop();
while (nullptr != pexprChild && CUtils::FLogicalUnary(popChild))
{
pexprChild = (*pexprChild)[0];
popChild = pexprChild->Pop();
}
// inspect next node
BOOL fGbAggWithoutGrpCols =
COperator::EopLogicalGbAgg == popChild->Eopid() &&
0 == CLogicalGbAgg::PopConvert(popChild)->Pdrgpcr()->Size();
BOOL fOneRowConstTable =
COperator::EopLogicalConstTableGet == popChild->Eopid() &&
1 == CLogicalConstTableGet::PopConvert(popChild)
->Pdrgpdrgpdatum()
->Size();
if (fGbAggWithoutGrpCols || fOneRowConstTable)
{
// quantified subquery with max card 1
CExpression *pexprScalar = (*pexpr)[1];
CScalarSubqueryQuantified *popSubqQuantified =
CScalarSubqueryQuantified::PopConvert(pexpr->Pop());
const CColRef *colref = popSubqQuantified->Pcr();
pexprInner->AddRef();
CExpression *pexprSubquery = GPOS_NEW(mp) CExpression(
mp,
GPOS_NEW(mp)
CScalarSubquery(mp, colref, false /*fGeneratedByExist*/,
true /*fGeneratedByQuantified*/),
pexprInner);
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
IMDId *mdid = popSubqQuantified->MdIdOp();
const CWStringConst *str =
md_accessor->RetrieveScOp(mdid)->Mdname().GetMDName();
mdid->AddRef();
pexprScalar->AddRef();
return CUtils::PexprScalarCmp(mp, pexprScalar, pexprSubquery, *str,
mdid);
}
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprSimplifyQuantifiedSubqueries(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// preliminary unnesting of scalar subqueries
// Example:
// Input: SELECT k, (SELECT (SELECT Y.i FROM Y WHERE Y.j=X.j)) from X
// Output: SELECT k, (SELECT Y.i FROM Y WHERE Y.j=X.j) from X
CExpression *
CExpressionPreprocessor::PexprUnnestScalarSubqueries(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
// look for a Project Element with a scalar subquery below it
if (CUtils::FProjElemWithScalarSubq(pexpr))
{
// recursively process scalar subquery
CExpression *pexprSubq = PexprUnnestScalarSubqueries(mp, (*pexpr)[0]);
// if the scalar subquery is replaced by the CScalarIdent in the previous
// recursive call we simply return the CScalarIdent and stop preprocessing
// at this stage.
// +--CScalarProjectList
// +--CScalarProjectElement "?column?" (2)
// +--CScalarIdent "column1" (1)
if (COperator::EopScalarIdent == pexprSubq->Pop()->Eopid())
{
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pexprSubq);
}
// check if subquery is defined as a Project on Const Table
CExpression *pexprSubqChild = (*pexprSubq)[0];
if (CUtils::FProjectConstTableWithOneScalarSubq(pexprSubqChild))
{
CExpression *pexprConstTable = (*pexprSubqChild)[0];
CExpression *pexprPrjList = (*pexprSubqChild)[1];
GPOS_ASSERT(1 == pexprPrjList->Arity());
CExpression *pexprPrjElem = (*pexprPrjList)[0];
CExpression *pexprInnerSubq = (*pexprPrjElem)[0];
GPOS_ASSERT(COperator::EopScalarSubquery ==
pexprInnerSubq->Pop()->Eopid());
// make sure that inner subquery has no outer references to Const Table
// since Const Table will be eliminated in output expression
CColRefSet *pcrsConstTableOutput =
pexprConstTable->DeriveOutputColumns();
CColRefSet *outer_refs =
(*pexprInnerSubq)[0]->DeriveOuterReferences();
if (0 == outer_refs->Size() ||
outer_refs->IsDisjoint(pcrsConstTableOutput))
{
// recursively process inner subquery
CExpression *pexprUnnestedSubq =
PexprUnnestScalarSubqueries(mp, pexprInnerSubq);
// the original subquery is processed and can be removed now
pexprSubq->Release();
// build the new Project Element after eliminating outer subquery
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pexprUnnestedSubq);
}
}
// otherwise, return a Project Element with the processed outer subquery
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pexprSubq);
}
else if (CUtils::FScalarSubqWithConstTblGet(pexpr))
{
const CColRef *pcrSubq =
CScalarSubquery::PopConvert(pexpr->Pop())->Pcr();
CColRefSet *pcrsConstTableOutput = (*pexpr)[0]->DeriveOutputColumns();
// if the subquery has outer ref, we do not make use of the output columns of constant table get.
// In this scenairo, we replace the entire scalar subquery with a CScalarIdent with the outer reference.
// Otherwise, the subquery remains unchanged.
// Input:
// +--CScalarSubquery["b" (8)]
// +--CLogicalConstTableGet Columns: ["" (16)] Values: [(1)]
// Output:
// +--CScalarIdent "b" (8)
if (!pcrsConstTableOutput->FMember(pcrSubq))
{
CScalarSubquery *pScalarSubquery =
CScalarSubquery::PopConvert(pexpr->Pop());
return CUtils::PexprScalarIdent(mp, pScalarSubquery->Pcr());
}
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = PexprUnnestScalarSubqueries(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// an intermediate limit is removed if it has neither row count nor offset
CExpression *
CExpressionPreprocessor::PexprRemoveSuperfluousLimit(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
// if current operator is a logical limit with zero offset, and no specified
// row count, skip to limit's logical child
if (COperator::EopLogicalLimit == pop->Eopid() &&
CUtils::FHasZeroOffset(pexpr) &&
!CLogicalLimit::PopConvert(pop)->FHasCount())
{
CLogicalLimit *popLgLimit = CLogicalLimit::PopConvert(pop);
if (!popLgLimit->IsTopLimitUnderDMLorCTAS() ||
(popLgLimit->IsTopLimitUnderDMLorCTAS() &&
GPOS_FTRACE(EopttraceRemoveOrderBelowDML)))
{
return PexprRemoveSuperfluousLimit(mp, (*pexpr)[0]);
}
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = PexprRemoveSuperfluousLimit(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// distinct is removed from a DQA if it has a max or min agg
// e.g. select max(distinct(a)) from tbl -> select max(a) from tbl
CExpression *
CExpressionPreprocessor::PexprRemoveSuperfluousDistinctInDQA(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (COperator::EopLogicalGbAgg == pop->Eopid())
{
const CExpression *const pexprProjectList = (*pexpr)[1];
GPOS_ASSERT(COperator::EopScalarProjectList ==
pexprProjectList->Pop()->Eopid());
const ULONG arity = pexprProjectList->Arity();
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *const pexprPrjElem = (*pexprProjectList)[ul];
if (COperator::EopScalarAggFunc ==
(*pexprPrjElem)[0]->Pop()->Eopid())
{
CScalarAggFunc *popAggFunc =
CScalarAggFunc::PopConvert((*pexprPrjElem)[0]->Pop());
IMDId *agg_child_mdid =
CScalar::PopConvert((*pexprPrjElem)[0]->Pop())->MdidType();
const IMDType *agg_child_type =
md_accessor->RetrieveType(agg_child_mdid);
if (popAggFunc->IsDistinct() &&
popAggFunc->IsMinMax(agg_child_type))
{
popAggFunc->SetIsDistinct(false);
}
}
}
}
// recursively process children
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprRemoveSuperfluousDistinctInDQA(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// Remove outer references from order spec inside limit, grouping columns
// in GbAgg, and Partition/Order columns in window operators. Also handle
// cases where we would end up with an empty groupby list and project list,
// which is not supported.
//
// Example, for the schema: t(a, b), s(i, j)
// The query:
// select * from t where a < all (select i from s order by j, b limit 1);
// should be equivalent to:
// select * from t where a < all (select i from s order by j limit 1);
// after removing the outer reference (b) from the order by clause of the
// subquery (all tuples in the subquery have the same value for the outer ref)
//
// Similarly,
// select * from t where a in (select count(i) from s group by j, b);
// is equivalent to:
// select * from t where a in (select count(i) from s group by j);
//
// Similarly,
// select * from t where a in (select row_number() over (partition by t.a order by t.b) from s);
// is equivalent to:
// select * from t where a in (select row_number() over () from s);
CExpression *
CExpressionPreprocessor::PexprRemoveSuperfluousOuterRefs(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
// operator, possibly altered below if we need to change the operator
COperator *pop = pexpr->Pop();
// expression, possibly altered below if we need to change the children
CExpression *newExpr = pexpr;
COperator::EOperatorId op_id = pop->Eopid();
BOOL fHasOuterRefs = (pop->FLogical() && CUtils::HasOuterRefs(pexpr));
pop->AddRef();
if (fHasOuterRefs)
{
// special handling for three operator types: Limit, GrbyAgg, Sequence
if (COperator::EopLogicalLimit == op_id)
{
CColRefSet *outer_refs = pexpr->DeriveOuterReferences();
CLogicalLimit *popLimit = CLogicalLimit::PopConvert(pop);
COrderSpec *pos = popLimit->Pos();
COrderSpec *posNew = pos->PosExcludeColumns(mp, outer_refs);
pop->Release();
pop = GPOS_NEW(mp) CLogicalLimit(
mp, posNew, popLimit->FGlobal(), popLimit->FHasCount(),
popLimit->IsTopLimitUnderDMLorCTAS());
}
else if (COperator::EopLogicalGbAgg == op_id)
{
CColRefSet *outer_refs = pexpr->DeriveOuterReferences();
CLogicalGbAgg *popAgg = CLogicalGbAgg::PopConvert(pop);
CColRefArray *colref_array = CUtils::PdrgpcrExcludeColumns(
mp, popAgg->Pdrgpcr(), outer_refs);
CExpression *pExprProjList = (*pexpr)[1];
// It's only valid to remove the outer reference if:
// the projection list is NOT empty
// or
// the outer references are NOT the ONLY Group By column
//
// For example:
// -- Cannot remove t.b from groupby, because this will produce an invalid plan
// -- with both groupby list and project list empty, in this case we need to add
// -- a project node below the GrbyAgg
// select a from t where c in (select distinct t.b from s)
//
// -- remove t.b from groupby is ok, because there is at least one agg function: count()
// select a from t where c in (select count(s.j) from s group by t.b)
//
// -- remove t.b from groupby is ok, because there is other groupby column s.j
// select a from t where c in (select s.j from s group by t.b, s.j)
//
// -- remove t.b from groupby is ok, because outer reference is a
// -- constant for each invocation of subquery
// select a from t where c in (select count(s.j) from s group by s.i, t.b)
//
if (0 < pExprProjList->Arity() || 0 < colref_array->Size())
{
// remove outer refs from the groupby columns list
CColRefArray *pdrgpcrMinimal = popAgg->PdrgpcrMinimal();
if (nullptr != pdrgpcrMinimal)
{
pdrgpcrMinimal = CUtils::PdrgpcrExcludeColumns(
mp, pdrgpcrMinimal, outer_refs);
}
CColRefArray *pdrgpcrArgDQA = popAgg->PdrgpcrArgDQA();
if (nullptr != pdrgpcrArgDQA)
{
pdrgpcrArgDQA->AddRef();
}
pop->Release();
pop = GPOS_NEW(mp) CLogicalGbAgg(
mp, colref_array, pdrgpcrMinimal, popAgg->Egbaggtype(),
popAgg->FGeneratesDuplicates(), pdrgpcrArgDQA);
}
else
{
// grouping_cols has outer references that can't be removed, because
// that would make both pExprProjList and grouping_cols empty, which is not allowed.
// The solution in this case is to add a project node below that will simply echo
// the outer reference, and to use that newly produced ColRef as groupby column.
CExpression *child = (*pexpr)[0];
CExpressionArray *grouping_cols_arr =
CUtils::PdrgpexprScalarIdents(mp, popAgg->Pdrgpcr());
GPOS_ASSERT(0 < grouping_cols_arr->Size());
child->AddRef();
// add a project node on top of our child
CExpression *projectExpr = CUtils::PexprAddProjection(
mp, child, grouping_cols_arr,
false // don't add to hash table,
// this is done at the end
// of preprocessing
);
grouping_cols_arr->Release();
// build a children array for the new GrbyAgg expression
CExpressionArray *new_children =
GPOS_NEW(mp) CExpressionArray(mp);
new_children->Append(projectExpr);
for (ULONG ul = 1; ul < pexpr->PdrgPexpr()->Size(); ul++)
{
new_children->Append((*pexpr->PdrgPexpr())[ul]);
(*pexpr->PdrgPexpr())[ul]->AddRef();
}
// build a new CLogicalGbAgg operator, with a new grouping columns list
CColRefArray *new_grouping_cols = GPOS_NEW(mp) CColRefArray(mp);
CExpression *new_projected_cols = (*projectExpr)[1];
for (ULONG ul = 0; ul < new_projected_cols->Arity(); ul++)
{
new_grouping_cols->Append(
CUtils::PcrFromProjElem((*new_projected_cols)[ul]));
}
GPOS_ASSERT(nullptr == popAgg->PdrgpcrArgDQA());
pop = GPOS_NEW(mp) CLogicalGbAgg(mp, new_grouping_cols, nullptr,
popAgg->Egbaggtype(),
popAgg->FGeneratesDuplicates(),
nullptr // no DQA cols
);
// release the previous pop
popAgg->Release();
popAgg = nullptr;
// finally, put it all together, our new GrbyAgg now has a project node below
// it that will turn the outer reference into a produced ColRef that is used
// as a groupby column
pop->AddRef();
newExpr = GPOS_NEW(mp) CExpression(mp, pop, new_children);
// clean up
colref_array->Release();
}
}
else if (COperator::EopLogicalSequenceProject == op_id)
{
CExpressionHandle exprhdl(mp);
exprhdl.Attach(pexpr);
exprhdl.DeriveProps(nullptr /*pdpctxt*/);
CLogicalSequenceProject *popSequenceProject =
CLogicalSequenceProject::PopConvert(pop);
if (popSequenceProject->FHasLocalReferencesTo(
exprhdl.DeriveOuterReferences()))
{
COperator *popNew =
popSequenceProject->PopRemoveLocalOuterRefs(mp, exprhdl);
pop->Release();
pop = popNew;
}
}
}
// recursively process children
const ULONG arity = newExpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprRemoveSuperfluousOuterRefs(mp, (*newExpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
if (newExpr != pexpr)
{
newExpr->Release();
}
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// generate a ScalarBoolOp expression or simply return the only expression
// in the array if there is only one.
CExpression *
CExpressionPreprocessor::PexprScalarBoolOpConvert2In(
CMemoryPool *mp, CScalarBoolOp::EBoolOperator eboolop,
CExpressionArray *pdrgpexpr)
{
GPOS_ASSERT(nullptr != pdrgpexpr);
GPOS_ASSERT(0 < pdrgpexpr->Size());
if (1 == pdrgpexpr->Size())
{
// if there is one child, do not wrap it in a bool op
CExpression *pexpr = (*pdrgpexpr)[0];
pexpr->AddRef();
pdrgpexpr->Release();
return pexpr;
}
return GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CScalarBoolOp(mp, eboolop), pdrgpexpr);
}
// checks if the given expression is likely to be simplified by the constraints
// framework during array conversion. eboolop is the CScalarBoolOp type
// of the expression which contains the argument expression
BOOL
CExpressionPreprocessor::FConvert2InIsConvertable(
CExpression *pexpr, CScalarBoolOp::EBoolOperator eboolopParent)
{
bool fConvertableExpression = false;
if (CPredicateUtils::FCompareIdentToConst(pexpr))
{
fConvertableExpression |=
IMDType::EcmptEq ==
CScalarCmp::PopConvert(pexpr->Pop())->ParseCmpType() &&
CScalarBoolOp::EboolopOr == eboolopParent;
fConvertableExpression |=
IMDType::EcmptNEq ==
CScalarCmp::PopConvert(pexpr->Pop())->ParseCmpType() &&
CScalarBoolOp::EboolopAnd == eboolopParent;
}
else if (CPredicateUtils::FCompareIdentToConstArray(pexpr) ||
CPredicateUtils::FCompareCastIdentToConstArray(pexpr))
{
fConvertableExpression = true;
}
if (fConvertableExpression)
{
GPOS_ASSERT(0 < pexpr->Arity());
CScalarIdent *pscid = nullptr;
if (CUtils::FScalarIdent((*pexpr)[0]))
{
pscid = CScalarIdent::PopConvert((*pexpr)[0]->Pop());
}
else
{
GPOS_ASSERT(CScalarIdent::FCastedScId((*pexpr)[0]));
pscid = CScalarIdent::PopConvert((*(*pexpr)[0])[0]->Pop());
}
if (!CUtils::FConstrainableType(pscid->MdidType()))
{
fConvertableExpression = false;
}
}
return fConvertableExpression;
}
// converts series of AND or OR comparisons into array IN expressions. For
// example, x = 1 OR x = 2 will convert to x IN (1,2). This stage assumes
// the expression has been unnested using CExpressionUtils::PexprUnnest.
CExpression *
CExpressionPreprocessor::PexprConvert2In(
CMemoryPool *mp,
CExpression *pexpr // does not take ownership
)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (CPredicateUtils::FOr(pexpr) || CPredicateUtils::FAnd(pexpr))
{
// the bool op type of this node
CScalarBoolOp::EBoolOperator eboolop =
CScalarBoolOp::PopConvert(pop)->Eboolop();
// derive constraints on all of the simple scalar children
// and add them to a new AND or OR expression
CExpressionArray *pdrgpexprCollapse = GPOS_NEW(mp) CExpressionArray(mp);
CExpressionArray *pdrgpexprRemainder =
GPOS_NEW(mp) CExpressionArray(mp);
const ULONG arity = pexpr->Arity();
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (FConvert2InIsConvertable(pexprChild, eboolop))
{
pexprChild->AddRef();
pdrgpexprCollapse->Append(pexprChild);
}
else
{
// recursively convert the remainder and add to the array
pdrgpexprRemainder->Append(PexprConvert2In(mp, pexprChild));
}
}
if (0 != pdrgpexprCollapse->Size())
{
// create the constraint, rederive the collapsed expression
// add the new derived expr to remainder
CColRefSetArray *colref_array = nullptr;
pop->AddRef();
CAutoRef<CExpression> apexprPreCollapse(
GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprCollapse));
CAutoRef<CConstraint> apcnst(CConstraint::PcnstrFromScalarExpr(
mp, apexprPreCollapse.Value(), &colref_array));
GPOS_ASSERT(nullptr != apcnst.Value());
CExpression *pexprPostCollapse = apcnst->PexprScalar(mp);
pexprPostCollapse->AddRef();
pdrgpexprRemainder->Append(pexprPostCollapse);
CRefCount::SafeRelease(colref_array);
}
else
{
pdrgpexprCollapse->Release();
}
GPOS_ASSERT(0 < pdrgpexprRemainder->Size());
return PexprScalarBoolOpConvert2In(mp, eboolop, pdrgpexprRemainder);
}
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
CExpressionArray *pdrgexprChildren = pexpr->PdrgPexpr();
for (ULONG ul = 0; ul < pexpr->Arity(); ul++)
{
pdrgpexpr->Append(PexprConvert2In(mp, (*pdrgexprChildren)[ul]));
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// collapse cascaded inner and left outer joins into NAry-joins
CExpression *
CExpressionPreprocessor::PexprCollapseJoins(CMemoryPool *mp, CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
const ULONG arity = pexpr->Arity();
if (CPredicateUtils::FInnerOrNAryJoin(pexpr) ||
(GPOS_FTRACE(EopttraceEnableLOJInNAryJoin) &&
CPredicateUtils::FLeftOuterJoin(pexpr)))
{
CExpressionArray *newChildNodes = GPOS_NEW(mp) CExpressionArray(mp);
ULongPtrArray *lojChildPredIndexes = GPOS_NEW(mp) ULongPtrArray(mp);
CExpressionArray *innerJoinPredicates =
GPOS_NEW(mp) CExpressionArray(mp);
CExpressionArray *lojPredicates = GPOS_NEW(mp) CExpressionArray(mp);
CollectJoinChildrenRecursively(mp, pexpr, newChildNodes,
lojChildPredIndexes, innerJoinPredicates,
lojPredicates);
if (lojPredicates->Size() > 0)
{
// each logical child must have an associated predicate index
GPOS_ASSERT(newChildNodes->Size() == lojChildPredIndexes->Size());
// this NAry join involves LOJs; create a CScalarNAryJoinPredList to hold
// the information which predicates are inner join preds and which ON predicates
// are associated with the LOJs' right children
CExpressionArray *naryJoinPredicates =
GPOS_NEW(mp) CExpressionArray(mp);
// create a new CScalarNAryJoinPredList as the last child of the NAry join
// the first child are all the inner join predicates
naryJoinPredicates->Append(
CPredicateUtils::PexprConjunction(mp, innerJoinPredicates));
// the remaining children are the LOJ predicates, one by one
for (ULONG ul = 0; ul < lojPredicates->Size(); ul++)
{
CExpression *predicate = (*lojPredicates)[ul];
predicate->AddRef();
naryJoinPredicates->Append(predicate);
}
CExpression *nAryJoinPredicateList = GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CScalarNAryJoinPredList(mp),
naryJoinPredicates);
newChildNodes->Append(nAryJoinPredicateList);
// some sanity checks
// Example: t1 join t2 on p12 left outer join t3 on p23 join t4 on p24 left outer join t5 on p35
// results from this call:
// newChildNodes: [ t1, t2, t3, t4, t5 ]
// lojChildPredIndexes: [ 0, 0, 1, 0, 2 ] (one entry per logical leaf node)
// innerjoinPredicates: [ p12, p24 ] (all correspond to child pred index 0 (GPOPT_ZERO_INNER_JOIN_PRED_INDEX))
// lojPredicates: [ p23, p35 ] (p23 corresponds to child pred index 1, p35 corresponds to child pred index 2)
// the leftmost child must have a predicate index of
// GPOPT_ZERO_INNER_JOIN_PRED_INDEX, since it cannot be the right child of an LOJ
GPOS_ASSERT(GPOPT_ZERO_INNER_JOIN_PRED_INDEX ==
*(*lojChildPredIndexes)[0]);
#ifdef GPOS_DEBUG
// lojChildPredIndexes must contain the numbers 1 ... lojPredicates->Size()
// in ascending order, each number exactly once, with optional additional
// GPOPT_ZERO_INNER_JOIN_PRED_INDEX (0) entries in-between entries
ULONG highestNumberSeen = 0;
for (ULONG ix = 1; ix < lojChildPredIndexes->Size(); ix++)
{
ULONG nextNumber = *((*lojChildPredIndexes)[ix]);
if (nextNumber == highestNumberSeen + 1)
{
// child is right child of an LOJ
highestNumberSeen = nextNumber;
}
else
{
// if we don't see the next number for a child, it must
// be associated with the collective inner join predicates
GPOS_ASSERT(GPOPT_ZERO_INNER_JOIN_PRED_INDEX == nextNumber);
}
}
GPOS_ASSERT(highestNumberSeen == lojPredicates->Size());
#endif
}
else
{
// no LOJs involved, just add the ANDed preds as the scalar child
newChildNodes->Append(
CPredicateUtils::PexprConjunction(mp, innerJoinPredicates));
lojChildPredIndexes->Release();
lojChildPredIndexes = nullptr;
}
CExpression *pexprNAryJoin = GPOS_NEW(mp) CExpression(
mp, GPOS_NEW(mp) CLogicalNAryJoin(mp, lojChildPredIndexes),
newChildNodes);
COptimizerConfig *optimizer_config =
COptCtxt::PoctxtFromTLS()->GetOptimizerConfig();
ULONG ulJoinArityLimit =
optimizer_config->GetHint()
->UlJoinArityForAssociativityCommutativity();
// The last child of an n-ary join expression is the scalar expression
if (pexprNAryJoin->Arity() - 1 > ulJoinArityLimit)
{
GPOPT_DISABLE_XFORM(CXform::ExfJoinCommutativity);
GPOPT_DISABLE_XFORM(CXform::ExfJoinAssociativity);
}
lojPredicates->Release();
return pexprNAryJoin;
}
// current operator is not an inner-join or supported LOJ, recursively process children
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = PexprCollapseJoins(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// collect the children of a join backbone into an array of logical leaf
// nodes (leaves of the backbone, that is) and arrays of predicates, such
// that we can still associate the correct ON predicates to the children
void
CExpressionPreprocessor::CollectJoinChildrenRecursively(
CMemoryPool *mp, CExpression *pexpr, CExpressionArray *logicalLeafNodes,
ULongPtrArray *lojChildPredIndexes, CExpressionArray *innerJoinPredicates,
CExpressionArray *lojPredicates)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(pexpr->Pop()->FLogical());
if (CPredicateUtils::FInnerOrNAryJoin(pexpr))
{
const ULONG arity = pexpr->Arity();
CExpression *pexprScalar = (*pexpr)[arity - 1];
if (COperator::EopScalarNAryJoinPredList != pexprScalar->Pop()->Eopid())
{
for (ULONG ul = 0; ul < arity - 1; ul++)
{
CExpression *child = (*pexpr)[ul];
CollectJoinChildrenRecursively(
mp, child, logicalLeafNodes, lojChildPredIndexes,
innerJoinPredicates, lojPredicates);
}
innerJoinPredicates->Append(PexprCollapseJoins(mp, pexprScalar));
}
else
{
// we have collapsed this join before and it already has some non-inner join info,
// merge the existing and new lists
CLogicalNAryJoin *naryJoin =
CLogicalNAryJoin::PopConvert(pexpr->Pop());
ULongPtrArray *naryJoinPredIndexes =
naryJoin->GetLojChildPredIndexes();
// add all the inner join predicates
innerJoinPredicates->Append(
PexprCollapseJoins(mp, (*pexprScalar)[0]));
// loop over the logical children
for (ULONG ul = 0; ul < arity - 1; ul++)
{
if (GPOPT_ZERO_INNER_JOIN_PRED_INDEX ==
*(*naryJoinPredIndexes)[ul])
{
// inner join child, collapse recursively
CollectJoinChildrenRecursively(
mp, (*pexpr)[ul], logicalLeafNodes, lojChildPredIndexes,
innerJoinPredicates, lojPredicates);
}
else
{
// this is the right child of a non-inner join
ULONG oldPredIndex = *(*naryJoinPredIndexes)[ul];
CExpression *lojPred =
PexprCollapseJoins(mp, (*pexprScalar)[oldPredIndex]);
// don't collapse this child into our current join node
logicalLeafNodes->Append(
PexprCollapseJoins(mp, (*pexpr)[ul]));
lojPredicates->Append(lojPred);
ULONG newPredIndex = lojPredicates->Size();
lojChildPredIndexes->Append(GPOS_NEW(mp)
ULONG(newPredIndex));
}
}
}
}
else if (GPOS_FTRACE(EopttraceEnableLOJInNAryJoin) &&
CPredicateUtils::FLeftOuterJoin(pexpr))
{
GPOS_ASSERT(3 == pexpr->Arity());
CExpression *leftChild = (*pexpr)[0];
CExpression *rightChild = (*pexpr)[1];
CExpression *pexprScalar = (*pexpr)[2];
CollectJoinChildrenRecursively(mp, leftChild, logicalLeafNodes,
lojChildPredIndexes, innerJoinPredicates,
lojPredicates);
// stop collecting join children at the right child of the LOJ,
// just add the child, regardless of whether it is a join or not
logicalLeafNodes->Append(PexprCollapseJoins(mp, rightChild));
// create an entry in lojPredicates...
lojPredicates->Append(PexprCollapseJoins(mp, pexprScalar));
// ... and point to this new entry in lojChildPredIndexes
ULONG *indexOfThisLOJInTheArray =
GPOS_NEW(mp) ULONG(lojPredicates->Size());
lojChildPredIndexes->Append(indexOfThisLOJInTheArray);
}
else
{
// pexpr is not the right child of a supported LOJ and is not a supported join
logicalLeafNodes->Append(PexprCollapseJoins(mp, pexpr));
// this logical "leaf" node is a child of an inner join or it is the left child
// of an LOJ, either way it is associated with the inner join predicates
ULONG *innerJoinPredIndex =
GPOS_NEW(mp) ULONG(GPOPT_ZERO_INNER_JOIN_PRED_INDEX);
lojChildPredIndexes->Append(innerJoinPredIndex);
}
}
// collapse cascaded logical project operators
CExpression *
CExpressionPreprocessor::PexprCollapseProjects(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG arity = pexpr->Arity();
// recursively process children
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = PexprCollapseProjects(mp, (*pexpr)[ul]);
pdrgpexpr->Append(pexprChild);
}
COperator *pop = pexpr->Pop();
pop->AddRef();
CExpression *pexprNew = GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
CExpression *pexprCollapsed = CUtils::PexprCollapseProjects(mp, pexprNew);
if (nullptr == pexprCollapsed)
{
return pexprNew;
}
pexprNew->Release();
return pexprCollapsed;
}
// insert dummy project element below scalar subquery when the (a) the scalar
// subquery is below a project and (b) output column is an outer reference
CExpression *
CExpressionPreprocessor::PexprProjBelowSubquery(CMemoryPool *mp,
CExpression *pexpr,
BOOL fUnderPrList)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
/*
* Consider the following subquery:
* SELECT (SELECT foo.b from bar) FROM foo
* If bar is empty we should return null.
*
* For this query during DXL->Expr translation, the project element
* (SELECT b FROM bar) is represented as scalar subquery that returns
* an output column. To ensure that this scalar subquery under the project
* operator is returned when bar (or an arbitrary tree instead of bar)
* we insert a dummy project element that points to FOO.b under the
* scalar subquery. This dummy project element prevents its incorrect
* transformation into a non-correlated plan.
*
* One of the reasons we add this dummy project is to force the subquery
* handler transformation to not produce a de-correlated plan
* for queries such as this.
*
* We want to limit the of such introduction dummy projects only when the
* following conditions are all satisfied:
* a) The scalar subquery is in the project element scalar tree
* Another use case: SELECT (SELECT foo.b from bar) + 1 FROM foo
* b) The output of the scalar subquery is the column from the outer expression.
* Consider the query: SELECT (SELECT foo.b + 5 from bar) FROM foo. In such cases,
* since the foo.b + 5 is a new computed column inside the subquery with its own
* project element, we do not need to add anything.
*/
BOOL fUnderPrListChild = fUnderPrList;
COperator *pop = pexpr->Pop();
if (pop->FLogical())
{
if (COperator::EopLogicalProject == pop->Eopid())
{
CExpression *pexprRel = (*pexpr)[0];
CExpression *pexprRelNew =
PexprProjBelowSubquery(mp, pexprRel, false /* fUnderPrList */);
CExpression *pexprPrList = (*pexpr)[1];
CExpression *pexprPrListNew = PexprProjBelowSubquery(
mp, pexprPrList, true /* fUnderPrList */);
return GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CLogicalProject(mp), pexprRelNew,
pexprPrListNew);
}
fUnderPrListChild = false;
}
else if (COperator::EopScalarSubquery == pop->Eopid() && fUnderPrList)
{
CExpression *pexprRel = (*pexpr)[0];
CExpression *pexprRelNew =
PexprProjBelowSubquery(mp, pexprRel, false /* fUnderPrList */);
const CColRefSet *prcsOutput = pexprRelNew->DeriveOutputColumns();
const CColRef *pcrSubquery = CScalarSubquery::PopConvert(pop)->Pcr();
if (nullptr != prcsOutput && !prcsOutput->FMember(pcrSubquery))
{
CColumnFactory *col_factory = COptCtxt::PoctxtFromTLS()->Pcf();
CColRef *pcrNewSubquery = col_factory->PcrCreate(
pcrSubquery->RetrieveType(), pcrSubquery->TypeModifier());
CExpression *pexprPrEl = CUtils::PexprScalarProjectElement(
mp, pcrNewSubquery, CUtils::PexprScalarIdent(mp, pcrSubquery));
CExpression *pexprProjList = GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp), pexprPrEl);
CExpression *pexprProj =
GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CLogicalProject(mp),
pexprRelNew, pexprProjList);
CScalarSubquery *popSubq = GPOS_NEW(mp)
CScalarSubquery(mp, pcrNewSubquery, false /*fGeneratedByExist*/,
false /*fGeneratedByQuantified*/);
CExpression *pexprResult =
GPOS_NEW(mp) CExpression(mp, popSubq, pexprProj);
return pexprResult;
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pexprRelNew);
}
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG arity = pexpr->Arity();
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprProjBelowSubquery(mp, (*pexpr)[ul], fUnderPrListChild);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// collapse cascaded union/union all into an NAry union/union all operator
CExpression *
CExpressionPreprocessor::PexprCollapseUnionUnionAll(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
const ULONG arity = pexpr->Arity();
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
// recursively process children
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = PexprCollapseUnionUnionAll(mp, (*pexpr)[ul]);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
CExpression *pexprNew = GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
if (!CPredicateUtils::FUnionOrUnionAll(pexprNew))
{
return pexprNew;
}
// array of input children and its column references
CExpressionArray *pdrgpexprNew = GPOS_NEW(mp) CExpressionArray(mp);
CColRef2dArray *pdrgdrgpcrOrig =
CLogicalSetOp::PopConvert(pop)->PdrgpdrgpcrInput();
CColRef2dArray *pdrgdrgpcrNew = GPOS_NEW(mp) CColRef2dArray(mp);
BOOL fCollapsed = false;
for (ULONG ul = 0; ul < arity; ul++)
{
if (CPredicateUtils::FCollapsibleChildUnionUnionAll(pexprNew, ul))
{
fCollapsed = true;
CPredicateUtils::CollectGrandChildrenUnionUnionAll(
mp, pexprNew, ul, pdrgpexprNew, pdrgdrgpcrNew);
}
else
{
CExpression *pexprChild = (*pexprNew)[ul];
pexprChild->AddRef();
pdrgpexprNew->Append(pexprChild);
CColRefArray *pdrgpcrInput = (*pdrgdrgpcrOrig)[ul];
pdrgpcrInput->AddRef();
pdrgdrgpcrNew->Append(pdrgpcrInput);
}
}
if (!fCollapsed)
{
// clean up
pdrgdrgpcrNew->Release();
pdrgpexprNew->Release();
return pexprNew;
}
COperator *popNew = nullptr;
CColRefArray *pdrgpcrOutput =
CLogicalSetOp::PopConvert(pop)->PdrgpcrOutput();
pdrgpcrOutput->AddRef();
if (pop->Eopid() == COperator::EopLogicalUnion)
{
popNew = GPOS_NEW(mp) CLogicalUnion(mp, pdrgpcrOutput, pdrgdrgpcrNew);
}
else
{
GPOS_ASSERT(pop->Eopid() == COperator::EopLogicalUnionAll);
popNew =
GPOS_NEW(mp) CLogicalUnionAll(mp, pdrgpcrOutput, pdrgdrgpcrNew);
}
// clean up
pexprNew->Release();
return GPOS_NEW(mp) CExpression(mp, popNew, pdrgpexprNew);
}
// transform outer joins into inner joins
CExpression *
CExpressionPreprocessor::PexprOuterJoinToInnerJoin(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
const ULONG arity = pexpr->Arity();
if (COperator::EopLogicalSelect == pop->Eopid() &&
COperator::EopLogicalLeftOuterJoin == (*pexpr)[0]->Pop()->Eopid())
{
// a Select on top of LOJ can be turned into InnerJoin by normalization
return CNormalizer::PexprNormalize(mp, pexpr);
}
if (CPredicateUtils::FInnerOrNAryJoin(pexpr))
{
// the predicates of an inner join on top of outer join can be used to turn the child outer join into another inner join
CExpression *pexprScalar = (*pexpr)[arity - 1];
if (COperator::EopScalarNAryJoinPredList == pexprScalar->Pop()->Eopid())
{
// since we have ScalarNAryJoinPredList, it means we have already
// converted all possible LOJs to Inner Joins and collapsed them
pexpr->AddRef();
return pexpr;
}
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
BOOL fNewChild = false;
if (COperator::EopLogicalLeftOuterJoin ==
pexprChild->Pop()->Eopid())
{
CColRefSet *pcrsLOJInnerOutput =
(*pexprChild)[1]->DeriveOutputColumns();
if (!GPOS_FTRACE(EopttraceDisableOuterJoin2InnerJoinRewrite) &&
CPredicateUtils::FNullRejecting(mp, pexprScalar,
pcrsLOJInnerOutput))
{
CExpression *pexprNewOuter =
PexprOuterJoinToInnerJoin(mp, (*pexprChild)[0]);
CExpression *pexprNewInner =
PexprOuterJoinToInnerJoin(mp, (*pexprChild)[1]);
CExpression *pexprNewScalar =
PexprOuterJoinToInnerJoin(mp, (*pexprChild)[2]);
pexprChild = CUtils::PexprLogicalJoin<CLogicalInnerJoin>(
mp, pexprNewOuter, pexprNewInner, pexprNewScalar);
fNewChild = true;
}
}
// Consider the following join tree:
// +--CLogicalNAryJoin
// |--CLogicalLeftOuterJoin
// | |--CLogicalLeftOuterJoin
// | | |--CLogicalGet "t1"
// | | |--CLogicalGet "t2"
// | |--CLogicalGet "t3"
// |--CLogicalGet "t4"
//
// If the predicate between the CLogicalNAryJoin and first CLogicalLeftOuterJoin
// is NULL rejecting, we convert the left join to an inner join and create a new
// expression. Then the modified tree would be:
//
// +--CLogicalNAryJoin
// |--CLogicalLeftOuterJoin
// | |--CLogicalGet "t1"
// | |--CLogicalGet "t2"
// |--CLogicalGet "t3"
// |--CLogicalGet "t4"
//
// Note that we can still convert the second CLogicalLeftOuterJoin into an inner join
// if the predicate between the CLogicalNAryJoin is NULL rejecting. So we need to recurse
// into the child and continue checking if we can convert the LOJs into inner joins.
CExpression *pexprChildNew =
PexprOuterJoinToInnerJoin(mp, pexprChild);
if (fNewChild)
{
pexprChild->Release();
}
pdrgpexprChildren->Append(pexprChildNew);
}
return GPOS_NEW(mp) CExpression(mp, GPOS_NEW(mp) CLogicalNAryJoin(mp),
pdrgpexprChildren);
}
// current operator is not an NAry-join, recursively process children
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = PexprOuterJoinToInnerJoin(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// generate n*(n-1)/2 equality predicates, up to GPOPT_MAX_DERIVED_PREDS, between
// the n columns in the given equivalence class (set),
CExpression *
CExpressionPreprocessor::PexprConjEqualityPredicates(CMemoryPool *mp,
CColRefSet *pcrs)
{
GPOS_ASSERT(nullptr != pcrs);
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
ULONG ulPreds = 0;
CColRefSetIter crsiRight(*pcrs);
while (crsiRight.Advance() && GPOPT_MAX_DERIVED_PREDS > ulPreds)
{
CColRef *pcrRight = crsiRight.Pcr();
CColRefSetIter crsiLeft(*pcrs);
while (crsiLeft.Advance() && GPOPT_MAX_DERIVED_PREDS > ulPreds)
{
CColRef *pcrLeft = crsiLeft.Pcr();
if (pcrLeft == pcrRight)
{
break;
}
pdrgpexpr->Append(CUtils::PexprScalarEqCmp(mp, pcrLeft, pcrRight));
ulPreds++;
}
}
return CPredicateUtils::PexprConjunction(mp, pdrgpexpr);
}
// check if all columns in the given equivalent class come from one of the
// children of the given expression
BOOL
CExpressionPreprocessor::FEquivClassFromChild(CColRefSet *pcrs,
CExpression *pexpr)
{
GPOS_ASSERT(nullptr != pcrs);
GPOS_ASSERT(nullptr != pexpr);
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (!pexprChild->Pop()->FLogical())
{
continue;
}
CColRefSetArray *pdrgpcrs =
pexprChild->DerivePropertyConstraint()->PdrgpcrsEquivClasses();
if (pcrs->FContained(pdrgpcrs))
{
return true;
}
}
return false;
}
// additional equality predicates are generated based on the equivalence
// classes in the constraint properties of the expression.
CExpression *
CExpressionPreprocessor::PexprAddEqualityPreds(CMemoryPool *mp,
CExpression *pexpr,
CColRefSet *pcrsProcessed)
{
GPOS_ASSERT(nullptr != pcrsProcessed);
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(pexpr->Pop()->FLogical());
const ULONG ulChildren = pexpr->Arity();
CPropConstraint *ppc = pexpr->DerivePropertyConstraint();
CExpression *pexprPred = nullptr;
COperator *pop = pexpr->Pop();
if (CUtils::FLogicalDML(pop))
{
pexprPred = CUtils::PexprScalarConstBool(mp, true);
}
else
{
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
CColRefSetArray *pdrgpcrs = ppc->PdrgpcrsEquivClasses();
GPOS_ASSERT(nullptr != pdrgpcrs);
const ULONG ulEquivClasses = pdrgpcrs->Size();
for (ULONG ul = 0; ul < ulEquivClasses; ul++)
{
CColRefSet *pcrsEquivClass = (*pdrgpcrs)[ul];
CColRefSet *pcrsEquality = GPOS_NEW(mp) CColRefSet(mp);
pcrsEquality->Include(pcrsEquivClass);
pcrsEquality->Exclude(pcrsProcessed);
// if equivalence class comes from any of the children, then skip it
if (FEquivClassFromChild(pcrsEquality, pexpr))
{
pcrsEquality->Release();
continue;
}
CExpression *pexprEquality =
PexprConjEqualityPredicates(mp, pcrsEquality);
pcrsProcessed->Include(pcrsEquality);
pcrsEquality->Release();
pdrgpexpr->Append(pexprEquality);
}
pexprPred = CPredicateUtils::PexprConjunction(mp, pdrgpexpr);
}
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (pexprChild->Pop()->FLogical())
{
CExpression *pexprChildNew =
PexprAddEqualityPreds(mp, pexprChild, pcrsProcessed);
pdrgpexprChildren->Append(pexprChildNew);
}
else
{
pexprChild->AddRef();
pdrgpexprChildren->Append(pexprChild);
}
}
pop->AddRef();
return CUtils::PexprSafeSelect(
mp, GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren), pexprPred);
}
// generate predicates for the given set of columns based on the given
// constraint property. Columns for which predicates are generated will be
// added to the set of processed columns
CExpression *
CExpressionPreprocessor::PexprScalarPredicates(
CMemoryPool *mp, CPropConstraint *ppc,
CPropConstraint *constraintsForOuterRefs, CColRefSet *pcrsNotNull,
CColRefSet *pcrs, CColRefSet *pcrsProcessed)
{
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
CColRefSetIter crsi(*pcrs);
while (crsi.Advance())
{
CColRef *colref = crsi.Pcr();
CExpression *pexprScalar = ppc->PexprScalarMappedFromEquivCols(
mp, colref, constraintsForOuterRefs);
if (nullptr == pexprScalar)
{
continue;
}
// do not add a NOT NULL predicate if column is not nullable or if it
// already has another predicate on it
if (CUtils::FScalarNotNull(pexprScalar) &&
(pcrsNotNull->FMember(colref) ||
ppc->Pcnstr()->FConstraint(colref)))
{
pexprScalar->Release();
continue;
}
pdrgpexpr->Append(pexprScalar);
// set the column `processed` after generate new predicate on this column.
pcrsProcessed->Include(colref);
}
if (0 == pdrgpexpr->Size())
{
pdrgpexpr->Release();
return nullptr;
}
return CPredicateUtils::PexprConjunction(mp, pdrgpexpr);
}
// process scalar expressions for generating additional predicates based on
// derived constraints. This function is needed because scalar expressions
// can have relational children when there are subqueries
CExpression *
CExpressionPreprocessor::PexprFromConstraintsScalar(
CMemoryPool *mp, CExpression *pexpr,
CPropConstraint *constraintsForOuterRefs)
{
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(pexpr->Pop()->FScalar());
if (!CUtils::FHasSubquery(pexpr))
{
pexpr->AddRef();
return pexpr;
}
const ULONG ulChildren = pexpr->Arity();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (pexprChild->Pop()->FScalar())
{
pexprChild = PexprFromConstraintsScalar(mp, pexprChild,
constraintsForOuterRefs);
}
else
{
GPOS_ASSERT(pexprChild->Pop()->FLogical());
CColRefSet *pcrsProcessed = GPOS_NEW(mp) CColRefSet(mp);
pexprChild = PexprFromConstraints(mp, pexprChild, pcrsProcessed,
constraintsForOuterRefs);
pcrsProcessed->Release();
}
pdrgpexprChildren->Append(pexprChild);
}
COperator *pop = pexpr->Pop();
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// Imply new predicates on LOJ's inner child based on constraints derived
// from LOJ's outer child and join predicate
CExpression *
CExpressionPreprocessor::PexprWithImpliedPredsOnLOJInnerChild(
CMemoryPool *mp, CExpression *pexprLOJ,
BOOL *
pfAddedPredicates // output: set to True if new predicates are added to inner child
)
{
GPOS_ASSERT(nullptr != pexprLOJ);
GPOS_ASSERT(nullptr != pfAddedPredicates);
GPOS_ASSERT(COperator::EopLogicalLeftOuterJoin == pexprLOJ->Pop()->Eopid());
CExpression *pexprOuter = (*pexprLOJ)[0];
CExpression *pexprInner = (*pexprLOJ)[1];
CExpression *pexprOuterJoinPred = (*pexprLOJ)[2];
// merge children constraints with constraints derived from join's predicate
CExpressionHandle exprhdl(mp);
exprhdl.Attach(pexprLOJ);
CPropConstraint *ppc =
CLogical::PpcDeriveConstraintFromPredicates(mp, exprhdl);
// use the computed constraint to derive a scalar predicate on the inner child
CColRefSet *pcrsInnerOutput = pexprInner->DeriveOutputColumns();
CColRefSet *pcrsInnerNotNull = pexprInner->DeriveNotNullColumns();
// generate a scalar predicate from the computed constraint, restricted to LOJ inner child
CColRefSet *pcrsProcessed = GPOS_NEW(mp) CColRefSet(mp);
CExpression *pexprPred =
PexprScalarPredicates(mp, ppc, nullptr /*no outer refs*/,
pcrsInnerNotNull, pcrsInnerOutput, pcrsProcessed);
pcrsProcessed->Release();
ppc->Release();
pexprInner->AddRef();
if (nullptr != pexprPred && !CUtils::FScalarConstTrue(pexprPred))
{
// if a new predicate was added, set the output flag to True
*pfAddedPredicates = true;
pexprPred->AddRef();
CExpression *pexprSelect =
CUtils::PexprLogicalSelect(mp, pexprInner, pexprPred);
CExpression *pexprInnerNormalized =
CNormalizer::PexprNormalize(mp, pexprSelect);
pexprSelect->Release();
pexprInner = pexprInnerNormalized;
}
CRefCount::SafeRelease(pexprPred);
// recursively process inner child
CExpression *pexprNewInner =
PexprOuterJoinInferPredsFromOuterChildToInnerChild(mp, pexprInner,
pfAddedPredicates);
pexprInner->Release();
// recursively process outer child
CExpression *pexprNewOuter =
PexprOuterJoinInferPredsFromOuterChildToInnerChild(mp, pexprOuter,
pfAddedPredicates);
pexprOuterJoinPred->AddRef();
COperator *pop = pexprLOJ->Pop();
pop->AddRef();
return GPOS_NEW(mp)
CExpression(mp, pop, pexprNewOuter, pexprNewInner, pexprOuterJoinPred);
}
// Infer predicate from outer child to inner child of the outer join,
//
// for LOJ expressions with predicates on outer child, e.g.,
//
// +-LOJ(x=y)
// |---Select(x=5)
// | +----X
// +----Y
//
// this function implies an equivalent predicate (y=5) on the inner child of LOJ:
//
// +-LOJ(x=y)
// |---Select(x=5)
// | +----X
// +---Select(y=5)
// +----Y
//
// the correctness of this rewrite can be proven as follows:
// - By removing all tuples from Y that do not satisfy (y=5), the LOJ
// results, where x=y, are retained. The reason is that any such join result
// must satisfy (x=5 ^ x=y) which implies that (y=5).
//
// - LOJ results that correspond to tuples from X not joining with any tuple
// from Y are also retained. The reason is that such join results can only be
// produced if for all tuples in Y, we have (y!=5). By selecting Y tuples where (y=5),
// if we end up with no Y tuples, the LOJ results will be generated by joining X with empty Y.
// This is the same as joining with all tuples from Y with (y!=5). If we end up with
// any tuple in Y satisfying (y=5), no LOJ results corresponding to X tuples not joining
// with Y can be produced.
//
// to implement this rewrite in a general form, we need to imply general constraints on
// LOJ's inner child from constraints that exist on LOJ's outer child. The generated predicate
// from this inference can only be inserted below LOJ (on top of the inner child), and cannot be
// inserted on top of LOJ, otherwise we may wrongly convert LOJ to inner-join.
CExpression *
CExpressionPreprocessor::PexprOuterJoinInferPredsFromOuterChildToInnerChild(
CMemoryPool *mp, CExpression *pexpr,
BOOL *
pfAddedPredicates // output: set to True if new predicates are added to inner child
)
{
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(nullptr != pfAddedPredicates);
COperator *pop = pexpr->Pop();
if (COperator::EopLogicalLeftOuterJoin == pop->Eopid())
{
return PexprWithImpliedPredsOnLOJInnerChild(mp, pexpr,
pfAddedPredicates);
}
// not an outer join, process children recursively
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild =
PexprOuterJoinInferPredsFromOuterChildToInnerChild(
mp, (*pexpr)[ul], pfAddedPredicates);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// additional predicates are generated based on the derived constraint
// properties of the expression. No predicates are generated for the columns
// in the already processed set. This set is expanded with more columns
// that get processed along the way
CExpression *
CExpressionPreprocessor::PexprFromConstraints(
CMemoryPool *mp, CExpression *pexpr, CColRefSet *pcrsProcessed,
CPropConstraint *constraintsForOuterRefs)
{
GPOS_ASSERT(nullptr != pcrsProcessed);
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(pexpr->Pop()->FLogical());
const ULONG ulChildren = pexpr->Arity();
CPropConstraint *ppc = pexpr->DerivePropertyConstraint();
CColRefSet *pcrsNotNull = pexpr->DeriveNotNullColumns();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (pexprChild->Pop()->FScalar())
{
// we could potentially combine constraintsForOuterRefs and ppc,
// but for now just pass ppc, the constraints from the direct ancestor
// of the subquery
pexprChild = PexprFromConstraintsScalar(mp, pexprChild, ppc);
pdrgpexprChildren->Append(pexprChild);
continue;
}
// process child
CExpression *pexprChildNew = PexprFromConstraints(
mp, pexprChild, pcrsProcessed, constraintsForOuterRefs);
CColRefSet *pcrsOutChild = GPOS_NEW(mp) CColRefSet(mp);
// output columns on which predicates must be inferred
pcrsOutChild->Include(pexprChild->DeriveOutputColumns());
// exclude column references on which predicates had been already inferred,
// this avoids generating duplicate predicates on the parent node if a
// predicate has already been placed on the child.
pcrsOutChild->Exclude(pcrsProcessed);
// generate predicates for the output columns of child
CExpression *pexprPred =
PexprScalarPredicates(mp, ppc, constraintsForOuterRefs, pcrsNotNull,
pcrsOutChild, pcrsProcessed);
pcrsOutChild->Release();
if (nullptr != pexprPred)
{
pdrgpexprChildren->Append(
CUtils::PexprSafeSelect(mp, pexprChildNew, pexprPred));
}
else
{
pdrgpexprChildren->Append(pexprChildNew);
}
}
COperator *pop = pexpr->Pop();
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// eliminate subtrees that have a zero output cardinality, replacing them
// with a const table get with the same output schema and zero tuples
CExpression *
CExpressionPreprocessor::PexprPruneEmptySubtrees(CMemoryPool *mp,
CExpression *pexpr)
{
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (pop->FLogical() && !CUtils::FLogicalDML(pop))
{
// if maxcard = 0: return a const table get with same output columns and zero tuples
if (0 == pexpr->DeriveMaxCard())
{
// output columns
CColRefArray *colref_array =
pexpr->DeriveOutputColumns()->Pdrgpcr(mp);
// empty output data
IDatum2dArray *pdrgpdrgpdatum = GPOS_NEW(mp) IDatum2dArray(mp);
COperator *popCTG = GPOS_NEW(mp)
CLogicalConstTableGet(mp, colref_array, pdrgpdrgpdatum);
return GPOS_NEW(mp) CExpression(mp, popCTG);
}
}
// process children
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = PexprPruneEmptySubtrees(mp, (*pexpr)[ul]);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// eliminate CTE Anchors for CTEs that have zero consumers
CExpression *
CExpressionPreprocessor::PexprRemoveUnusedCTEs(CMemoryPool *mp,
CExpression *pexpr)
{
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (COperator::EopLogicalCTEAnchor == pop->Eopid())
{
ULONG id = CLogicalCTEAnchor::PopConvert(pop)->Id();
if (!COptCtxt::PoctxtFromTLS()->Pcteinfo()->FUsed(id))
{
GPOS_ASSERT(1 == pexpr->Arity());
return PexprRemoveUnusedCTEs(mp, (*pexpr)[0]);
}
}
// process children
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = PexprRemoveUnusedCTEs(mp, (*pexpr)[ul]);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// for all consumers of the same CTE, collect all selection predicates
// on top of these consumers, if any, and store them in hash map
void
CExpressionPreprocessor::CollectCTEPredicates(CMemoryPool *mp,
CExpression *pexpr,
CTEPredsMap *phm)
{
GPOS_CHECK_STACK_SIZE;
if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() &&
COperator::EopLogicalCTEConsumer == (*pexpr)[0]->Pop()->Eopid() &&
0 == pexpr->DeriveOuterReferences()
->Size() // no outer references in selection predicate
)
{
CExpression *pexprScalar = (*pexpr)[1];
if (!pexprScalar->DeriveHasSubquery())
{
CExpression *pexprChild = (*pexpr)[0];
CLogicalCTEConsumer *popConsumer =
CLogicalCTEConsumer::PopConvert(pexprChild->Pop());
ULONG ulCTEId = popConsumer->UlCTEId();
CExpression *pexprProducer =
COptCtxt::PoctxtFromTLS()->Pcteinfo()->PexprCTEProducer(
ulCTEId);
GPOS_ASSERT(nullptr != pexprProducer);
CLogicalCTEProducer *popProducer =
CLogicalCTEProducer::PopConvert(pexprProducer->Pop());
UlongToColRefMap *colref_mapping = CUtils::PhmulcrMapping(
mp, popConsumer->Pdrgpcr(), popProducer->Pdrgpcr());
CExpression *pexprRemappedScalar =
pexprScalar->PexprCopyWithRemappedColumns(mp, colref_mapping,
true /*must_exist*/);
colref_mapping->Release();
CExpressionArray *pdrgpexpr = phm->Find(&ulCTEId);
if (nullptr == pdrgpexpr)
{
pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
BOOL fInserted GPOS_ASSERTS_ONLY =
phm->Insert(GPOS_NEW(mp) ULONG(ulCTEId), pdrgpexpr);
GPOS_ASSERT(fInserted);
}
pdrgpexpr->Append(pexprRemappedScalar);
}
}
// process children recursively
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CollectCTEPredicates(mp, (*pexpr)[ul], phm);
}
}
// add CTE predicates collected from consumers to producer expressions
void
CExpressionPreprocessor::AddPredsToCTEProducers(CMemoryPool *mp,
CExpression *pexpr)
{
CTEPredsMap *phm = GPOS_NEW(mp) CTEPredsMap(mp);
CollectCTEPredicates(mp, pexpr, phm);
CCTEInfo *pcteinfo = COptCtxt::PoctxtFromTLS()->Pcteinfo();
CTEPredsMapIter mi(phm);
while (mi.Advance())
{
ULONG ulCTEId = *(mi.Key());
CExpression *pexprProducer = pcteinfo->PexprCTEProducer(ulCTEId);
GPOS_ASSERT(nullptr != pexprProducer);
ULONG ulConsumers = pcteinfo->UlConsumers(ulCTEId);
CExpressionArray *pdrgpexpr =
const_cast<CExpressionArray *>(mi.Value());
// skip the propagation of predicate contains volatile function e.g. random() (value change within a scan)
if (CPredicateUtils::FContainsVolatileFunction(pdrgpexpr))
{
continue;
}
if (0 < ulConsumers && pdrgpexpr->Size() == ulConsumers)
{
// add new predicate to CTE producer only if all consumers have selection predicates,
// for example, in the following query
// 'with v as (select * from A) select * from v where a > 5 union select * from v where b > 5'
// we add the new predicate '(a > 5 or b > 5)' to CTE producer expression,
// while for the following query
// 'with v as (select * from A) select * from v where a > 5 union select * from v'
// we do not add any new predicates to CTE producer expression
pdrgpexpr->AddRef();
CExpression *pexprPred =
CPredicateUtils::PexprDisjunction(mp, pdrgpexpr);
(*pexprProducer)[0]->AddRef();
CExpression *pexprSelect =
CUtils::PexprLogicalSelect(mp, (*pexprProducer)[0], pexprPred);
COperator *pop = pexprProducer->Pop();
pop->AddRef();
CExpression *pexprNewProducer =
GPOS_NEW(mp) CExpression(mp, pop, pexprSelect);
pcteinfo->ReplaceCTEProducer(pexprNewProducer);
pexprNewProducer->Release();
}
}
phm->Release();
}
// derive constraints on given expression tree, and add new predicates by implication
CExpression *
CExpressionPreprocessor::PexprAddPredicatesFromConstraints(CMemoryPool *mp,
CExpression *pexpr)
{
// normalize the tree, push down predicates (since we infer predicates bottom-up,
// we want the predicates/constraints to be at the lowest possible point in the tree)
CExpression *pexprNormalized = CNormalizer::PexprNormalize(mp, pexpr);
// walk the tree and generate additional predicates from constraint properties
// based on equivalence classes, e.g. constraint a=1 and equiv class {a,b} adds pred b=1
CColRefSet *pcrsProcessed = GPOS_NEW(mp) CColRefSet(mp);
CExpression *pexprConstraints =
PexprFromConstraints(mp, pexprNormalized, pcrsProcessed, nullptr);
GPOS_CHECK_ABORT;
pexprNormalized->Release();
pcrsProcessed->Release();
// walk the tree again and generate equality predicates for columns in
// equivalence classes, e.g. {cr1,cr2,cr3} results in cr1=cr2 and cr1=cr3 and cr2=cr3
pcrsProcessed = GPOS_NEW(mp) CColRefSet(mp);
CExpression *pexprAddEqualityPreds =
PexprAddEqualityPreds(mp, pexprConstraints, pcrsProcessed);
// normalize the tree, push down predicates
CExpression *pexprEqualityNormalized =
CNormalizer::PexprNormalize(mp, pexprAddEqualityPreds);
GPOS_CHECK_ABORT;
pcrsProcessed->Release();
pexprConstraints->Release();
pexprAddEqualityPreds->Release();
// remove generated duplicate predicates
CExpression *pexprDeduped =
CExpressionUtils::PexprDedupChildren(mp, pexprEqualityNormalized);
pexprEqualityNormalized->Release();
return pexprDeduped;
}
// driver for inferring predicates from constraints
CExpression *
CExpressionPreprocessor::PexprInferPredicates(CMemoryPool *mp,
CExpression *pexpr)
{
GPOS_ASSERT(nullptr != pexpr);
// generate new predicates from constraint properties and normalize the result
CExpression *pexprWithPreds = PexprAddPredicatesFromConstraints(mp, pexpr);
// infer predicates from outer child to inner child of outer join
BOOL fNewPreds = false;
CExpression *pexprInferredPreds =
PexprOuterJoinInferPredsFromOuterChildToInnerChild(mp, pexprWithPreds,
&fNewPreds);
pexprWithPreds->Release();
pexprWithPreds = pexprInferredPreds;
if (fNewPreds)
{
// if we succeeded in generating new predicates below outer join, we need to
// re-derive constraints to generate any other potential predicates
pexprWithPreds =
PexprAddPredicatesFromConstraints(mp, pexprInferredPreds);
pexprInferredPreds->Release();
}
return pexprWithPreds;
}
// Workhorse for pruning unused computed columns
//
// The required columns passed by the query is passed to this pre-processing
// stage and the list of columns are copied to a new list. This driver function
// calls the PexprPruneUnusedComputedColsRecursive function with the copied
// required column set. The original required columns set is not modified by
// this preprocessor.
//
// Extra copy of the required columns set is avoided in each recursive call by
// creating a one-time copy and passing it by reference for all the recursive
// calls.
//
// The functional behavior of the PruneUnusedComputedCols changed slightly
// because we do not delete the required column set at the end of every
// call but pass it to the next and consecutive recursive calls. However,
// it is safe to add required columns by each operator we traverse, because non
// of the required columns from other child of a tree will appear on the project
// list of the other children.
//
// Therefore, the added columns to the required columns which is caused by
// the recursive call and passing by reference will not have a bad affect
// on the overall result.
CExpression *
CExpressionPreprocessor::PexprPruneUnusedComputedCols(CMemoryPool *mp,
CExpression *pexpr,
CColRefSet *pcrsReqd)
{
GPOS_ASSERT(nullptr != pexpr);
if (nullptr == pcrsReqd ||
GPOS_FTRACE(EopttraceDisablePruneUnusedComputedColumns))
{
pexpr->AddRef();
return pexpr;
}
CColRefSet *pcrsReqdNew = GPOS_NEW(mp) CColRefSet(mp);
pcrsReqdNew->Include(pcrsReqd);
CExpression *pExprNew =
PexprPruneUnusedComputedColsRecursive(mp, pexpr, pcrsReqdNew);
pcrsReqdNew->Release();
return pExprNew;
}
// Workhorse for pruning unused computed columns
CExpression *
CExpressionPreprocessor::PexprPruneUnusedComputedColsRecursive(
CMemoryPool *mp, CExpression *pexpr, CColRefSet *pcrsReqd)
{
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
// leave subquery alone
if (CUtils::FSubquery(pop))
{
pexpr->AddRef();
return pexpr;
}
if (COperator::EopLogicalProject == pop->Eopid() ||
COperator::EopLogicalGbAgg == pop->Eopid())
{
CExpression *pexprProjList = (*pexpr)[1];
CColRefSet *pcrsDefined = pexprProjList->DeriveDefinedColumns();
CColRefSet *pcrsSetReturningFunction =
pexprProjList->DeriveSetReturningFunctionColumns();
pcrsReqd->Include(CLogical::PopConvert(pop)->PcrsLocalUsed());
// columns containing set-returning functions are needed for correct query results
pcrsReqd->Union(pcrsSetReturningFunction);
CColRefSet *pcrsUnusedLocal = GPOS_NEW(mp) CColRefSet(mp);
pcrsUnusedLocal->Include(pcrsDefined);
pcrsUnusedLocal->Difference(pcrsReqd);
if (0 < pcrsUnusedLocal->Size()) // need to prune
{
// actual construction of new operators without unnecessary project elements
CExpression *pexprResult = PexprPruneProjListProjectOrGbAgg(
mp, pexpr, pcrsUnusedLocal, pcrsDefined, pcrsReqd);
pcrsUnusedLocal->Release();
return pexprResult;
}
pcrsUnusedLocal->Release();
}
if (pop->FLogical())
{
// for logical operators, collect the used columns
// this includes columns used by the operator itself and its scalar children
CExpressionHandle exprhdl(mp);
exprhdl.Attach(pexpr);
CColRefSet *pcrsLogicalUsed = exprhdl.PcrsUsedColumns(mp);
pcrsReqd->Include(pcrsLogicalUsed);
pcrsLogicalUsed->Release();
}
// process children
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild =
PexprPruneUnusedComputedColsRecursive(mp, (*pexpr)[ul], pcrsReqd);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// Construct new Project or GroupBy operator without unused computed
// columns as project elements
CExpression *
CExpressionPreprocessor::PexprPruneProjListProjectOrGbAgg(
CMemoryPool *mp, CExpression *pexpr, CColRefSet *pcrsUnused,
CColRefSet *pcrsDefined, const CColRefSet *pcrsReqd)
{
GPOS_ASSERT(nullptr != pexpr);
GPOS_ASSERT(nullptr != pcrsUnused);
GPOS_ASSERT(nullptr != pcrsDefined);
GPOS_ASSERT(nullptr != pcrsReqd);
CExpression *pexprResult = nullptr;
COperator *pop = pexpr->Pop();
CColRefSet *pcrsReqdNew = GPOS_NEW(mp) CColRefSet(mp);
pcrsReqdNew->Include(pcrsReqd);
GPOS_ASSERT(COperator::EopLogicalProject == pop->Eopid() ||
COperator::EopLogicalGbAgg == pop->Eopid());
CExpression *pexprRelational = (*pexpr)[0];
CExpression *pexprProjList = (*pexpr)[1];
// recursively process the relational child
CExpression *pexprRelationalNew = nullptr;
if (pcrsUnused->Size() == pcrsDefined->Size())
{
// the entire project list needs to be pruned
if (COperator::EopLogicalProject == pop->Eopid())
{
pexprRelationalNew = PexprPruneUnusedComputedColsRecursive(
mp, pexprRelational, pcrsReqdNew);
pexprResult = pexprRelationalNew;
}
else
{
GPOS_ASSERT(COperator::EopLogicalGbAgg == pop->Eopid());
CExpression *pexprProjectListNew = nullptr;
CColRefArray *pdrgpcrGroupingCols =
CLogicalGbAgg::PopConvert(pop)->Pdrgpcr();
if (0 < pdrgpcrGroupingCols->Size())
{
// if grouping cols exist, we need to maintain the GbAgg with an empty project list
pexprProjectListNew = GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CScalarProjectList(mp));
pcrsReqdNew->Include(pdrgpcrGroupingCols);
}
else
{
// TODO: 10/15/2015: if there is no grouping cols, we could remove the entire GbAgg and plug in a ConstTableGet instead
pexprProjList->AddRef();
pexprProjectListNew = pexprProjList;
CExpressionHandle exprhdl(mp);
exprhdl.Attach(pexpr);
CColRefSet *pcrsLogicalUsed = exprhdl.PcrsUsedColumns(mp);
pcrsReqdNew->Include(pcrsLogicalUsed);
pcrsLogicalUsed->Release();
}
pop->AddRef();
pexprRelationalNew = PexprPruneUnusedComputedColsRecursive(
mp, pexprRelational, pcrsReqdNew);
pexprResult = GPOS_NEW(mp)
CExpression(mp, pop, pexprRelationalNew, pexprProjectListNew);
}
}
else
{
// only remove part of the project elements
CExpressionArray *pdrgpexprPrElRemain =
GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulPrjEls = pexprProjList->Arity();
CExpressionHandle exprhdl(mp);
for (ULONG ul = 0; ul < ulPrjEls; ul++)
{
CExpression *pexprPrEl = (*pexprProjList)[ul];
CScalarProjectElement *popPrEl =
CScalarProjectElement::PopConvert(pexprPrEl->Pop());
if (!pcrsUnused->FMember(popPrEl->Pcr()))
{
pexprPrEl->AddRef();
pdrgpexprPrElRemain->Append(pexprPrEl);
pcrsReqdNew->Include(pexprPrEl->DeriveUsedColumns());
}
}
GPOS_ASSERT(0 < pdrgpexprPrElRemain->Size());
CExpression *pexprNewProjectList = GPOS_NEW(mp) CExpression(
mp, GPOS_NEW(mp) CScalarProjectList(mp), pdrgpexprPrElRemain);
pop->AddRef();
pexprRelationalNew = PexprPruneUnusedComputedColsRecursive(
mp, pexprRelational, pcrsReqdNew);
pexprResult = GPOS_NEW(mp)
CExpression(mp, pop, pexprRelationalNew, pexprNewProjectList);
}
pcrsReqdNew->Release();
return pexprResult;
}
// reorder the child for scalar comparision to ensure that left child is a scalar ident and right child is a scalar const if not
CExpression *
CExpressionPreprocessor::PexprReorderScalarCmpChildren(CMemoryPool *mp,
CExpression *pexpr)
{
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (CUtils::FScalarCmp(pexpr) ||
COperator::EopScalarIsDistinctFrom == pexpr->Pop()->Eopid())
{
GPOS_ASSERT(2 == pexpr->Arity());
CExpression *pexprLeft = (*pexpr)[0];
CExpression *pexprRight = (*pexpr)[1];
if (CUtils::FScalarConst(pexprLeft) && CUtils::FScalarIdent(pexprRight))
{
CScalarCmp *popScalarCmpCommuted =
(dynamic_cast<CScalarCmp *>(pop))->PopCommutedOp(mp);
if (popScalarCmpCommuted)
{
pexprLeft->AddRef();
pexprRight->AddRef();
return GPOS_NEW(mp) CExpression(mp, popScalarCmpCommuted,
pexprRight, pexprLeft);
}
}
}
// process children
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulChildren = pexpr->Arity();
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild =
PexprReorderScalarCmpChildren(mp, (*pexpr)[ul]);
pdrgpexpr->Append(pexprChild);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexpr);
}
// converts IN subquery to a predicate AND an EXISTS subquery
// Example Algebrized queries:
// 1. Without a Project List:
// Input:
// +--CScalarSubqueryAny(=)["c2" (0)]
// |--CLogicalGet "foo" ("foo"), Columns: ["c1" (8) ...] Key sets: {[1,7]}
// +--CScalarIdent "c2" (0)
//
// Output:
// +--CScalarBoolOp (EboolopAnd)
// |--CScalarCmp (=)
// | |--CScalarIdent "c2" (0)
// | +--CScalarIdent "c2" (0)
// +--CScalarSubqueryExists
// +--CLogicalGet "foo" ("foo"), Columns: ["c1" (8) ...] Key sets: {[1,7]}
//
// 2. With a Project List:
// Input:
// +--CScalarSubqueryAny(=)["?column?" (16)]
// |--CLogicalProject
// | |--CLogicalGet "foo" ("foo"), Columns: ["c1" (8) ...] Key sets: {[1,7]}
// | +--CScalarProjectList
// | +--CScalarProjectElement "?column?" (16)
// | +--CScalarOp (+)
// | |--CScalarIdent "c2" (0)
// | +--CScalarConst (1)
// +--CScalarIdent "c2" (0)
//
// Output:
// +--CScalarBoolOp (EboolopAnd)
// |--CScalarCmp (=)
// | |--CScalarIdent "c2" (0)
// | +--CScalarOp (+)
// | |--CScalarIdent "c2" (0)
// | +--CScalarConst (1)
// +--CScalarSubqueryExists
// +--CLogicalGet "foo" ("foo"), Columns: ["c1" (8) ...] Key sets: {[1,7]}
CExpression *
CExpressionPreprocessor::ConvertInToSimpleExists(CMemoryPool *mp,
CExpression *pexpr)
{
GPOS_ASSERT(COperator::EopScalarSubqueryAny == pexpr->Pop()->Eopid());
COperator *pop = pexpr->Pop();
CExpression *pexprRelational = (*pexpr)[0];
// Example for below variables:
// SELECT * FROM bar WHERE
// bar.a in (SELECT bar.b FROM foo) <- Input expression (pexpr)
// | |
// pexprLeft pexprRight
// generate scalarOp expression by using column reference of the IN subquery's
// inner child's column reference as well as the expression extracted above
// from the project element
CExpression *pexprLeft = (*pexpr)[1];
if (CUtils::FSubquery(pexprLeft->Pop()))
{
// don't convert if inner child is a subquery
// Example: SELECT * FROM bar WHERE (SELECT 1) IN (SELECT c2 FROM foo);
return nullptr;
}
// since Orca doesn't support IN subqueries of multiple columns such as
// (a,a) in (select foo.a, foo.a from ...) ,
// only extract the first expression under the first project element in the
// project list and make it as the right operand to the scalar operation.
CExpression *pexprRight = nullptr;
CExpression *pexprSubqOfExists = nullptr;
if (COperator::EopLogicalProject == pexprRelational->Pop()->Eopid())
{
pexprRight = CUtils::PNthProjectElementExpr(pexprRelational, 0);
pexprRight->AddRef();
pexprSubqOfExists = (*pexprRelational)[0];
}
else
{
pexprRight = CUtils::PexprScalarIdent(
mp, CScalarSubqueryAny::PopConvert(pop)->Pcr());
pexprSubqOfExists = pexprRelational;
}
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
IMDId *mdid = CScalarSubqueryAny::PopConvert(pop)->MdIdOp();
const CWStringConst *str =
md_accessor->RetrieveScOp(mdid)->Mdname().GetMDName();
mdid->AddRef();
pexprLeft->AddRef();
CExpression *pexprScalarOp =
CUtils::PexprScalarCmp(mp, pexprLeft, pexprRight, *str, mdid);
pexprSubqOfExists->AddRef();
CExpression *pexprScalarSubqExists = GPOS_NEW(mp) CExpression(
mp, GPOS_NEW(mp) CScalarSubqueryExists(mp), pexprSubqOfExists);
// AND the generated predicate with the EXISTS subquery expression and return.
CExpressionArray *pdrgpexprBoolOperands = GPOS_NEW(mp) CExpressionArray(mp);
pdrgpexprBoolOperands->Append(pexprScalarOp);
pdrgpexprBoolOperands->Append(pexprScalarSubqExists);
return CUtils::PexprScalarBoolOp(mp, CScalarBoolOp::EboolopAnd,
pdrgpexprBoolOperands);
}
// rewrite IN subquery to EXIST subquery with a predicate
// Example:
// Input: SELECT * FROM foo WHERE foo.a IN (SELECT foo.b+1 FROM bar);
// Output: SELECT * FROM foo WHERE foo.a=foo.b+1 AND EXISTS (SELECT * FROM bar);
CExpression *
CExpressionPreprocessor::PexprExistWithPredFromINSubq(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
// recursively process children
const ULONG arity = pexpr->Arity();
pop->AddRef();
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild =
PexprExistWithPredFromINSubq(mp, (*pexpr)[ul]);
pdrgpexprChildren->Append(pexprChild);
}
CExpression *pexprNew =
GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
//Check if the inner is a SubqueryAny
if (CUtils::FAnySubquery(pop))
{
CExpression *pexprLogicalProject = (*pexprNew)[0];
// we do the conversion if the project list has an outer reference and
// it does not include any column from the relational child.
if (COperator::EopLogicalProject == pexprLogicalProject->Pop()->Eopid())
{
// bail out if subquery has an inner reference or does not have any outer reference
if (!CUtils::HasOuterRefs(pexprLogicalProject) ||
CUtils::FInnerRefInProjectList(pexprLogicalProject))
{
return pexprNew;
}
}
else
{
// perform conversion if subquery does not output any of the columns from relational child
const CColRef *pcrSubquery =
CScalarSubqueryAny::PopConvert(pop)->Pcr();
CColRefSet *pcrsRelationalChild =
(*pexpr)[0]->DeriveOutputColumns();
if (pcrsRelationalChild->FMember(pcrSubquery))
{
return pexprNew;
}
}
CExpression *pexprNewConverted = ConvertInToSimpleExists(mp, pexprNew);
if (nullptr != pexprNewConverted)
{
pexprNew->Release();
pexprNew = pexprNewConverted;
;
}
}
return pexprNew;
}
CExpression *
CExpressionPreprocessor::PrunePartitions(CMemoryPool *mp, CExpression *expr)
{
GPOS_ASSERT(nullptr != expr);
CMDAccessor *mda = COptCtxt::PoctxtFromTLS()->Pmda();
COperator *pop = expr->Pop();
if (pop->Eopid() == COperator::EopLogicalSelect &&
(*expr)[0]->Pop()->Eopid() == COperator::EopLogicalDynamicGet)
{
CExpression *filter_pred = (*expr)[1];
CLogicalDynamicGet *dyn_get =
CLogicalDynamicGet::PopConvert((*expr)[0]->Pop());
CColRefSetArray *pdrgpcrsChild = nullptr;
CConstraint *pred_cnstr =
CConstraint::PcnstrFromScalarExpr(mp, filter_pred, &pdrgpcrsChild);
CRefCount::SafeRelease(pdrgpcrsChild);
// GPDB_12_MERGE_FIXME: skip all this if pred_cnstr = NULL
IMdIdArray *selected_partition_mdids = GPOS_NEW(mp) IMdIdArray(mp);
CConstraintArray *selected_partition_cnstrs =
GPOS_NEW(mp) CConstraintArray(mp);
IMdIdArray *all_partition_mdids = dyn_get->GetPartitionMdids();
for (ULONG ul = 0; ul < all_partition_mdids->Size(); ++ul)
{
IMDId *part_mdid = (*all_partition_mdids)[ul];
const IMDRelation *partrel = mda->RetrieveRel(part_mdid);
CConstraint *rel_cnstr = PcnstrFromChildPartition(
partrel, dyn_get->PdrgpcrOutput(),
(*dyn_get->GetRootColMappingPerPart())[ul]);
CConstraint *pcnstr = nullptr;
{
CConstraintArray *preds = GPOS_NEW(mp) CConstraintArray(mp);
if (pred_cnstr != nullptr)
{
pred_cnstr->AddRef();
preds->Append(pred_cnstr);
}
if (rel_cnstr != nullptr)
{
preds->Append(rel_cnstr);
}
pcnstr = CConstraint::PcnstrConjunction(mp, preds);
}
// Include the partition if it's not a contradiction, or if it's
// undefined (e.g only has default partition)
if (nullptr == pcnstr || !pcnstr->FContradiction())
{
part_mdid->AddRef();
selected_partition_mdids->Append(part_mdid);
rel_cnstr = PcnstrFromChildPartition(
partrel, dyn_get->PdrgpcrOutput(),
(*dyn_get->GetRootColMappingPerPart())[ul]);
if (rel_cnstr)
{
selected_partition_cnstrs->Append(rel_cnstr);
}
}
CRefCount::SafeRelease(pcnstr);
}
CRefCount::SafeRelease(pred_cnstr);
if (selected_partition_mdids->Size() == 0)
{
// Return const false if there are no partitions left to scan
selected_partition_mdids->Release();
selected_partition_cnstrs->Release();
CColRefArray *colref_array =
expr->DeriveOutputColumns()->Pdrgpcr(mp);
IDatum2dArray *pdrgpdrgpdatum = GPOS_NEW(mp) IDatum2dArray(mp);
COperator *popCTG = GPOS_NEW(mp)
CLogicalConstTableGet(mp, colref_array, pdrgpdrgpdatum);
return GPOS_NEW(mp) CExpression(mp, popCTG);
}
CName *new_alias = GPOS_NEW(mp) CName(mp, dyn_get->Name());
dyn_get->Ptabdesc()->AddRef();
dyn_get->PdrgpcrOutput()->AddRef();
dyn_get->PdrgpdrgpcrPart()->AddRef();
CConstraint *selected_part_cnstr_disj =
CConstraint::PcnstrDisjunction(mp, selected_partition_cnstrs);
CLogicalDynamicGet *new_dyn_get = GPOS_NEW(mp) CLogicalDynamicGet(
mp, new_alias, dyn_get->Ptabdesc(), dyn_get->ScanId(),
dyn_get->PdrgpcrOutput(), dyn_get->PdrgpdrgpcrPart(),
selected_partition_mdids, selected_part_cnstr_disj, true);
CExpressionArray *select_children = GPOS_NEW(mp) CExpressionArray(mp);
select_children->Append(GPOS_NEW(mp) CExpression(mp, new_dyn_get));
select_children->Append(PrunePartitions(mp, filter_pred));
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, select_children);
}
// process children
CExpressionArray *children = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < expr->Arity(); ul++)
{
CExpression *expr_child = (*expr)[ul];
CExpression *new_expr_child = PrunePartitions(mp, expr_child);
children->Append(new_expr_child);
}
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, children);
}
// Translate the part constraint of a child partition into an ORCA expr using
// corresponding colrefs of the root table, instead of those from the child
// partition.
CConstraint *
CExpressionPreprocessor::PcnstrFromChildPartition(
const IMDRelation *partrel, CColRefArray *pdrgpcrOutput,
ColRefToUlongMap *root_col_mapping)
{
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
CMemoryPool *mp = COptCtxt::PoctxtFromTLS()->Pmp();
CExpression *part_constraint_expr = nullptr;
CDXLNode *dxlnode = partrel->MDPartConstraint();
if (nullptr == dxlnode)
{
return nullptr;
}
// Create a list of indexes into the columns of the table descriptor of the
// child partition. This is used in PexprTranslateScalar() to construct a
// reverse mapping from the colid of each (non-dropped) column in the child
// partition to it's corresponding colref in the root table.
// NB: For the indexes in root_col_mapping to be applied correctly here, the
// part constraint should have been translated without dropped cols
// (see RetrievePartConstraintForRel()).
ULongPtrArray *mapped_colids = GPOS_NEW(mp) ULongPtrArray(mp);
for (ULONG ul = 0; ul < pdrgpcrOutput->Size(); ++ul)
{
CColRef *colref = (*pdrgpcrOutput)[ul];
ULONG *colid = root_col_mapping->Find(colref);
GPOS_ASSERT(nullptr != colid);
mapped_colids->Append(GPOS_NEW(mp) ULONG(*colid));
}
CTranslatorDXLToExpr dxltr(mp, md_accessor);
part_constraint_expr =
dxltr.PexprTranslateScalar(dxlnode, pdrgpcrOutput, mapped_colids);
mapped_colids->Release();
GPOS_ASSERT(CUtils::FPredicate(part_constraint_expr));
CColRefSetArray *pdrgpcrsChild = nullptr;
CConstraint *cnstr = CConstraint::PcnstrFromScalarExpr(
mp, part_constraint_expr, &pdrgpcrsChild, true /* infer_nulls_as */);
CRefCount::SafeRelease(part_constraint_expr);
CRefCount::SafeRelease(pdrgpcrsChild);
GPOS_ASSERT(cnstr);
return cnstr;
}
// Collapse a select over a project and update column reference.
static CExpression *
CollapseSelectAndReplaceColref(CMemoryPool *mp, CExpression *pexpr,
CColRef *pcolref, CExpression *pprojExpr)
{
// remove the logical project
//
// Input:
// +--CLogicalSelect (x = 'meh')
// +--CLogicalProject (col1...n, expr as x)
// +-- CLogicalNAryJoin
// Output:
// +--CLogicalSelect (expr = 'meh')
// +-- CLogicalNAryJoin
if (pexpr->Pop()->Eopid() == COperator::EopLogicalSelect &&
(*pexpr)[0]->Pop()->Eopid() == COperator::EopLogicalProject &&
(*(*pexpr)[0])[0]->Pop()->Eopid() == COperator::EopLogicalNAryJoin)
{
(*(*pexpr)[0])[0]->AddRef();
return GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CLogicalSelect(mp), (*(*pexpr)[0])[0],
CollapseSelectAndReplaceColref(mp, (*pexpr)[1], pcolref,
pprojExpr));
}
// replace reference
if (pexpr->Pop()->Eopid() == COperator::EopScalarIdent &&
CColRef::Equals(CScalarIdent::PopConvert(pexpr->Pop())->Pcr(), pcolref))
{
pprojExpr->AddRef();
return pprojExpr;
}
// recurse to children
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < pexpr->Arity(); ul++)
{
pdrgpexprChildren->Append(CollapseSelectAndReplaceColref(
mp, (*pexpr)[ul], pcolref, pprojExpr));
}
COperator *pop = pexpr->Pop();
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
// Transpose a select over a project
//
// This preprocessing step enables additional opportunities for predicate push
// down. In the following example, the select constraint references "b" (18)
// from the project list. As-is the normalizer cannot push the contraint further
// than the project. This step enables further push down by transposing the
// project/select operators and fixing up corresponding column references.
//
// Example:
//
// Input:
// +--CLogicalSelect
// |--CLogicalProject
// | |--CLogicalNAryJoin
// | | |--CLogicalGet "foo" ("foo"), Columns: ["a" (0), "b" (1), ...
// | | |--CLogicalGet "bar" ("bar"), Columns: ["c" (9), "d" (10), ...
// | | +--CScalarCmp (=)
// | | |--CScalarIdent "a" (0)
// | | +--CScalarIdent "c" (9)
// | +--CScalarProjectList
// | +--CScalarProjectElement "b" (18)
// | +--CScalarFunc (varchar)
// | |--CScalarCast
// | | +--CScalarIdent "b" (1)
// | |--CScalarConst (104)
// | +--CScalarConst (1)
// +--CScalarCmp (=)
// |--CScalarCast
// | +--CScalarIdent "b" (18)
// +--CScalarConst (1828233457.000)
// Output:
// +--CLogicalProject
// |--CLogicalSelect
// | |--CLogicalNAryJoin
// | | |--CLogicalGet "foo" ("foo"), Columns: ["a" (0), "b" (1), ...
// | | |--CLogicalGet "bar" ("bar"), Columns: ["c" (9), "d" (10), ...
// | | +--CScalarCmp (=)
// | | |--CScalarIdent "a" (0)
// | | +--CScalarIdent "c" (9)
// | +--CScalarCmp (=)
// | |--CScalarCast
// | | +--CScalarFunc (varchar)
// | | |--CScalarCast
// | | | +--CScalarIdent "b" (1)
// | | |--CScalarConst (104)
// | | +--CScalarConst (1)
// | +--CScalarConst (1828233457.000)
// +--CScalarProjectList
// +--CScalarProjectElement "b" (18)
// +--CScalarFunc (varchar)
// |--CScalarCast
// | +--CScalarIdent "b" (1)
// |--CScalarConst (104)
// +--CScalarConst (1)
CExpression *
CExpressionPreprocessor::PexprTransposeSelectAndProject(CMemoryPool *mp,
CExpression *pexpr)
{
// protect against stack overflow during recursion
GPOS_CHECK_STACK_SIZE;
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
// Transpose:
//
// Input:
// +--CLogicalSelect (x = 'meh')
// +--CLogicalProject (col1...n, expr as x)
// +-- CLogicalNAryJoin
// Output:
// +--CLogicalProject (col1..n, expr as x)
// +--CLogicalSelect (expr = 'meh')
// +-- CLogicalNAryJoin
if (pexpr->Pop()->Eopid() == COperator::EopLogicalSelect &&
(*pexpr)[0]->Pop()->Eopid() == COperator::EopLogicalProject &&
(*(*pexpr)[0])[0]->Pop()->Eopid() == COperator::EopLogicalNAryJoin)
{
CExpression *pproject = (*pexpr)[0];
CExpression *pprojectList = (*pproject)[1];
CExpression *pselectNew = pexpr;
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < pprojectList->Arity(); ul++)
{
CExpression *pprojexpr =
CUtils::PNthProjectElementExpr(pproject, ul);
CExpressionHandle exprhdl(mp);
exprhdl.Attach(pprojexpr);
exprhdl.DeriveProps(nullptr /*pdpctxt*/);
if (exprhdl.Arity() > 1 && exprhdl.DeriveHasNonScalarFunction(1))
{
// Bail if project expression contains a set-returning function
pdrgpexpr->Release();
pexpr->AddRef();
return pexpr;
}
if (exprhdl.FChildrenHaveVolatileFunc())
{
// Bail if project expression contains a volatile function
pdrgpexpr->Release();
pexpr->AddRef();
return pexpr;
}
// TODO: In order to support mixed pushable and non-pushable
// predicates we need to be able to deconstruct a select
// conjunction constraint into pushable and non-pushable
// parts.
//
// NB: JoinOnViewWithMixOfPushableAndNonpushablePredicates.mdp
pselectNew = CollapseSelectAndReplaceColref(
mp, pselectNew, CUtils::PNthProjectElement(pproject, ul)->Pcr(),
CUtils::PNthProjectElementExpr(pproject, ul));
}
pdrgpexpr->Append(pselectNew);
CExpressionArray *pdrgpprojelems = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < pprojectList->Arity(); ul++)
{
(*pprojectList)[ul]->AddRef();
pdrgpprojelems->Append((*pprojectList)[ul]);
}
pdrgpexpr->Append(GPOS_NEW(mp) CExpression(
mp, GPOS_NEW(mp) CScalarProjectList(mp), pdrgpprojelems));
return GPOS_NEW(mp)
CExpression(mp, GPOS_NEW(mp) CLogicalProject(mp), pdrgpexpr);
}
else
{
CExpressionArray *pdrgpexprChildren = GPOS_NEW(mp) CExpressionArray(mp);
for (ULONG ul = 0; ul < pexpr->Arity(); ul++)
{
pdrgpexprChildren->Append(
PexprTransposeSelectAndProject(mp, (*pexpr)[ul]));
}
COperator *pop = pexpr->Pop();
pop->AddRef();
return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
}
}
// Preprocessor function to decide if the Update operator to be proceeded with
// split update or inplace update based on the columns modified by the update
// operation.
// Split Update if any of the modified columns is a distribution column or a partition key,
// InPlace Update if all the modified columns are non-distribution columns and not partition keys.
// Example: Update Modified non-distribution columns.
// Input:
// +--CLogicalUpdate ("foo"), Split Update, Delete Columns: ["a" (0), "b" (1)], Insert Columns: ["a" (0), "b" (9)], "ctid" (2), "gp_segment_id" (8)
// +--CLogicalProject
// |--CLogicalGet
// +--CScalarProjectList
//
// Output:
// +--CLogicalUpdate ("foo"), In-place Update, Delete Columns: ["a" (0), "b" (1)], Insert Columns: ["a" (0), "b" (9)], "ctid" (2), "gp_segment_id" (8)
// +--CLogicalProject
// |--CLogicalGet
// +--CScalarProjectList
CExpression *
CExpressionPreprocessor::ConvertSplitUpdateToInPlaceUpdate(CMemoryPool *mp,
CExpression *pexpr)
{
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
COperator *pop = pexpr->Pop();
if (COperator::EopLogicalUpdate != pop->Eopid())
{
pexpr->AddRef();
return pexpr;
}
CLogicalUpdate *popUpdate = CLogicalUpdate::PopConvert(pop);
CTableDescriptor *tabdesc = popUpdate->Ptabdesc();
CColRefArray *pdrgpcrInsert = popUpdate->PdrgpcrInsert();
CColRefArray *pdrgpcrDelete = popUpdate->PdrgpcrDelete();
const ULONG num_cols = pdrgpcrInsert->Size();
BOOL split_update = false;
CColRefArray *ppartColRefs = GPOS_NEW(mp) CColRefArray(mp);
const ULongPtrArray *pdrgpulPart = tabdesc->PdrgpulPart();
const ULONG ulPartKeys = pdrgpulPart->Size();
// Uses split update if any of the modified columns are either
// distribution or partition keys.
// FIXME: Tested on distribution columns. Validate this after DML on
// partitioned tables is implemented in Orca
for (ULONG ul = 0; ul < ulPartKeys; ul++)
{
ULONG *pulPartKey = (*pdrgpulPart)[ul];
CColRef *colref = (*pdrgpcrInsert)[*pulPartKey];
ppartColRefs->Append(colref);
}
for (ULONG ul = 0; ul < num_cols; ul++)
{
CColRef *pcrInsert = (*pdrgpcrInsert)[ul];
CColRef *pcrDelete = (*pdrgpcrDelete)[ul];
// Checking if column is either distribution or partition key.
if (pcrInsert != pcrDelete &&
(pcrDelete->IsDistCol() ||
ppartColRefs->Find(pcrInsert) != nullptr))
{
split_update = true;
break;
}
}
ppartColRefs->Release();
if (!split_update)
{
CExpression *pexprChild = (*pexpr)[0];
pexprChild->AddRef();
pdrgpcrInsert->AddRef();
pdrgpcrDelete->AddRef();
tabdesc->AddRef();
CExpression *pexprNew = GPOS_NEW(mp) CExpression(
mp,
GPOS_NEW(mp) CLogicalUpdate(mp, tabdesc, pdrgpcrDelete,
pdrgpcrInsert, popUpdate->PcrCtid(),
popUpdate->PcrSegmentId(), false),
pexprChild);
return pexprNew;
}
pexpr->AddRef();
return pexpr;
}
// main driver, pre-processing of input logical expression
CExpression *
CExpressionPreprocessor::PexprPreprocess(
CMemoryPool *mp, CExpression *pexpr,
CColRefSet *
pcrsOutputAndOrderCols // query output cols and cols used in the order specs
)
{
GPOS_ASSERT(nullptr != mp);
GPOS_ASSERT(nullptr != pexpr);
CAutoTimer at("\n[OPT]: Expression Preprocessing Time",
GPOS_FTRACE(EopttracePrintOptimizationStatistics));
// (1) remove unused CTE anchors
CExpression *pexprNoUnusedCTEs = PexprRemoveUnusedCTEs(mp, pexpr);
GPOS_CHECK_ABORT;
// (2.a) remove intermediate superfluous limit
CExpression *pexprSimplifiedLimit =
PexprRemoveSuperfluousLimit(mp, pexprNoUnusedCTEs);
GPOS_CHECK_ABORT;
pexprNoUnusedCTEs->Release();
// (2.b) remove intermediate superfluous distinct
CExpression *pexprSimplifiedDistinct =
PexprRemoveSuperfluousDistinctInDQA(mp, pexprSimplifiedLimit);
GPOS_CHECK_ABORT;
pexprSimplifiedLimit->Release();
// (3) trim unnecessary existential subqueries
CExpression *pexprTrimmed =
PexprTrimExistentialSubqueries(mp, pexprSimplifiedDistinct);
GPOS_CHECK_ABORT;
pexprSimplifiedDistinct->Release();
// (4) collapse cascaded union / union all
CExpression *pexprNaryUnionUnionAll =
PexprCollapseUnionUnionAll(mp, pexprTrimmed);
GPOS_CHECK_ABORT;
pexprTrimmed->Release();
// (5) remove superfluous outer references from the order spec in limits, grouping columns in GbAgg, and
// Partition/Order columns in window operators
CExpression *pexprOuterRefsEleminated =
PexprRemoveSuperfluousOuterRefs(mp, pexprNaryUnionUnionAll);
GPOS_CHECK_ABORT;
pexprNaryUnionUnionAll->Release();
// (6) remove superfluous equality
CExpression *pexprTrimmed2 =
PexprPruneSuperfluousEquality(mp, pexprOuterRefsEleminated);
GPOS_CHECK_ABORT;
pexprOuterRefsEleminated->Release();
// (7) simplify quantified subqueries
CExpression *pexprSubqSimplified =
PexprSimplifyQuantifiedSubqueries(mp, pexprTrimmed2);
GPOS_CHECK_ABORT;
pexprTrimmed2->Release();
// (8) do preliminary unnesting of scalar subqueries
CExpression *pexprSubqUnnested =
PexprUnnestScalarSubqueries(mp, pexprSubqSimplified);
GPOS_CHECK_ABORT;
pexprSubqSimplified->Release();
// (9) unnest AND/OR/NOT predicates
CExpression *pexprUnnested =
CExpressionUtils::PexprUnnest(mp, pexprSubqUnnested);
GPOS_CHECK_ABORT;
pexprSubqUnnested->Release();
CExpression *pexprConvert2In = pexprUnnested;
// GPDB_12_MERGE_FIXME: Although we've enabled EopttraceArrayConstraints,
// the following conversion is causing problems; and might be very
// inefficient! Disable for noe.
if (GPOS_FTRACE(EopttraceArrayConstraints) && false)
{
// (9.5) ensure predicates are array IN or NOT IN where applicable
pexprConvert2In = PexprConvert2In(mp, pexprUnnested);
GPOS_CHECK_ABORT;
pexprUnnested->Release();
}
// (10) infer predicates from constraints
CExpression *pexprInferredPreds = PexprInferPredicates(mp, pexprConvert2In);
GPOS_CHECK_ABORT;
pexprConvert2In->Release();
// (11) eliminate self comparisons
CExpression *pexprSelfCompEliminated =
PexprEliminateSelfComparison(mp, pexprInferredPreds);
GPOS_CHECK_ABORT;
pexprInferredPreds->Release();
// (12) remove duplicate AND/OR children
CExpression *pexprDeduped =
CExpressionUtils::PexprDedupChildren(mp, pexprSelfCompEliminated);
GPOS_CHECK_ABORT;
pexprSelfCompEliminated->Release();
// (13) factorize common expressions
CExpression *pexprFactorized =
CExpressionFactorizer::PexprFactorize(mp, pexprDeduped);
GPOS_CHECK_ABORT;
pexprDeduped->Release();
// (14) infer filters out of components of disjunctive filters
CExpression *pexprPrefiltersExtracted =
CExpressionFactorizer::PexprExtractInferredFilters(mp, pexprFactorized);
GPOS_CHECK_ABORT;
pexprFactorized->Release();
// (15) pre-process ordered agg functions
CExpression *pexprOrderedAggPreprocessed =
COrderedAggPreprocessor::PexprPreprocess(mp, pexprPrefiltersExtracted);
GPOS_CHECK_ABORT;
pexprPrefiltersExtracted->Release();
// (16) pre-process window functions
CExpression *pexprWindowPreprocessed =
CWindowPreprocessor::PexprPreprocess(mp, pexprOrderedAggPreprocessed);
GPOS_CHECK_ABORT;
pexprOrderedAggPreprocessed->Release();
// (17) eliminate unused computed columns
CExpression *pexprNoUnusedPrEl = PexprPruneUnusedComputedCols(
mp, pexprWindowPreprocessed, pcrsOutputAndOrderCols);
GPOS_CHECK_ABORT;
pexprWindowPreprocessed->Release();
// (18) normalize expression
CExpression *pexprNormalized1 =
CNormalizer::PexprNormalize(mp, pexprNoUnusedPrEl);
GPOS_CHECK_ABORT;
pexprNoUnusedPrEl->Release();
// (19) transform outer join into inner join whenever possible
CExpression *pexprLOJToIJ = PexprOuterJoinToInnerJoin(mp, pexprNormalized1);
GPOS_CHECK_ABORT;
pexprNormalized1->Release();
// (20) collapse cascaded inner and left outer joins
CExpression *pexprCollapsed = PexprCollapseJoins(mp, pexprLOJToIJ);
GPOS_CHECK_ABORT;
pexprLOJToIJ->Release();
// (21) after transforming outer joins to inner joins, we may be able to generate more predicates from constraints
CExpression *pexprWithPreds =
PexprAddPredicatesFromConstraints(mp, pexprCollapsed);
GPOS_CHECK_ABORT;
pexprCollapsed->Release();
// (22) eliminate empty subtrees
CExpression *pexprPruned = PexprPruneEmptySubtrees(mp, pexprWithPreds);
GPOS_CHECK_ABORT;
pexprWithPreds->Release();
// (23) collapse cascade of projects
CExpression *pexprCollapsedProjects =
PexprCollapseProjects(mp, pexprPruned);
GPOS_CHECK_ABORT;
pexprPruned->Release();
// (24) insert dummy project when the scalar subquery is under a project and returns an outer reference
CExpression *pexprSubquery = PexprProjBelowSubquery(
mp, pexprCollapsedProjects, false /* fUnderPrList */);
GPOS_CHECK_ABORT;
pexprCollapsedProjects->Release();
// (25) reorder the children of scalar cmp operator to ensure that left child is scalar ident and right child is scalar const
CExpression *pexrReorderedScalarCmpChildren =
PexprReorderScalarCmpChildren(mp, pexprSubquery);
GPOS_CHECK_ABORT;
pexprSubquery->Release();
// (26) rewrite IN subquery to EXIST subquery with a predicate
CExpression *pexprExistWithPredFromINSubq =
PexprExistWithPredFromINSubq(mp, pexrReorderedScalarCmpChildren);
GPOS_CHECK_ABORT;
pexrReorderedScalarCmpChildren->Release();
// (27) prune partitions
CExpression *pexprPrunedPartitions =
PrunePartitions(mp, pexprExistWithPredFromINSubq);
GPOS_CHECK_ABORT;
pexprExistWithPredFromINSubq->Release();
// (28) swap logical select over logical project
CExpression *pexprTransposeSelectAndProject =
PexprTransposeSelectAndProject(mp, pexprPrunedPartitions);
pexprPrunedPartitions->Release();
// (29) convert split update to inplace update
CExpression *pexprSplitUpdateToInplace =
ConvertSplitUpdateToInPlaceUpdate(mp, pexprTransposeSelectAndProject);
GPOS_CHECK_ABORT;
pexprTransposeSelectAndProject->Release();
// (30) normalize expression again
CExpression *pexprNormalized2 =
CNormalizer::PexprNormalize(mp, pexprSplitUpdateToInplace);
GPOS_CHECK_ABORT;
pexprSplitUpdateToInplace->Release();
return pexprNormalized2;
}
// EOF
相关信息
相关文章
greenplumn CExpressionFactorizer 源码
greenplumn CExpressionHandle 源码
greenplumn CExpressionUtils 源码
greenplumn CHashedDistributions 源码
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦