greenplumn CCostModelGPDB 源码
greenplumn CCostModelGPDB 代码
文件路径:/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright (C) 2014 VMware, Inc. or its affiliates.
//
// @filename:
// CCostModelGPDB.cpp
//
// @doc:
// Implementation of GPDB cost model
//---------------------------------------------------------------------------
#include "gpdbcost/CCostModelGPDB.h"
#include <limits>
#include "gpopt/base/CColRefSetIter.h"
#include "gpopt/base/COptCtxt.h"
#include "gpopt/base/COrderSpec.h"
#include "gpopt/base/CWindowFrame.h"
#include "gpopt/engine/CHint.h"
#include "gpopt/metadata/CIndexDescriptor.h"
#include "gpopt/metadata/CPartConstraint.h"
#include "gpopt/metadata/CTableDescriptor.h"
#include "gpopt/operators/CExpression.h"
#include "gpopt/operators/CExpressionHandle.h"
#include "gpopt/operators/CPhysicalDynamicIndexScan.h"
#include "gpopt/operators/CPhysicalHashAgg.h"
#include "gpopt/operators/CPhysicalIndexOnlyScan.h"
#include "gpopt/operators/CPhysicalIndexScan.h"
#include "gpopt/operators/CPhysicalMotion.h"
#include "gpopt/operators/CPhysicalPartitionSelector.h"
#include "gpopt/operators/CPhysicalSequenceProject.h"
#include "gpopt/operators/CPhysicalUnionAll.h"
#include "gpopt/operators/CPredicateUtils.h"
#include "gpopt/operators/CScalarBitmapIndexProbe.h"
#include "gpopt/optimizer/COptimizerConfig.h"
#include "naucrates/statistics/CStatisticsUtils.h"
using namespace gpos;
using namespace gpdbcost;
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CCostModelGPDB
//
// @doc:
// Ctor
//
//---------------------------------------------------------------------------
CCostModelGPDB::CCostModelGPDB(CMemoryPool *mp, ULONG ulSegments,
CCostModelParamsGPDB *pcp)
: m_mp(mp), m_num_of_segments(ulSegments)
{
GPOS_ASSERT(0 < ulSegments);
if (nullptr == pcp)
{
m_cost_model_params = GPOS_NEW(mp) CCostModelParamsGPDB(mp);
}
else
{
GPOS_ASSERT(nullptr != pcp);
m_cost_model_params = pcp;
}
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::DRowsPerHost
//
// @doc:
// Return number of rows per host
//
//---------------------------------------------------------------------------
CDouble
CCostModelGPDB::DRowsPerHost(CDouble dRowsTotal) const
{
return dRowsTotal / m_num_of_segments;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::~CCostModelGPDB
//
// @doc:
// Dtor
//
//---------------------------------------------------------------------------
CCostModelGPDB::~CCostModelGPDB()
{
m_cost_model_params->Release();
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostTupleProcessing
//
// @doc:
// Cost of tuple processing
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostTupleProcessing(DOUBLE rows, DOUBLE width,
ICostModelParams *pcp)
{
GPOS_ASSERT(nullptr != pcp);
const CDouble dTupDefaultProcCostUnit =
pcp->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)->Get();
GPOS_ASSERT(0 < dTupDefaultProcCostUnit);
return CCost(rows * width * dTupDefaultProcCostUnit);
}
//
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostScanOutput
//
// @doc:
// Helper function to return cost of producing output tuples from
// Scan operator
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostScanOutput(CMemoryPool *, // mp
DOUBLE rows, DOUBLE width, DOUBLE num_rebinds,
ICostModelParams *pcp)
{
GPOS_ASSERT(nullptr != pcp);
const CDouble dOutputTupCostUnit =
pcp->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)->Get();
GPOS_ASSERT(0 < dOutputTupCostUnit);
return CCost(num_rebinds * (rows * width * dOutputTupCostUnit));
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostComputeScalar
//
// @doc:
// Helper function to return cost of a plan containing compute scalar
// operator
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostComputeScalar(CMemoryPool *mp, CExpressionHandle &exprhdl,
const SCostingInfo *pci,
ICostModelParams *pcp,
const CCostModelGPDB *pcmgpdb)
{
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(nullptr != pcp);
GPOS_ASSERT(nullptr != pcmgpdb);
DOUBLE rows = pci->Rows();
DOUBLE width = pci->Width();
DOUBLE num_rebinds = pci->NumRebinds();
CCost costLocal =
CCost(num_rebinds * CostTupleProcessing(rows, width, pcp).Get());
CCost costChild = CostChildren(mp, exprhdl, pci, pcp);
CCost costCompute(0);
if (exprhdl.DeriveHasScalarFuncProject(1))
{
// If the compute scalar operator has a scalar func operator in the
// project list then aggregate that cost of the scalar func. The number
// of times the scalar func is run is proportional to the number of
// rows.
costCompute =
CCost(pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpScalarFuncCost)
->Get() *
rows);
}
return costLocal + costChild + costCompute;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostUnary
//
// @doc:
// Helper function to return cost of a plan rooted by unary operator
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostUnary(CMemoryPool *mp, CExpressionHandle &exprhdl,
const SCostingInfo *pci, ICostModelParams *pcp)
{
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(nullptr != pcp);
DOUBLE rows = pci->Rows();
DOUBLE width = pci->Width();
DOUBLE num_rebinds = pci->NumRebinds();
CCost costLocal =
CCost(num_rebinds * CostTupleProcessing(rows, width, pcp).Get());
CCost costChild = CostChildren(mp, exprhdl, pci, pcp);
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostSpooling
//
// @doc:
// Helper function to compute spooling cost
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostSpooling(CMemoryPool *mp, CExpressionHandle &exprhdl,
const SCostingInfo *pci, ICostModelParams *pcp)
{
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(nullptr != pcp);
const CDouble dMaterializeCostUnit =
pcp->PcpLookup(CCostModelParamsGPDB::EcpMaterializeCostUnit)->Get();
GPOS_ASSERT(0 < dMaterializeCostUnit);
DOUBLE rows = pci->Rows();
DOUBLE width = pci->Width();
DOUBLE num_rebinds = pci->NumRebinds();
// materialization cost is correlated with the number of rows and width of returning tuples.
CCost costLocal =
CCost(num_rebinds * (width * rows * dMaterializeCostUnit));
CCost costChild = CostChildren(mp, exprhdl, pci, pcp);
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::FUnary
//
// @doc:
// Check if given operator is unary
//
//---------------------------------------------------------------------------
BOOL
CCostModelGPDB::FUnary(COperator::EOperatorId op_id)
{
return COperator::EopPhysicalAssert == op_id ||
COperator::EopPhysicalComputeScalar == op_id ||
COperator::EopPhysicalLimit == op_id ||
COperator::EopPhysicalPartitionSelector == op_id ||
COperator::EopPhysicalRowTrigger == op_id ||
COperator::EopPhysicalSplit == op_id ||
COperator::EopPhysicalSpool == op_id;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostChildren
//
// @doc:
// Add up children costs
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostChildren(CMemoryPool *mp, CExpressionHandle &exprhdl,
const SCostingInfo *pci, ICostModelParams *pcp)
{
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(nullptr != pcp);
DOUBLE *pdCost = pci->PdCost();
const ULONG size = pci->ChildCount();
BOOL fFilterParent =
(COperator::EopPhysicalFilter == exprhdl.Pop()->Eopid());
DOUBLE res = 0.0;
for (ULONG ul = 0; ul < size; ul++)
{
DOUBLE dCostChild = pdCost[ul];
COperator *popChild = exprhdl.Pop(ul);
if (nullptr != popChild &&
(CUtils::FPhysicalScan(popChild) ||
COperator::EopPhysicalPartitionSelector == popChild->Eopid()))
{
// by default, compute scan output cost based on full Scan
DOUBLE dScanRows = pci->PdRows()[ul];
COperator *scanOp = popChild;
if (fFilterParent)
{
CPhysicalPartitionSelector *ps =
dynamic_cast<CPhysicalPartitionSelector *>(popChild);
if (ps)
{
CCostContext *grandchildContext = nullptr;
scanOp = exprhdl.PopGrandchild(ul, 0, &grandchildContext);
CPhysicalDynamicScan *scan =
dynamic_cast<CPhysicalDynamicScan *>(scanOp);
if (scan && scan->ScanId() == ps->ScanId() &&
grandchildContext)
{
// We have a filter on top of a partition selector on top of a scan.
// Base the scan output cost on the combination (filter + part sel + scan)
// on the rows that are produced by the scan, since the runtime execution
// plan with be sequence(part_sel, scan+filter). Note that the cost of
// the partition selector is ignored here. It may be higher than that of
// the complete tree (filter + part sel + scan).
// See method CTranslatorExprToDXL::PdxlnPartitionSelectorWithInlinedCondition()
// for how we inline the predicate into the dynamic table scan.
dCostChild = grandchildContext->Cost().Get();
dScanRows = pci->Rows();
}
}
else
{
// if parent is filter, compute scan output cost based on rows produced by Filter operator
dScanRows = pci->Rows();
}
}
if (CUtils::FPhysicalScan(scanOp))
{
// Note: We assume that width and rebinds are the same for scan, partition selector and filter
dCostChild = dCostChild +
CostScanOutput(mp, dScanRows, pci->GetWidth()[ul],
pci->PdRebinds()[ul], pcp)
.Get();
}
}
res = res + dCostChild;
}
return CCost(res);
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostMaxChild
//
// @doc:
// Returns cost of highest costed child
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostMaxChild(CMemoryPool *, CExpressionHandle &,
const SCostingInfo *pci, ICostModelParams *)
{
GPOS_ASSERT(nullptr != pci);
DOUBLE *pdCost = pci->PdCost();
const ULONG size = pci->ChildCount();
DOUBLE res = 0.0;
for (ULONG ul = 0; ul < size; ul++)
{
if (pdCost[ul] > res)
{
res = pdCost[ul];
}
}
return CCost(res);
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostCTEProducer
//
// @doc:
// Cost of CTE producer
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostCTEProducer(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalCTEProducer == exprhdl.Pop()->Eopid());
CCost cost = CostUnary(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
// In GPDB, the child of a ShareInputScan representing the producer can
// only be a materialize or sort. Here, we check if a materialize node
// needs to be added during DXL->PlStmt translation
COperator *popChild = exprhdl.Pop(0 /*child_index*/);
if (nullptr == popChild)
{
// child operator is not known, this could happen when computing cost bound
return cost;
}
COperator::EOperatorId op_id = popChild->Eopid();
if (COperator::EopPhysicalSpool != op_id &&
COperator::EopPhysicalSort != op_id)
{
// no materialize needed
return cost;
}
// a materialize (spool) node is added during DXL->PlStmt translation,
// we need to add the cost of writing the tuples to disk
const CDouble dMaterializeCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpMaterializeCostUnit)
->Get();
GPOS_ASSERT(0 < dMaterializeCostUnit);
CCost costSpooling = CCost(pci->NumRebinds() * (pci->Rows() * pci->Width() *
dMaterializeCostUnit));
return cost + costSpooling;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostCTEConsumer
//
// @doc:
// Cost of CTE consumer
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostCTEConsumer(CMemoryPool *, // mp
CExpressionHandle &
#ifdef GPOS_DEBUG
exprhdl
#endif // GPOS_DEBUG
,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalCTEConsumer == exprhdl.Pop()->Eopid());
const CDouble dInitScan =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
->Get();
const CDouble dTableScanCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
->Get();
const CDouble dOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)
->Get();
GPOS_ASSERT(0 < dOutputTupCostUnit);
GPOS_ASSERT(0 < dTableScanCostUnit);
return CCost(pci->NumRebinds() *
(dInitScan + pci->Rows() * pci->Width() *
(dTableScanCostUnit + dOutputTupCostUnit)));
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostConstTableGet
//
// @doc:
// Cost of const table get
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostConstTableGet(CMemoryPool *, // mp
CExpressionHandle &
#ifdef GPOS_DEBUG
exprhdl
#endif // GPOS_DEBUG
,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalConstTableGet == exprhdl.Pop()->Eopid());
return CCost(pci->NumRebinds() *
CostTupleProcessing(pci->Rows(), pci->Width(),
pcmgpdb->GetCostModelParams())
.Get());
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostDML
//
// @doc:
// Cost of DML
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostDML(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalDML == exprhdl.Pop()->Eopid());
const CDouble dTupUpdateBandwidth =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpTupUpdateBandwith)
->Get();
GPOS_ASSERT(0 < dTupUpdateBandwidth);
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
CCost costLocal = CCost(pci->NumRebinds() * (num_rows_outer * dWidthOuter) /
dTupUpdateBandwidth);
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostScalarAgg
//
// @doc:
// Cost of scalar agg
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostScalarAgg(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalScalarAgg == exprhdl.Pop()->Eopid());
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
// get the number of aggregate columns
const ULONG ulAggCols = exprhdl.DeriveUsedColumns(1)->Size();
// get the number of aggregate functions
const ULONG ulAggFunctions = exprhdl.PexprScalarRepChild(1)->Arity();
const CDouble dHashAggInputTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHashAggInputTupWidthCostUnit)
->Get();
GPOS_ASSERT(0 < dHashAggInputTupWidthCostUnit);
// scalarAgg cost is correlated with rows and width of the input tuples and the number of columns used in aggregate
// It also depends on the complexity of the aggregate algorithm, which is hard to model yet shared by all the aggregate
// operators, thus we ignore this part of cost for all.
CCost costLocal = CCost(pci->NumRebinds() *
(num_rows_outer * dWidthOuter * ulAggCols *
ulAggFunctions * dHashAggInputTupWidthCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostStreamAgg
//
// @doc:
// Cost of stream agg
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostStreamAgg(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
#ifdef GPOS_DEBUG
COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
GPOS_ASSERT(COperator::EopPhysicalStreamAgg == op_id ||
COperator::EopPhysicalStreamAggDeduplicate == op_id);
#endif // GPOS_DEBUG
const CDouble dHashAggOutputTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHashAggOutputTupWidthCostUnit)
->Get();
const CDouble dTupDefaultProcCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)
->Get();
GPOS_ASSERT(0 < dHashAggOutputTupWidthCostUnit);
GPOS_ASSERT(0 < dTupDefaultProcCostUnit);
DOUBLE num_rows_outer = pci->PdRows()[0];
DOUBLE dWidthOuter = pci->GetWidth()[0];
// streamAgg cost is correlated with rows and width of input tuples and rows and width of output tuples
CCost costLocal =
CCost(pci->NumRebinds() *
(num_rows_outer * dWidthOuter * dTupDefaultProcCostUnit +
pci->Rows() * pci->Width() * dHashAggOutputTupWidthCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostSequence
//
// @doc:
// Cost of sequence
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostSequence(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalSequence == exprhdl.Pop()->Eopid());
CCost costLocal = CCost(pci->NumRebinds() *
CostTupleProcessing(pci->Rows(), pci->Width(),
pcmgpdb->GetCostModelParams())
.Get());
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostSort
//
// @doc:
// Cost of sort
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostSort(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalSort == exprhdl.Pop()->Eopid());
// log operation below
const CDouble rows = CDouble(std::max(1.0, pci->Rows()));
const CDouble num_rebinds = CDouble(pci->NumRebinds());
const CDouble width = CDouble(pci->Width());
const CDouble dSortTupWidthCost =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpSortTupWidthCostUnit)
->Get();
GPOS_ASSERT(0 < dSortTupWidthCost);
// sort cost is correlated with the number of rows and width of input tuples. We use n*log(n) for sorting complexity.
CCost costLocal =
CCost(num_rebinds * (rows * rows.Log2() * width * dSortTupWidthCost));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostTVF
//
// @doc:
// Cost of table valued function
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostTVF(CMemoryPool *, // mp
CExpressionHandle &
#ifdef GPOS_DEBUG
exprhdl
#endif // GPOS_DEBUG
,
const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalTVF == exprhdl.Pop()->Eopid());
return CCost(pci->NumRebinds() *
CostTupleProcessing(pci->Rows(), pci->Width(),
pcmgpdb->GetCostModelParams())
.Get());
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostUnionAll
//
// @doc:
// Cost of UnionAll
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostUnionAll(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(nullptr != CPhysicalUnionAll::PopConvert(exprhdl.Pop()));
if (COperator::EopPhysicalParallelUnionAll == exprhdl.Pop()->Eopid())
{
return CostMaxChild(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
}
CCost costLocal = CCost(pci->NumRebinds() *
CostTupleProcessing(pci->Rows(), pci->Width(),
pcmgpdb->GetCostModelParams())
.Get());
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostHashAgg
//
// @doc:
// Cost of hash agg
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostHashAgg(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
#ifdef GPOS_DEBUG
COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
GPOS_ASSERT(COperator::EopPhysicalHashAgg == op_id ||
COperator::EopPhysicalHashAggDeduplicate == op_id);
#endif // GPOS_DEBUG
DOUBLE num_rows_outer = pci->PdRows()[0];
// A local hash agg may stream partial aggregates to global agg when it's hash table is full to avoid spilling.
// This is dertermined by the order of tuples received by local agg. In the worst case, the local hash agg may
// see a tuple from each different group until its hash table fills up all available memory, and hence it produces
// tuples as many as its input size. On the other hand, in the best case, the local agg may receive tuples sorted
// by grouping columns, which allows it to complete all local aggregation in memory and produce exactly tuples as
// the number of groups.
//
// Since we do not know the order of tuples received by local hash agg, we assume the number of output rows from local
// agg is the average between input and output rows
// the cardinality out as (rows + num_rows_outer)/2 to increase the local hash agg cost
DOUBLE rows = pci->Rows();
CPhysicalHashAgg *popAgg = CPhysicalHashAgg::PopConvert(exprhdl.Pop());
if ((COperator::EgbaggtypeLocal == popAgg->Egbaggtype()) &&
popAgg->FGeneratesDuplicates())
{
rows = (rows + num_rows_outer) / 2.0;
}
// get the number of grouping columns
const ULONG ulGrpCols = CPhysicalHashAgg::PopConvert(exprhdl.Pop())
->PdrgpcrGroupingCols()
->Size();
const CDouble dHashAggInputTupColumnCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHashAggInputTupColumnCostUnit)
->Get();
const CDouble dHashAggInputTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHashAggInputTupWidthCostUnit)
->Get();
const CDouble dHashAggOutputTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHashAggOutputTupWidthCostUnit)
->Get();
GPOS_ASSERT(0 < dHashAggInputTupColumnCostUnit);
GPOS_ASSERT(0 < dHashAggInputTupWidthCostUnit);
GPOS_ASSERT(0 < dHashAggOutputTupWidthCostUnit);
// hashAgg cost contains three parts: build hash table, aggregate tuples, and output tuples.
// 1. build hash table is correlated with the number of rows and width of input tuples and the number of columns used.
// 2. cost of aggregate tuples depends on the complexity of aggregation algorithm and thus is ignored.
// 3. cost of output tuples is correlated with rows and width of returning tuples.
CCost costLocal =
CCost(pci->NumRebinds() *
(num_rows_outer * ulGrpCols * dHashAggInputTupColumnCostUnit +
num_rows_outer * ulGrpCols * pci->Width() *
dHashAggInputTupWidthCostUnit +
rows * pci->Width() * dHashAggOutputTupWidthCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostHashJoin
//
// @doc:
// Cost of hash join
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostHashJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
#ifdef GPOS_DEBUG
COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
GPOS_ASSERT(COperator::EopPhysicalInnerHashJoin == op_id ||
COperator::EopPhysicalLeftSemiHashJoin == op_id ||
COperator::EopPhysicalLeftAntiSemiHashJoin == op_id ||
COperator::EopPhysicalLeftAntiSemiHashJoinNotIn == op_id ||
COperator::EopPhysicalLeftOuterHashJoin == op_id ||
COperator::EopPhysicalRightOuterHashJoin == op_id);
#endif // GPOS_DEBUG
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
const DOUBLE dRowsInner = pci->PdRows()[1];
const DOUBLE dWidthInner = pci->GetWidth()[1];
const CDouble dHJHashTableInitCostFactor =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableInitCostFactor)
->Get();
const CDouble dHJHashTableColumnCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableColumnCostUnit)
->Get();
const CDouble dHJHashTableWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHJHashTableWidthCostUnit)
->Get();
const CDouble dJoinFeedingTupColumnCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
->Get();
const CDouble dJoinFeedingTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
->Get();
const CDouble dHJHashingTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHJHashingTupWidthCostUnit)
->Get();
const CDouble dJoinOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
->Get();
const CDouble dHJSpillingMemThreshold =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpHJSpillingMemThreshold)
->Get();
const CDouble dHJFeedingTupColumnSpillingCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(
CCostModelParamsGPDB::EcpHJFeedingTupColumnSpillingCostUnit)
->Get();
const CDouble dHJFeedingTupWidthSpillingCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(
CCostModelParamsGPDB::EcpHJFeedingTupWidthSpillingCostUnit)
->Get();
const CDouble dHJHashingTupWidthSpillingCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(
CCostModelParamsGPDB::EcpHJHashingTupWidthSpillingCostUnit)
->Get();
const CDouble dPenalizeHJSkewUpperLimit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpPenalizeHJSkewUpperLimit)
->Get();
GPOS_ASSERT(0 < dHJHashTableInitCostFactor);
GPOS_ASSERT(0 < dHJHashTableColumnCostUnit);
GPOS_ASSERT(0 < dHJHashTableWidthCostUnit);
GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
GPOS_ASSERT(0 < dHJHashingTupWidthCostUnit);
GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
GPOS_ASSERT(0 < dHJSpillingMemThreshold);
GPOS_ASSERT(0 < dHJFeedingTupColumnSpillingCostUnit);
GPOS_ASSERT(0 < dHJFeedingTupWidthSpillingCostUnit);
GPOS_ASSERT(0 < dHJHashingTupWidthSpillingCostUnit);
GPOS_ASSERT(0 < dPenalizeHJSkewUpperLimit);
// get the number of columns used in join condition
CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
const ULONG ulColsUsed = pcrsUsed->Size();
// TODO 2014-03-14
// currently, we hard coded a spilling memory threshold for judging whether hash join spills or not
// In the future, we should calculate it based on the number of memory-intensive operators and statement memory available
CCost costLocal(0);
// inner tuples fit in memory
if (dRowsInner * dWidthInner <= dHJSpillingMemThreshold)
{
// hash join cost contains four parts:
// 1. build hash table with inner tuples. This part is correlated with rows and width of
// inner tuples and the number of columns used in join condition.
// 2. feeding outer tuples. This part is correlated with rows and width of outer tuples
// and the number of columns used.
// 3. matching inner tuples. This part is correlated with rows and width of inner tuples.
// 4. output tuples. This part is correlated with outer rows and width of the join result.
costLocal = CCost(
pci->NumRebinds() *
(
// cost of building hash table
dRowsInner * (ulColsUsed * dHJHashTableColumnCostUnit +
dWidthInner * dHJHashTableWidthCostUnit) +
// cost of feeding outer tuples
ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
// cost of matching inner tuples
dWidthInner * dRowsInner * dHJHashingTupWidthCostUnit +
// cost of output tuples
pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
}
else
{
// inner tuples spill
// hash join cost if spilling is the same as the non-spilling case, except that
// parameter values are different.
costLocal = CCost(
pci->NumRebinds() *
(dHJHashTableInitCostFactor +
dRowsInner * (ulColsUsed * dHJHashTableColumnCostUnit +
dWidthInner * dHJHashTableWidthCostUnit) +
ulColsUsed * num_rows_outer * dHJFeedingTupColumnSpillingCostUnit +
dWidthOuter * num_rows_outer * dHJFeedingTupWidthSpillingCostUnit +
dWidthInner * dRowsInner * dHJHashingTupWidthSpillingCostUnit +
pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
}
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
CDouble skew_ratio = 1;
ULONG arity = exprhdl.Arity();
// Hashjoin with skewed HashRedistribute below them are expensive
// find out if there is a skewed redistribute child of this HashJoin.
if (!GPOS_FTRACE(EopttracePenalizeSkewedHashJoin))
{
for (ULONG ul = 0; ul < arity - 1; ++ul)
{
COperator *popChild = exprhdl.Pop(ul);
if (nullptr == popChild ||
COperator::EopPhysicalMotionHashDistribute != popChild->Eopid())
{
continue;
}
CPhysicalMotion *motion = CPhysicalMotion::PopConvert(popChild);
CColRefSet *columns = motion->Pds()->PcrsUsed(mp);
// we decide if there is a skew by calculating the NDVs of the HashRedistribute
CDouble ndv = 1.0;
CColRefSetIter iter(*columns);
while (iter.Advance())
{
CColRef *colref = iter.Pcr();
ndv = ndv * pci->Pcstats(ul)->GetNDVs(colref);
}
// if the NDVs are less than number of segments then there is definitely
// a skew. NDV < 1 implies no stats exist for the columns involved. So we don't
// want to take any decision.
// In case of a skew, penalize the local cost of HashJoin with a
// skew ratio = (num of segments)/ndv
if (ndv < pcmgpdb->UlHosts() && (ndv >= 1))
{
CDouble sk = pcmgpdb->UlHosts() / ndv;
skew_ratio = CDouble(std::max(sk.Get(), skew_ratio.Get()));
}
columns->Release();
}
}
// if we end up penalizing redistribute too much, we will start getting gather motions
// which are not necessarily a good idea. So we maintain a upper limit of skew.
skew_ratio =
CDouble(std::min(dPenalizeHJSkewUpperLimit.Get(), skew_ratio.Get()));
return costChild + CCost(costLocal.Get() * skew_ratio);
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostMergeJoin
//
// @doc:
// Cost of merge join
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostMergeJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
#ifdef GPOS_DEBUG
COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
GPOS_ASSERT(COperator::EopPhysicalFullMergeJoin == op_id);
#endif // GPOS_DEBUG
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
const DOUBLE dRowsInner = pci->PdRows()[1];
const DOUBLE dWidthInner = pci->GetWidth()[1];
const CDouble dJoinFeedingTupColumnCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
->Get();
const CDouble dJoinFeedingTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
->Get();
const CDouble dJoinOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
->Get();
const CDouble dFilterColCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpFilterColCostUnit)
->Get();
const CDouble dOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)
->Get();
GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
GPOS_ASSERT(0 < dFilterColCostUnit);
GPOS_ASSERT(0 < dOutputTupCostUnit);
// get the number of columns used in join condition
CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
const ULONG ulColsUsed = pcrsUsed->Size();
// We assume for costing, that the outer tuples are unique. This means that
// we will never have to rescan a portion of the inner side.
CCost costLocal = CCost(
pci->NumRebinds() *
(
// feeding cost of outer
ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
// cost of matching
(dRowsInner + num_rows_outer) * ulColsUsed * dFilterColCostUnit +
// cost of extracting matched inner side
pci->Rows() * dWidthInner * dOutputTupCostUnit +
// cost of output tuples
pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costChild + costLocal;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostIndexNLJoin
//
// @doc:
// Cost of inner or outer index-nljoin
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostIndexNLJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(
COperator::EopPhysicalInnerIndexNLJoin == exprhdl.Pop()->Eopid() ||
COperator::EopPhysicalLeftOuterIndexNLJoin == exprhdl.Pop()->Eopid());
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
const CDouble dJoinFeedingTupColumnCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
->Get();
const CDouble dJoinFeedingTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
->Get();
const CDouble dJoinOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
->Get();
GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
// get the number of columns used in join condition
CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
const ULONG ulColsUsed = pcrsUsed->Size();
// cost of Index apply contains three parts:
// 1. feeding outer tuples. This part is correlated with rows and width of outer tuples
// and the number of columns used.
// 2. repetitive index scan of inner side for each feeding tuple. This part of cost is
// calculated and counted in its index scan child node.
// 3. output tuples. This part is correlated with outer rows and width of the join result.
CCost costLocal =
CCost(pci->NumRebinds() *
(
// cost of feeding outer tuples
ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
// cost of output tuples
pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
ULONG risk = pci->Pcstats()->StatsEstimationRisk();
ULONG ulPenalizationFactor = 1;
const CDouble dIndexJoinAllowedRiskThreshold =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexJoinAllowedRiskThreshold)
->Get();
BOOL fInnerJoin =
COperator::EopPhysicalInnerIndexNLJoin == exprhdl.Pop()->Eopid();
// Only apply penalize factor for inner index nestloop join, because we are more confident
// on the cardinality estimation of outer join than inner join. So don't penalize outer join
// cost, otherwise Orca generate bad plan.
if (fInnerJoin && dIndexJoinAllowedRiskThreshold < risk)
{
ulPenalizationFactor = risk;
}
return CCost(ulPenalizationFactor * (costLocal + costChild));
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostNLJoin
//
// @doc:
// Cost of nljoin
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostNLJoin(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(CUtils::FNLJoin(exprhdl.Pop()));
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
const DOUBLE dRowsInner = pci->PdRows()[1];
const DOUBLE dWidthInner = pci->GetWidth()[1];
const CDouble dJoinFeedingTupColumnCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupColumnCostUnit)
->Get();
const CDouble dJoinFeedingTupWidthCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinFeedingTupWidthCostUnit)
->Get();
const CDouble dJoinOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpJoinOutputTupCostUnit)
->Get();
const CDouble dInitScan =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
->Get();
const CDouble dTableScanCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
->Get();
const CDouble dFilterColCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpFilterColCostUnit)
->Get();
const CDouble dOutputTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpOutputTupCostUnit)
->Get();
const CDouble dNLJFactor =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpNLJFactor)
->Get();
GPOS_ASSERT(0 < dJoinFeedingTupColumnCostUnit);
GPOS_ASSERT(0 < dJoinFeedingTupWidthCostUnit);
GPOS_ASSERT(0 < dJoinOutputTupCostUnit);
GPOS_ASSERT(0 < dInitScan);
GPOS_ASSERT(0 < dTableScanCostUnit);
GPOS_ASSERT(0 < dFilterColCostUnit);
GPOS_ASSERT(0 < dOutputTupCostUnit);
GPOS_ASSERT(0 < dNLJFactor);
// get the number of columns used in join condition
CExpression *pexprJoinCond = exprhdl.PexprScalarRepChild(2);
CColRefSet *pcrsUsed = pexprJoinCond->DeriveUsedColumns();
const ULONG ulColsUsed = pcrsUsed->Size();
// cost of nested loop join contains three parts:
// 1. feeding outer tuples. This part is correlated with rows and width of outer tuples
// and the number of columns used.
// 2. repetitive scan of inner side for each feeding tuple. This part of cost consists of
// the following:
// a. repetitive scan and filter of the materialized inner side
// b. extract matched inner side tuples
// with the cardinality of outer tuples, rows and width of the materialized inner side.
// 3. output tuples. This part is correlated with outer rows and width of the join result.
CCost costLocal = CCost(
pci->NumRebinds() *
(
// cost of feeding outer tuples
ulColsUsed * num_rows_outer * dJoinFeedingTupColumnCostUnit +
dWidthOuter * num_rows_outer * dJoinFeedingTupWidthCostUnit +
// cost of repetitive table scan of inner side
dInitScan +
num_rows_outer * (
// cost of scan of inner side
dRowsInner * dWidthInner * dTableScanCostUnit +
// cost of filter of inner side
dRowsInner * ulColsUsed * dFilterColCostUnit)
// cost of extracting matched inner side
+ pci->Rows() * dWidthInner * dOutputTupCostUnit +
// cost of output tuples
pci->Rows() * pci->Width() * dJoinOutputTupCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
CCost costTotal = CCost(costLocal + costChild);
// amplify NLJ cost based on NLJ factor and stats estimation risk
// we don't want to penalize index join compared to nested loop join, so we make sure
// that every time index join is penalized, we penalize nested loop join by at least the
// same amount
CDouble dPenalization = dNLJFactor;
const CDouble dRisk(pci->Pcstats()->StatsEstimationRisk());
if (dRisk > dPenalization)
{
const CDouble dIndexJoinAllowedRiskThreshold =
pcmgpdb->GetCostModelParams()
->PcpLookup(
CCostModelParamsGPDB::EcpIndexJoinAllowedRiskThreshold)
->Get();
if (dIndexJoinAllowedRiskThreshold < dRisk)
{
dPenalization = dRisk;
}
}
return CCost(costTotal * dPenalization);
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostMotion
//
// @doc:
// Cost of motion
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostMotion(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
GPOS_ASSERT(COperator::EopPhysicalMotionGather == op_id ||
COperator::EopPhysicalMotionBroadcast == op_id ||
COperator::EopPhysicalMotionHashDistribute == op_id ||
COperator::EopPhysicalMotionRandom == op_id ||
COperator::EopPhysicalMotionRoutedDistribute == op_id);
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
// motion cost contains three parts: sending cost, interconnect cost, and receiving cost.
// TODO 2014-03-18
// in current cost model, interconnect cost is tied with receiving cost. Because we
// only have one set calibration results in the dimension of the number of segments.
// Once we calibrate the cost model with different number of segments, I will update
// the function.
CDouble dSendCostUnit(0);
CDouble dRecvCostUnit(0);
CDouble recvCost(0);
CCost costLocal(0);
if (COperator::EopPhysicalMotionBroadcast == op_id)
{
// broadcast cost is amplified by the number of segments
dSendCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBroadcastSendCostUnit)
->Get();
dRecvCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBroadcastRecvCostUnit)
->Get();
recvCost =
num_rows_outer * dWidthOuter * pcmgpdb->UlHosts() * dRecvCostUnit;
}
else if (COperator::EopPhysicalMotionHashDistribute == op_id ||
COperator::EopPhysicalMotionRandom == op_id ||
COperator::EopPhysicalMotionRoutedDistribute == op_id)
{
dSendCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpRedistributeSendCostUnit)
->Get();
dRecvCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpRedistributeRecvCostUnit)
->Get();
// Adjust the cost of no-op hashed distribution to correctly reflect that no tuple movement is needed
CPhysicalMotion *pMotion = CPhysicalMotion::PopConvert(exprhdl.Pop());
CDistributionSpec *pds = pMotion->Pds();
if (CDistributionSpec::EdtHashedNoOp == pds->Edt())
{
// promote the plan with redistribution on same distributed columns of base table for parallel append
dSendCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpNoOpCostUnit)
->Get();
dRecvCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpNoOpCostUnit)
->Get();
}
recvCost = pci->Rows() * pci->Width() * dRecvCostUnit;
}
else if (COperator::EopPhysicalMotionGather == op_id)
{
dSendCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpGatherSendCostUnit)
->Get();
dRecvCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpGatherRecvCostUnit)
->Get();
recvCost =
num_rows_outer * dWidthOuter * pcmgpdb->UlHosts() * dRecvCostUnit;
}
GPOS_ASSERT(0 <= dSendCostUnit);
GPOS_ASSERT(0 <= dRecvCostUnit);
costLocal =
CCost(pci->NumRebinds() *
(num_rows_outer * dWidthOuter * dSendCostUnit + recvCost));
if (COperator::EopPhysicalMotionBroadcast == op_id)
{
COptimizerConfig *optimizer_config =
COptCtxt::PoctxtFromTLS()->GetOptimizerConfig();
ULONG broadcast_threshold =
optimizer_config->GetHint()->UlBroadcastThreshold();
if (num_rows_outer > broadcast_threshold)
{
DOUBLE ulPenalizationFactor = 100000000000000.0;
costLocal = CCost(ulPenalizationFactor);
}
}
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostSequenceProject
//
// @doc:
// Cost of sequence project
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostSequenceProject(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalSequenceProject ==
exprhdl.Pop()->Eopid());
const DOUBLE num_rows_outer = pci->PdRows()[0];
const DOUBLE dWidthOuter = pci->GetWidth()[0];
ULONG ulSortCols = 0;
COrderSpecArray *pdrgpos =
CPhysicalSequenceProject::PopConvert(exprhdl.Pop())->Pdrgpos();
const ULONG ulOrderSpecs = pdrgpos->Size();
for (ULONG ul = 0; ul < ulOrderSpecs; ul++)
{
COrderSpec *pos = (*pdrgpos)[ul];
ulSortCols += pos->UlSortColumns();
}
const CDouble dTupDefaultProcCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpTupDefaultProcCostUnit)
->Get();
GPOS_ASSERT(0 < dTupDefaultProcCostUnit);
// we process (sorted window of) input tuples to compute window function values
CCost costLocal =
CCost(pci->NumRebinds() * (ulSortCols * num_rows_outer * dWidthOuter *
dTupDefaultProcCostUnit));
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostIndexScan
//
// @doc:
// Cost of index scan
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostIndexScan(CMemoryPool *, // mp
CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
COperator *pop = exprhdl.Pop();
COperator::EOperatorId op_id = pop->Eopid();
GPOS_ASSERT(COperator::EopPhysicalIndexScan == op_id ||
COperator::EopPhysicalDynamicIndexScan == op_id);
const CDouble dTableWidth =
CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();
const CDouble dIndexFilterCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
->Get();
const CDouble dIndexScanTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexScanTupCostUnit)
->Get();
const CDouble dIndexScanTupRandomFactor =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexScanTupRandomFactor)
->Get();
GPOS_ASSERT(0 < dIndexFilterCostUnit);
GPOS_ASSERT(0 < dIndexScanTupCostUnit);
GPOS_ASSERT(0 < dIndexScanTupRandomFactor);
CDouble dRowsIndex = pci->Rows();
ULONG ulIndexKeys = 1;
if (COperator::EopPhysicalIndexScan == op_id)
{
ulIndexKeys = CPhysicalIndexScan::PopConvert(pop)->Pindexdesc()->Keys();
}
else
{
ulIndexKeys =
CPhysicalDynamicIndexScan::PopConvert(pop)->Pindexdesc()->Keys();
}
// TODO: 2014-02-01
// Add logic to judge if the index column used in the filter is the first key of a multi-key index or not.
// and separate the cost functions for the two cases.
// index scan cost contains two parts: index-column lookup and output tuple cost.
// 1. index-column lookup: correlated with index lookup rows, the number of index columns used in lookup,
// table width and a randomIOFactor.
// 2. output tuple cost: this is handled by the Filter on top of IndexScan, if no Filter exists, we add output cost
// when we sum-up children cost
CDouble dCostPerIndexRow = ulIndexKeys * dIndexFilterCostUnit +
dTableWidth * dIndexScanTupCostUnit;
return CCost(pci->NumRebinds() *
(dRowsIndex * dCostPerIndexRow + dIndexScanTupRandomFactor));
}
CCost
CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED, // mp
CExpressionHandle &exprhdl, //exprhdl
const CCostModelGPDB *pcmgpdb, // pcmgpdb
const SCostingInfo *pci //pci
)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
COperator *pop = exprhdl.Pop();
GPOS_ASSERT(COperator::EopPhysicalIndexOnlyScan == pop->Eopid());
const CDouble dTableWidth =
CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();
const CDouble dIndexFilterCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
->Get();
const CDouble dIndexScanTupCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexScanTupCostUnit)
->Get();
const CDouble dIndexScanTupRandomFactor =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexScanTupRandomFactor)
->Get();
GPOS_ASSERT(0 < dIndexFilterCostUnit);
GPOS_ASSERT(0 < dIndexScanTupCostUnit);
GPOS_ASSERT(0 < dIndexScanTupRandomFactor);
CDouble dRowsIndex = pci->Rows();
ULONG ulIndexKeys =
CPhysicalIndexOnlyScan::PopConvert(pop)->Pindexdesc()->Keys();
IStatistics *stats =
CPhysicalIndexOnlyScan::PopConvert(pop)->PstatsBaseTable();
// Calculating cost of index-only-scan is identical to index-scan with the
// addition of dPartialVisFrac which indicates the percentage of pages not
// currently marked as all-visible. Planner has similar logic inside
// `cost_index()` to calculate pages fetched from index-only-scan.
CDouble dCostPerIndexRow = ulIndexKeys * dIndexFilterCostUnit +
dTableWidth * dIndexScanTupCostUnit;
CDouble dPartialVisFrac(1);
if (stats->RelPages() != 0)
{
dPartialVisFrac =
1 - (CDouble(stats->RelAllVisible()) / CDouble(stats->RelPages()));
}
return CCost(pci->NumRebinds() *
(dRowsIndex * dCostPerIndexRow +
dIndexScanTupRandomFactor * dPartialVisFrac));
}
CCost
CCostModelGPDB::CostBitmapTableScan(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(
COperator::EopPhysicalBitmapTableScan == exprhdl.Pop()->Eopid() ||
COperator::EopPhysicalDynamicBitmapTableScan == exprhdl.Pop()->Eopid());
CCost result(0.0);
CExpression *pexprIndexCond =
exprhdl.PexprScalarRepChild(1 /*child_index*/);
CColRefSet *pcrsUsed = pexprIndexCond->DeriveUsedColumns();
CColRefSet *outerRefs = exprhdl.DeriveOuterReferences();
CColRefSet *pcrsLocalUsed = GPOS_NEW(mp) CColRefSet(mp, *pcrsUsed);
IMDIndex::EmdindexType indexType = IMDIndex::EmdindSentinel;
if (COperator::EopScalarBitmapIndexProbe == pexprIndexCond->Pop()->Eopid())
{
indexType = CScalarBitmapIndexProbe::PopConvert(pexprIndexCond->Pop())
->Pindexdesc()
->IndexType();
}
BOOL isInPredOnBtreeIndex =
(IMDIndex::EmdindBtree == indexType &&
COperator::EopScalarArrayCmp == (*pexprIndexCond)[0]->Pop()->Eopid());
// subtract outer references from the used colrefs, so we can see
// how many colrefs are used for this table
pcrsLocalUsed->Exclude(outerRefs);
const DOUBLE rows = pci->Rows();
const DOUBLE width = pci->Width();
CDouble dInitRebind =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapScanRebindCost)
->Get();
if (COperator::EopScalarBitmapIndexProbe !=
pexprIndexCond->Pop()->Eopid() ||
1 < pcrsLocalUsed->Size() ||
(isInPredOnBtreeIndex && rows > 2.0 &&
GPOS_FTRACE(EopttraceLegacyCostModel)))
{
// Child is Bitmap AND/OR, or we use Multi column index or this is an IN predicate
// that's used with the "calibrated" cost model.
// Handling the IN predicate in this code path is to avoid plan regressions from
// earlier versions of the code that treated IN predicates like ORs and therefore
// also handled them in this code path. This is especially noticeable for btree
// indexes that often have a high NDV, because the small/large NDV cost model
// produces very high cost for cases with a higher NDV.
const CDouble dIndexFilterCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
->Get();
GPOS_ASSERT(0 < dIndexFilterCostUnit);
// check whether the user specified overriding values in gucs
CDouble dInitScan =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
->Get();
GPOS_ASSERT(dInitScan >= 0 && dInitRebind >= 0);
// For now we are trying to cost Bitmap Scan similar to Index Scan. dIndexFilterCostUnit is
// the dominant factor in costing Index Scan so we are using it in our model. Also we are giving
// Bitmap Scan a start up cost similar to Sequential Scan. Note that in this code path we add the
// relatively high dInitScan cost, while in the other code paths below (CostBitmapLargeNDV and
// CostBitmapSmallNDV) we don't. That's something we should look into.
// Conceptually the cost of evaluating index qual is also linear in the
// number of index columns, but we're only accounting for the dominant cost
result =
CCost( // cost for each byte returned by the index scan plus cost for incremental rebinds
pci->NumRebinds() *
(rows * width * dIndexFilterCostUnit + dInitRebind) +
// init cost
dInitScan);
}
else
{
// if the expression is const table get, the pcrsUsed is empty
// so we use minimum value MinDistinct for dNDV in that case.
CDouble dNDV = CHistogram::MinDistinct;
CDouble dNDVThreshold =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapNDVThreshold)
->Get();
if (rows < 1.0)
{
// if we aren't accessing a row every rebind, then don't charge a cost for those cases where we don't have a row
dNDV = rows;
}
else if (1 == pcrsLocalUsed->Size()) // if you only have one local pred
{
CColRef *pcrIndexCond = pcrsLocalUsed->PcrFirst();
GPOS_ASSERT(nullptr != pcrIndexCond);
// get the num distinct for the rows returned by the predicate
dNDV = pci->Pcstats()->GetNDVs(pcrIndexCond);
// if there's an outerref
if (1 < pcrsUsed->Size() && COperator::EopScalarBitmapIndexProbe ==
pexprIndexCond->Pop()->Eopid())
{
CExpression *pexprScalarCmp = (*pexprIndexCond)[0];
// The index condition contains an outer reference. Adjust
// the NDV to 1, if we compare the colref with a single value
if (CPredicateUtils::IsEqualityOp(pexprScalarCmp))
{
dNDV = 1.0;
}
}
}
if (GPOS_FTRACE(EopttraceLegacyCostModel))
{
// optimizer_cost_model = 'legacy'
if (dNDVThreshold <= dNDV)
{
result = CostBitmapLargeNDV(pcmgpdb, pci, dNDV);
}
else
{
result = CostBitmapSmallNDV(pcmgpdb, pci, dNDV);
}
}
else
{
// optimizer_cost_model = 'calibrated'|'experimental'
CDouble dBitmapIO =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostSmallNDV)
->Get();
CDouble c5_dInitScan =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
->Get();
CDouble c3_dBitmapPageCost =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCost)
->Get();
BOOL isAOTable = CPhysicalScan::PopConvert(exprhdl.Pop())
->Ptabdesc()
->IsAORowOrColTable();
// some cost constants determined with the cal_bitmap_test.py script
CDouble c1_cost_per_row(0.03);
CDouble c2_cost_per_byte(0.0001);
CDouble bitmap_union_cost_per_distinct_value(0.000027);
CDouble init_cost_advantage_for_bitmap_scan(0.9);
if (IMDIndex::EmdindBtree == indexType)
{
// btree indexes are not sensitive to the NDV, since they don't have any bitmaps
c3_dBitmapPageCost = 0.0;
}
// Give the index scan a small initial advantage over the table scan, so we use indexes
// for small tables - this should avoid having table scan and index scan costs being
// very close together for many small queries.
c5_dInitScan = c5_dInitScan * init_cost_advantage_for_bitmap_scan;
// The numbers below were experimentally determined using regression analysis in the cal_bitmap_test.py script
// The following dSizeCost is in the form C1 * rows + C2 * rows * width. This is because the width should have
// significantly less weight than rows as the execution time does not grow as fast in regards to width
CDouble dSizeCost = dBitmapIO * (rows * c1_cost_per_row +
rows * width * c2_cost_per_byte);
CDouble bitmapUnionCost = 0;
if (!isAOTable && indexType == IMDIndex::EmdindBitmap && dNDV > 1.0)
{
CDouble baseTableRows = CPhysicalScan::PopConvert(exprhdl.Pop())
->PstatsBaseTable()
->Rows();
// for bitmap index scans on heap tables, we found that there is an additional cost
// associated with unioning them that is proportional to the number of bitmaps involved
// (dNDV-1) times the width of the bitmap (proportional to the number of rows in the table)
bitmapUnionCost = std::max(0.0, dNDV.Get() - 1.0) *
baseTableRows *
bitmap_union_cost_per_distinct_value;
}
result = CCost(pci->NumRebinds() *
(dSizeCost + dNDV * c3_dBitmapPageCost +
dInitRebind + bitmapUnionCost) +
c5_dInitScan);
}
}
pcrsLocalUsed->Release();
return result;
}
CCost
CCostModelGPDB::CostBitmapSmallNDV(const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci, CDouble dNDV)
{
const DOUBLE rows = pci->Rows();
const DOUBLE width = pci->Width();
CDouble dSize = (rows * width) * 0.001;
CDouble dBitmapIO =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostSmallNDV)
->Get();
CDouble dBitmapPageCost =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCostSmallNDV)
->Get();
CDouble effectiveNDV = dNDV;
if (rows < 1.0)
{
// if we aren't accessing a row every rebind, then don't charge a cost for those cases where we don't have a row
effectiveNDV = rows;
}
return CCost(pci->NumRebinds() *
(dBitmapIO * dSize + dBitmapPageCost * effectiveNDV));
}
CCost
CCostModelGPDB::CostBitmapLargeNDV(const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci, CDouble dNDV)
{
const DOUBLE rows = pci->Rows();
const DOUBLE width = pci->Width();
CDouble dSize = (rows * width * dNDV) * 0.001;
CDouble dBitmapIO =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapIOCostLargeNDV)
->Get();
CDouble dBitmapPageCost =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpBitmapPageCostLargeNDV)
->Get();
return CCost(pci->NumRebinds() *
(dBitmapIO * dSize + dBitmapPageCost * dNDV));
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostScan
//
// @doc:
// Cost of scan
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostScan(CMemoryPool *, // mp
CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb, const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
COperator *pop = exprhdl.Pop();
COperator::EOperatorId op_id = pop->Eopid();
GPOS_ASSERT(COperator::EopPhysicalTableScan == op_id ||
COperator::EopPhysicalDynamicTableScan == op_id ||
COperator::EopPhysicalExternalScan == op_id);
const CDouble dInitScan =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpInitScanFactor)
->Get();
const CDouble dTableWidth =
CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();
// Get total rows for each host to scan
const CDouble dTableScanCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpTableScanCostUnit)
->Get();
GPOS_ASSERT(0 < dTableScanCostUnit);
switch (op_id)
{
case COperator::EopPhysicalTableScan:
case COperator::EopPhysicalDynamicTableScan:
case COperator::EopPhysicalExternalScan:
// table scan cost considers only retrieving tuple cost,
// since we scan the entire table here, the cost is correlated with table rows and table width,
// since Scan's parent operator may be a filter that will be pushed into Scan node in GPDB plan,
// we add Scan output tuple cost in the parent operator and not here
return CCost(
pci->NumRebinds() *
(dInitScan + pci->Rows() * dTableWidth * dTableScanCostUnit));
default:
GPOS_ASSERT(!"invalid index scan");
return CCost(0);
}
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::CostFilter
//
// @doc:
// Cost of filter
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::CostFilter(CMemoryPool *mp, CExpressionHandle &exprhdl,
const CCostModelGPDB *pcmgpdb,
const SCostingInfo *pci)
{
GPOS_ASSERT(nullptr != pcmgpdb);
GPOS_ASSERT(nullptr != pci);
GPOS_ASSERT(COperator::EopPhysicalFilter == exprhdl.Pop()->Eopid());
const DOUBLE dInput = pci->PdRows()[0];
const ULONG ulFilterCols = exprhdl.DeriveUsedColumns(1)->Size();
const CDouble dFilterColCostUnit =
pcmgpdb->GetCostModelParams()
->PcpLookup(CCostModelParamsGPDB::EcpFilterColCostUnit)
->Get();
GPOS_ASSERT(0 < dFilterColCostUnit);
// filter cost is correlated with the input rows and the number of filter columns.
CCost costLocal = CCost(dInput * ulFilterCols * dFilterColCostUnit);
costLocal = CCost(costLocal.Get() * pci->NumRebinds());
CCost costChild =
CostChildren(mp, exprhdl, pci, pcmgpdb->GetCostModelParams());
return costLocal + costChild;
}
//---------------------------------------------------------------------------
// @function:
// CCostModelGPDB::Cost
//
// @doc:
// Main driver
//
//---------------------------------------------------------------------------
CCost
CCostModelGPDB::Cost(
CExpressionHandle &exprhdl, // handle gives access to expression properties
const SCostingInfo *pci) const
{
GPOS_ASSERT(nullptr != pci);
COperator::EOperatorId op_id = exprhdl.Pop()->Eopid();
if (op_id == COperator::EopPhysicalComputeScalar)
{
return CostComputeScalar(m_mp, exprhdl, pci, m_cost_model_params, this);
}
if (FUnary(op_id))
{
return CostUnary(m_mp, exprhdl, pci, m_cost_model_params);
}
switch (op_id)
{
default:
{
// FIXME: macro this?
__builtin_unreachable();
}
case COperator::EopPhysicalTableScan:
case COperator::EopPhysicalDynamicTableScan:
case COperator::EopPhysicalExternalScan:
{
return CostScan(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalFilter:
{
return CostFilter(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalIndexOnlyScan:
{
return CostIndexOnlyScan(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalIndexScan:
case COperator::EopPhysicalDynamicIndexScan:
{
return CostIndexScan(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalBitmapTableScan:
case COperator::EopPhysicalDynamicBitmapTableScan:
{
return CostBitmapTableScan(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalSequenceProject:
{
return CostSequenceProject(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalCTEProducer:
{
return CostCTEProducer(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalCTEConsumer:
{
return CostCTEConsumer(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalConstTableGet:
{
return CostConstTableGet(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalDML:
{
return CostDML(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalHashAgg:
case COperator::EopPhysicalHashAggDeduplicate:
{
return CostHashAgg(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalScalarAgg:
{
return CostScalarAgg(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalStreamAgg:
case COperator::EopPhysicalStreamAggDeduplicate:
{
return CostStreamAgg(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalSequence:
{
return CostSequence(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalSort:
{
return CostSort(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalTVF:
{
return CostTVF(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalSerialUnionAll:
case COperator::EopPhysicalParallelUnionAll:
{
return CostUnionAll(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalInnerHashJoin:
case COperator::EopPhysicalLeftSemiHashJoin:
case COperator::EopPhysicalLeftAntiSemiHashJoin:
case COperator::EopPhysicalLeftAntiSemiHashJoinNotIn:
case COperator::EopPhysicalLeftOuterHashJoin:
case COperator::EopPhysicalRightOuterHashJoin:
{
return CostHashJoin(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalInnerIndexNLJoin:
case COperator::EopPhysicalLeftOuterIndexNLJoin:
{
return CostIndexNLJoin(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalMotionGather:
case COperator::EopPhysicalMotionBroadcast:
case COperator::EopPhysicalMotionHashDistribute:
case COperator::EopPhysicalMotionRandom:
case COperator::EopPhysicalMotionRoutedDistribute:
{
return CostMotion(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalInnerNLJoin:
case COperator::EopPhysicalLeftSemiNLJoin:
case COperator::EopPhysicalLeftAntiSemiNLJoin:
case COperator::EopPhysicalLeftAntiSemiNLJoinNotIn:
case COperator::EopPhysicalLeftOuterNLJoin:
case COperator::EopPhysicalCorrelatedInnerNLJoin:
case COperator::EopPhysicalCorrelatedLeftOuterNLJoin:
case COperator::EopPhysicalCorrelatedLeftSemiNLJoin:
case COperator::EopPhysicalCorrelatedInLeftSemiNLJoin:
case COperator::EopPhysicalCorrelatedLeftAntiSemiNLJoin:
case COperator::EopPhysicalCorrelatedNotInLeftAntiSemiNLJoin:
{
return CostNLJoin(m_mp, exprhdl, this, pci);
}
case COperator::EopPhysicalFullMergeJoin:
{
return CostMergeJoin(m_mp, exprhdl, this, pci);
}
}
}
// EOF
相关信息
相关文章
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦