greenplum execnodes 源码

2022-08-18
浏览 (364)

greenplum execnodes 代码

文件路径：/src/include/nodes/execnodes.h

/*-------------------------------------------------------------------------
 *
 * execnodes.h
 *	  definitions for executor state nodes
 *
 *
 * Portions Copyright (c) 2005-2009, Greenplum inc
 * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/nodes/execnodes.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef EXECNODES_H
#define EXECNODES_H

#include "access/tupconvert.h"
#include "executor/instrument.h"
#include "lib/pairingheap.h"
#include "nodes/params.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"
#include "partitioning/partdefs.h"
#include "utils/hsearch.h"
#include "utils/queryenvironment.h"
#include "utils/reltrigger.h"
#include "utils/sharedtuplestore.h"
#include "utils/snapshot.h"
#include "utils/sortsupport.h"
#include "utils/tuplestore.h"
#include "utils/tuplesort.h"
#include "nodes/tidbitmap.h"
#include "storage/condition_variable.h"


/*
 * Incomplete struct declarations, so that pointers to these types can be
 * used in this header before (or without) their full definitions.
 */
struct PlanState;				/* forward references in this file */
struct PartitionRoutingInfo;
struct ParallelHashJoinState;
struct ExecRowMark;
struct ExprState;
struct ExprContext;
struct RangeTblEntry;			/* NOTE(review): nodes/parsenodes.h is
								 * included above, so this declaration looks
								 * redundant (but harmless) */
struct ExprEvalStep;			/* avoid including execExpr.h everywhere */
struct CopyMultiInsertBuffer;


/* ----------------
 *		ExprState node
 *
 * ExprState is the top-level node for expression evaluation.
 * It contains instructions (in ->steps) to evaluate the expression.
 *
 * The FIELDNO_EXPRSTATE_* constants defined inside the struct give the
 * zero-based position of the member declared right after them; they must
 * be updated whenever members are added, removed or reordered.
 * ----------------
 */

/*
 * Signature of the function stored in ExprState->evalfunc.  It receives the
 * ExprState being evaluated and an ExprContext, and returns the result as a
 * Datum; isNull presumably receives the result's null flag (the callee is
 * not visible in this header).
 */
typedef Datum (*ExprStateEvalFunc) (struct ExprState *expression,
									struct ExprContext *econtext,
									bool *isNull);

/* Bits in ExprState->flags (see also execExpr.h for private flag bits): */
/* expression is for use with ExecQual() */
#define EEO_FLAG_IS_QUAL					(1 << 0)

typedef struct ExprState
{
	/* node header; note this is a full Node, not a bare NodeTag */
	Node		tag;

	uint8		flags;			/* bitmask of EEO_FLAG_* bits, see above */

	/*
	 * Storage for result value of a scalar expression, or for individual
	 * column results within expressions built by ExecBuildProjectionInfo().
	 */
#define FIELDNO_EXPRSTATE_RESNULL 2
	bool		resnull;
#define FIELDNO_EXPRSTATE_RESVALUE 3
	Datum		resvalue;

	/*
	 * If projecting a tuple result, this slot holds the result; else NULL.
	 */
#define FIELDNO_EXPRSTATE_RESULTSLOT 4
	TupleTableSlot *resultslot;

	/*
	 * Instructions to compute expression's return value.
	 */
	struct ExprEvalStep *steps;

	/*
	 * Function that actually evaluates the expression.  This can be set to
	 * different values depending on the complexity of the expression.
	 */
	ExprStateEvalFunc evalfunc;

	/* original expression tree, for debugging only */
	Expr	   *expr;

	/* private state for an evalfunc */
	void	   *evalfunc_private;

	/*
	 * XXX: following fields only needed during "compilation" (ExecInitExpr);
	 * could be thrown away afterwards.
	 */

	int			steps_len;		/* number of steps currently */
	int			steps_alloc;	/* allocated length of steps array */

#define FIELDNO_EXPRSTATE_PARENT 11
	struct PlanState *parent;	/* parent PlanState node, if any */
	ParamListInfo ext_params;	/* for compiling PARAM_EXTERN nodes */

	/*
	 * NOTE(review): presumably these point at the value/null-flag storage
	 * for the innermost enclosing CASE test expression and domain-coercion
	 * value while the expression is being compiled -- confirm in the
	 * expression compiler before relying on this.
	 */
	Datum	   *innermost_caseval;
	bool	   *innermost_casenull;

	Datum	   *innermost_domainval;
	bool	   *innermost_domainnull;
} ExprState;

/*
 * Partition selector ids start from 1, so 0 never names a real selector and
 * is used as the "not set" value when initializing variables.
 */
#define InvalidPartitionSelectorId  0

/*
 * Incomplete declarations of Greenplum-specific types that this header only
 * refers to through pointers.
 */
struct CdbExplain_ShowStatCtx;          /* private, in "cdb/cdbexplain.c" */
struct ChunkTransportState;             /* #include "cdb/cdbinterconnect.h" */
struct StringInfoData;                  /* #include "lib/stringinfo.h" */
struct MemTupleBinding;
struct MemTupleData;
struct HeapScanDescData;
struct SliceTable;

/* ----------------
 *	  IndexInfo information
 *
 *		this struct holds the information needed to construct new index
 *		entries for a particular index.  Used for both index_build and
 *		retail creation of index entries.
 *
 *		NumIndexAttrs		total number of columns in this index
 *		NumIndexKeyAttrs	number of key columns in index
 *		IndexAttrNumbers	underlying-rel attribute numbers used as keys
 *							(zeroes indicate expressions). It also contains
 * 							info about included columns.
 *		Expressions			expr trees for expression entries, or NIL if none
 *		ExpressionsState	exec state for expressions, or NIL if none
 *		Predicate			partial-index predicate, or NIL if none
 *		PredicateState		exec state for predicate, or NIL if none
 *		ExclusionOps		Per-column exclusion operators, or NULL if none
 *		ExclusionProcs		Underlying function OIDs for ExclusionOps
 *		ExclusionStrats		Opclass strategy numbers for ExclusionOps
 *		UniqueOps			These are like Exclusion*, but for unique indexes
 *		UniqueProcs
 *		UniqueStrats
 *		Unique				is it a unique index?
 *		ReadyForInserts		is it valid for inserts?
 *		Concurrent			are we doing a concurrent index build?
 *		BrokenHotChain		did we detect any broken HOT chains?
 *		ParallelWorkers		# of workers requested (excludes leader)
 *		Am					Oid of index AM
 *		AmCache				private cache area for index AM
 *		Context				memory context holding this IndexInfo
 *
 * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only
 * during index build; they're conventionally zeroed otherwise.
 * ----------------
 */
typedef struct IndexInfo
{
	NodeTag		type;
	int			ii_NumIndexAttrs;	/* total number of columns in index */
	int			ii_NumIndexKeyAttrs;	/* number of key columns in index */
	/* underlying-rel attribute numbers; zeroes indicate expressions */
	AttrNumber	ii_IndexAttrNumbers[INDEX_MAX_KEYS];
	List	   *ii_Expressions; /* list of Expr */
	List	   *ii_ExpressionsState;	/* list of ExprState */
	List	   *ii_Predicate;	/* list of Expr */
	ExprState  *ii_PredicateState;	/* exec state for ii_Predicate, if any */
	Oid		   *ii_ExclusionOps;	/* array with one entry per column */
	Oid		   *ii_ExclusionProcs;	/* array with one entry per column */
	uint16	   *ii_ExclusionStrats; /* array with one entry per column */
	Oid		   *ii_UniqueOps;	/* array with one entry per column */
	Oid		   *ii_UniqueProcs; /* array with one entry per column */
	uint16	   *ii_UniqueStrats;	/* array with one entry per column */
	bool		ii_Unique;		/* is it a unique index? */
	bool		ii_ReadyForInserts; /* is it valid for inserts? */
	bool		ii_Concurrent;	/* concurrent index build? (build only) */
	bool		ii_BrokenHotChain;	/* broken HOT chain seen? (build only) */
	int			ii_ParallelWorkers; /* # of workers requested, excluding
									 * the leader (build only) */
	Oid			ii_Am;			/* Oid of index AM */
	void	   *ii_AmCache;		/* private cache area for index AM */
	MemoryContext ii_Context;	/* memory context holding this IndexInfo */
} IndexInfo;

/* ----------------
 *	  ExprContext_CB
 *
 *		List of callbacks to be called at ExprContext shutdown.
 *		Entries are chained through ->next; each one carries the callback
 *		and a single Datum argument stored alongside it.
 * ----------------
 */
typedef void (*ExprContextCallbackFunction) (Datum arg);

typedef struct ExprContext_CB
{
	struct ExprContext_CB *next;	/* next entry in the list */
	ExprContextCallbackFunction function;	/* callback to invoke */
	Datum		arg;			/* presumably passed to function as its arg */
} ExprContext_CB;

/* ----------------
 *	  ExprContext
 *
 *		This class holds the "current context" information
 *		needed to evaluate expressions for doing tuple qualifications
 *		and tuple projections.  For example, if an expression refers
 *		to an attribute in the current inner tuple then we need to know
 *		what the current inner tuple is and so we look at the expression
 *		context.
 *
 *	There are two memory contexts associated with an ExprContext:
 *	* ecxt_per_query_memory is a query-lifespan context, typically the same
 *	  context the ExprContext node itself is allocated in.  This context
 *	  can be used for purposes such as storing function call cache info.
 *	* ecxt_per_tuple_memory is a short-term context for expression results.
 *	  As the name suggests, it will typically be reset once per tuple,
 *	  before we begin to evaluate expressions for that tuple.  Each
 *	  ExprContext normally has its very own per-tuple memory context.
 *
 *	CurrentMemoryContext should be set to ecxt_per_tuple_memory before
 *	calling ExecEvalExpr() --- see ExecEvalExprSwitchContext().
 * ----------------
 */
typedef struct ExprContext
{
	/*
	 * The FIELDNO_EXPRCONTEXT_* constants below give the zero-based position
	 * of the member declared right after them; keep them in sync when
	 * members are added, removed or reordered.
	 */
	NodeTag		type;

	/* Tuples that Var nodes in expression may refer to */
#define FIELDNO_EXPRCONTEXT_SCANTUPLE 1
	TupleTableSlot *ecxt_scantuple;
#define FIELDNO_EXPRCONTEXT_INNERTUPLE 2
	TupleTableSlot *ecxt_innertuple;
#define FIELDNO_EXPRCONTEXT_OUTERTUPLE 3
	TupleTableSlot *ecxt_outertuple;

	/* Memory contexts for expression evaluation --- see notes above */
	MemoryContext ecxt_per_query_memory;
	MemoryContext ecxt_per_tuple_memory;

	/* Values to substitute for Param nodes in expression */
	ParamExecData *ecxt_param_exec_vals;	/* for PARAM_EXEC params */
	ParamListInfo ecxt_param_list_info; /* for other param types */

	/*
	 * Values to substitute for Aggref nodes in the expressions of an Agg
	 * node, or for WindowFunc nodes within a WindowAgg node.
	 */
#define FIELDNO_EXPRCONTEXT_AGGVALUES 8
	Datum	   *ecxt_aggvalues; /* precomputed values for aggs/windowfuncs */
#define FIELDNO_EXPRCONTEXT_AGGNULLS 9
	bool	   *ecxt_aggnulls;	/* null flags for aggs/windowfuncs */

	/* Value to substitute for CaseTestExpr nodes in expression */
#define FIELDNO_EXPRCONTEXT_CASEDATUM 10
	Datum		caseValue_datum;
#define FIELDNO_EXPRCONTEXT_CASENULL 11
	bool		caseValue_isNull;

	/* Value to substitute for CoerceToDomainValue nodes in expression */
#define FIELDNO_EXPRCONTEXT_DOMAINDATUM 12
	Datum		domainValue_datum;
#define FIELDNO_EXPRCONTEXT_DOMAINNULL 13
	bool		domainValue_isNull;

	/* Link to containing EState (NULL if a standalone ExprContext) */
	struct EState *ecxt_estate;

	/* Functions to call back when ExprContext is shut down or rescanned */
	ExprContext_CB *ecxt_callbacks;
} ExprContext;

/*
 * Status reported for one evaluation of a function that may return a set:
 * the value stands alone, it is one member of a set, or the set is
 * exhausted.
 */
typedef enum
{
	ExprSingleResult = 0,		/* expression does not return a set */
	ExprMultipleResult = 1,		/* this result is an element of a set */
	ExprEndResult = 2			/* there are no more elements in the set */
} ExprDoneCond;

/*
 * Return modes for set-returning functions.  Each value occupies its own
 * bit so that the modes a caller supports can be OR'ed together into a
 * bitmask.  SFRM_Materialize_Random and SFRM_Materialize_Preferred are not
 * modes in their own right; they are auxiliary flags qualifying
 * SFRM_Materialize.
 */
typedef enum
{
	/* one value returned per call */
	SFRM_ValuePerCall = 1 << 0,
	/* result set instantiated in Tuplestore */
	SFRM_Materialize = 1 << 1,
	/* Tuplestore needs randomAccess */
	SFRM_Materialize_Random = 1 << 2,
	/* caller prefers Tuplestore */
	SFRM_Materialize_Preferred = 1 << 3
} SetFunctionReturnMode;

/*
 * When calling a function that might return a set (multiple rows),
 * a node of this type is passed as fcinfo->resultinfo to allow
 * return status to be passed back.  A function returning set should
 * raise an error if no such resultinfo is provided.
 *
 * The members fall into three groups: inputs set by the caller, result
 * status written by the function (but pre-initialized by the caller), and
 * outputs that are filled in only for the Materialize return mode.
 */
typedef struct ReturnSetInfo
{
	NodeTag		type;
	/* values set by caller: */
	ExprContext *econtext;		/* context function is being called in */
	TupleDesc	expectedDesc;	/* tuple descriptor expected by caller */
	int			allowedModes;	/* bitmask: return modes caller can handle
								 * (SetFunctionReturnMode bits) */
	/* result status from function (but pre-initialized by caller): */
	SetFunctionReturnMode returnMode;	/* actual return mode */
	ExprDoneCond isDone;		/* status for ValuePerCall mode */
	/* fields filled by function in Materialize return mode: */
	Tuplestorestate *setResult; /* holds the complete returned tuple set */
	TupleDesc	setDesc;		/* actual descriptor for returned tuples */
} ReturnSetInfo;

/* ----------------
 *		ProjectionInfo node information
 *
 *		This is all the information needed to perform projections ---
 *		that is, form new tuples by evaluation of targetlist expressions.
 *		Nodes which need to do projections create one of these.
 *
 *		The target tuple slot is kept in ProjectionInfo->pi_state.resultslot.
 *		ExecProject() evaluates the tlist, forms a tuple, and stores it
 *		in the given slot.  Note that the result will be a "virtual" tuple
 *		unless ExecMaterializeSlot() is then called to force it to be
 *		converted to a physical tuple.  The slot must have a tupledesc
 *		that matches the output of the tlist!
 * ----------------
 */
typedef struct ProjectionInfo
{
	NodeTag		type;
	/*
	 * instructions to evaluate projection; the ExprState is embedded by
	 * value, and its resultslot member is the projection's target slot
	 */
	ExprState	pi_state;
	/* expression context in which to evaluate expression */
	ExprContext *pi_exprContext;
} ProjectionInfo;

/* ----------------
 *	  JunkFilter
 *
 *	  This class is used to store information regarding junk attributes.
 *	  A junk attribute is an attribute in a tuple that is needed only for
 *	  storing intermediate information in the executor, and does not belong
 *	  in emitted tuples.  For example, when we do an UPDATE query,
 *	  the planner adds a "junk" entry to the targetlist so that the tuples
 *	  returned to ExecutePlan() contain an extra attribute: the ctid of
 *	  the tuple to be updated.  This is needed to do the update, but we
 *	  don't want the ctid to be part of the stored new tuple!  So, we
 *	  apply a "junk filter" to remove the junk attributes and form the
 *	  real output tuple.  The junkfilter code also provides routines to
 *	  extract the values of the junk attribute(s) from the input tuple.
 *
 *	  targetList:		the original target list (including junk attributes).
 *	  cleanTupType:		the tuple descriptor for the "clean" tuple (with
 *						junk attributes removed).
 *	  cleanMap:			A map with the correspondence between the non-junk
 *						attribute numbers of the "original" tuple and the
 *						attribute numbers of the "clean" tuple.
 *	  resultSlot:		tuple slot used to hold cleaned tuple.
 *	  junkAttNo:		not used by junkfilter code.  Can be used by caller
 *						to remember the attno of a specific junk attribute
 *						(nodeModifyTable.c keeps the "ctid" or "wholerow"
 *						attno here).
 * ----------------
 */
typedef struct JunkFilter
{
	NodeTag		type;
	List	   *jf_targetList;	/* original target list, junk included */
	TupleDesc	jf_cleanTupType;	/* descriptor of the "clean" tuple */
	AttrNumber *jf_cleanMap;	/* maps non-junk attnos of the original
								 * tuple to attnos of the clean tuple */
	TupleTableSlot *jf_resultSlot;	/* slot used to hold cleaned tuple */
	AttrNumber	jf_junkAttNo;	/* caller-owned: attno of one specific junk
								 * attribute; unused by junkfilter code */
} JunkFilter;

/*
 * OnConflictSetState
 *
 * Executor state of an ON CONFLICT DO UPDATE operation: the slots for the
 * existing and projected tuples, the projection for the SET clause, and
 * the compiled WHERE clause.
 */
typedef struct OnConflictSetState
{
	NodeTag		type;

	TupleTableSlot *oc_Existing;	/* slot to store existing target tuple in */
	TupleTableSlot *oc_ProjSlot;	/* CONFLICT ... SET ... projection target */
	ProjectionInfo *oc_ProjInfo;	/* for ON CONFLICT DO UPDATE SET */
	ExprState  *oc_WhereClause; /* state for the WHERE clause */
} OnConflictSetState;

/*
 * ResultRelInfo
 *
 * Whenever we update an existing relation, we have to update indexes on the
 * relation, and perhaps also fire triggers.  ResultRelInfo holds all the
 * information needed about a result relation, including indexes.
 *
 * Normally, a ResultRelInfo refers to a table that is in the query's
 * range table; then ri_RangeTableIndex is the RT index and ri_RelationDesc
 * is just a copy of the relevant es_relations[] entry.  But sometimes,
 * in ResultRelInfos used only for triggers, ri_RangeTableIndex is zero
 * and ri_RelationDesc is a separately-opened relcache pointer that needs
 * to be separately closed.  See ExecGetTriggerResultRel.
 *
 * Besides the upstream members, this struct carries two Greenplum-specific
 * junk-column attribute numbers (ri_segid_attno and ri_action_attno).
 */
typedef struct ResultRelInfo
{
	NodeTag		type;

	/* result relation's range table index, or 0 if not in range table */
	Index		ri_RangeTableIndex;

	/* relation descriptor for result relation */
	Relation	ri_RelationDesc;

	/* # of indices existing on result relation */
	int			ri_NumIndices;

	/* array of relation descriptors for indices */
	RelationPtr ri_IndexRelationDescs;

	/* array of key/attr info for indices */
	IndexInfo **ri_IndexRelationInfo;

	/* triggers to be fired, if any */
	TriggerDesc *ri_TrigDesc;

	/* cached lookup info for trigger functions */
	FmgrInfo   *ri_TrigFunctions;

	/* array of trigger WHEN expr states */
	ExprState **ri_TrigWhenExprs;

	/* optional runtime measurements for triggers */
	Instrumentation *ri_TrigInstrument;

	/* On-demand created slots for triggers / returning processing */
	TupleTableSlot *ri_ReturningSlot;	/* for trigger output tuples */
	TupleTableSlot *ri_TrigOldSlot; /* for a trigger's old tuple */
	TupleTableSlot *ri_TrigNewSlot; /* for a trigger's new tuple */

	/* FDW callback functions, if foreign table */
	struct FdwRoutine *ri_FdwRoutine;

	/* available to save private state of FDW */
	void	   *ri_FdwState;

	/* true when modifying foreign table directly */
	bool		ri_usesFdwDirectModify;

	/* list of WithCheckOption's to be checked */
	List	   *ri_WithCheckOptions;

	/* list of WithCheckOption expr states */
	List	   *ri_WithCheckOptionExprs;

	/* array of constraint-checking expr states */
	ExprState **ri_ConstraintExprs;

	/* array of stored generated columns expr states */
	ExprState **ri_GeneratedExprs;

	/* for removing junk attributes from tuples */
	JunkFilter *ri_junkFilter;

	/*
	 * Extra GPDB junk columns. ri_segid_attno is used with DELETE, to
	 * indicate the segment the target tuple came from. ri_action_attno is
	 * used with Split Updates.
	 *
	 * The target tuple's ctid is in ri_junkFilter->jf_junkAttNo, like in
	 * upstream.
	 */
	AttrNumber  ri_segid_attno;		/* gp_segment_id of old tuple */
	AttrNumber	ri_action_attno;	/* is this an INSERT or DELETE ? */

	/* list of RETURNING expressions */
	List	   *ri_returningList;

	/* for computing a RETURNING list */
	ProjectionInfo *ri_projectReturning;

	/* list of arbiter indexes to use to check conflicts */
	List	   *ri_onConflictArbiterIndexes;

	/* ON CONFLICT evaluation state */
	OnConflictSetState *ri_onConflict;

	/* partition check expression */
	List	   *ri_PartitionCheck;

	/* partition check expression state */
	ExprState  *ri_PartitionCheckExpr;

	/* relation descriptor for root partitioned table */
	Relation	ri_PartitionRoot;

	/* Additional information specific to partition tuple routing */
	struct PartitionRoutingInfo *ri_PartitionInfo;

	/* For use by copy.c when performing multi-inserts */
	struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer;
} ResultRelInfo;

/* ----------------
 *	  EState information
 *
 * Master working state for an Executor invocation.
 *
 * The first part of the struct mirrors the upstream PostgreSQL layout; the
 * members from es_sliceTable onwards are Greenplum (MPP) additions.
 * ----------------
 */
typedef struct EState
{
	NodeTag		type;

	/* Basic state for all query types: */
	ScanDirection es_direction; /* current scan direction */
	Snapshot	es_snapshot;	/* time qual to use */
	Snapshot	es_crosscheck_snapshot; /* crosscheck time qual for RI */
	List	   *es_range_table; /* List of RangeTblEntry */
	struct RangeTblEntry **es_range_table_array;	/* equivalent array */
	Index		es_range_table_size;	/* size of the range table arrays */
	Relation   *es_relations;	/* Array of per-range-table-entry Relation
								 * pointers, or NULL if not yet opened */
	struct ExecRowMark **es_rowmarks;	/* Array of per-range-table-entry
										 * ExecRowMarks, or NULL if none */
	PlannedStmt *es_plannedstmt;	/* link to top of plan tree */
	const char *es_sourceText;	/* Source text from QueryDesc */

	JunkFilter *es_junkFilter;	/* top-level junk filter, if any */

	/* If query can insert/delete tuples, the command ID to mark them with */
	CommandId	es_output_cid;

	/* Info about target table(s) for insert/update/delete queries: */
	ResultRelInfo *es_result_relations; /* array of ResultRelInfos */
	int			es_num_result_relations;	/* length of array */
	ResultRelInfo *es_result_relation_info; /* currently active array elt */

	/*
	 * Info about the partition root table(s) for insert/update/delete queries
	 * targeting partitioned tables.  Only leaf partitions are mentioned in
	 * es_result_relations, but we need access to the roots for firing
	 * triggers and for runtime tuple routing.
	 */
	ResultRelInfo *es_root_result_relations;	/* array of ResultRelInfos */
	int			es_num_root_result_relations;	/* length of the array */
	PartitionDirectory es_partition_directory;	/* for PartitionDesc lookup */

	/*
	 * The following list contains ResultRelInfos created by the tuple routing
	 * code for partitions that don't already have one.
	 */
	List	   *es_tuple_routing_result_relations;

	/* Stuff used for firing triggers: */
	List	   *es_trig_target_relations;	/* trigger-only ResultRelInfos */

	TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */
	TupleTableSlot *es_trig_oldtup_slot;		/* for TriggerEnabled */
	TupleTableSlot *es_trig_newtup_slot;		/* for TriggerEnabled */

	/* Parameter info: */
	ParamListInfo es_param_list_info;	/* values of external params */
	ParamExecData *es_param_exec_vals;	/* values of internal params */

	QueryEnvironment *es_queryEnv;	/* query environment */

	/* Other working state: */
	MemoryContext es_query_cxt; /* per-query context in which EState lives */

	List	   *es_tupleTable;	/* List of TupleTableSlots */

	uint64		es_processed;	/* # of tuples processed */

	int			es_top_eflags;	/* eflags passed to ExecutorStart */
	int			es_instrument;	/* OR of InstrumentOption flags */
	bool		es_finished;	/* true when ExecutorFinish is done */

	List	   *es_exprcontexts;	/* List of ExprContexts within EState */

	List	   *es_subplanstates;	/* List of PlanState for SubPlans */

	List	   *es_auxmodifytables; /* List of secondary ModifyTableStates */

	/*
	 * this ExprContext is for per-output-tuple operations, such as constraint
	 * checks and index-value computations.  It will be reset for each output
	 * tuple.  Note that it will be created only if needed.
	 */
	ExprContext *es_per_tuple_exprcontext;

	/*
	 * These fields are for re-evaluating plan quals when an updated tuple is
	 * substituted in READ COMMITTED mode.  es_epqTupleSlot[] contains test
	 * tuples that scan plan nodes should return instead of whatever they'd
	 * normally return, or an empty slot if there is nothing to return; and
	 * es_epqScanDone[] is state to remember if the tuple has been returned
	 * already.  Arrays are of size es_range_table_size and are indexed by
	 * scan node scanrelid - 1.
	 *
	 * NOTE(review): the earlier wording of this comment contained a garbled
	 * clause about validity; presumably a non-NULL es_epqTupleSlot[] entry
	 * means that array entry is valid -- confirm against the EPQ code.
	 */
	TupleTableSlot **es_epqTupleSlot;	/* array of EPQ substitute tuples */
	bool	   *es_epqScanDone; /* true if EPQ tuple has been fetched */

	bool		es_use_parallel_mode;	/* can we use parallel workers? */

	/* The per-query shared memory area to use for parallel execution. */
	struct dsa_area *es_query_dsa;

	/*
	 * JIT information. es_jit_flags indicates whether JIT should be performed
	 * and with which options.  es_jit is created on-demand when JITing is
	 * performed.
	 *
	 * es_jit_worker_instr is the combined, on demand allocated,
	 * instrumentation from all workers. The leader's instrumentation is kept
	 * separate, and is combined on demand by ExplainPrintJITSummary().
	 */
	int			es_jit_flags;
	struct JitContext *es_jit;
	struct JitInstrumentation *es_jit_worker_instr;

	/* Additions for MPP plan slicing. */
	struct SliceTable *es_sliceTable;

	/* Current positions of cursors used in CURRENT OF expressions */
	List	   *es_cursorPositions;

	/* Data structure for node sharing */
	List	   *es_sharenode;

	int			active_recv_id;
	void	   *motionlayer_context;  /* Motion Layer state */
	struct ChunkTransportState *interconnect_context; /* Interconnect state */

	/* MPP used resources */
	bool		es_interconnect_is_setup;   /* is interconnect set-up?    */

	bool		es_got_eos;			/* was end-of-stream received? */

	/* set during cleanup so that later code knows the query was not run to completion */
	bool		cancelUnfinished;	/* NOTE(review): wording above is inferred from the name and the original remark -- confirm */

	/* results from qExec processes */
	struct CdbDispatcherState *dispatcherState;

	/* CDB: EXPLAIN ANALYZE statistics */
	struct CdbExplain_ShowStatCtx  *showstatctx;

	/*
	 * The slice number for the current node that is being processed.
	 * During plan initialization, in ExecInitPlan(), it is set to the
	 * slice we're currently initializing, even if it's an "alien" node.
	 * When executing a plan (ExecProcNode()), it is always set to the
	 * local slice we're currently executing, never to an alien slice.
	 */
	int			currentSliceId;

	/* Should the executor skip past the alien plan nodes */
	bool eliminateAliens;

	/*
	 * partition oid that is being scanned, used by
	 * DynamicBitmapHeapScan/IndexScan.
	 *
	 * NOTE(review): declared as int although it is described as an OID;
	 * presumably Oid would be the matching type -- confirm with the users
	 * of this member before changing it.
	 */
	int			partitionOid;

} EState;

/*
 * Prototypes for slice-related helpers whose definitions are not in this
 * header.  NOTE(review): judging by their names they look up a Motion node
 * state and various slice indexes for an executor state -- confirm against
 * the definitions before relying on that.
 */
struct PlanState;				/* already declared near the top of file */
struct MotionState;

extern struct MotionState *getMotionState(struct PlanState *ps, int sliceIndex);
extern int LocallyExecutingSliceIndex(EState *estate);
extern int PrimaryWriterSliceIndex(EState *estate);
extern int RootSliceIndex(EState *estate);

/*
 * ExecRowMark -
 *	   runtime representation of FOR [KEY] UPDATE/SHARE clauses
 *
 * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we will have an
 * ExecRowMark for each non-target relation in the query (except inheritance
 * parent RTEs, which can be ignored at runtime).  Virtual relations such as
 * subqueries-in-FROM will have an ExecRowMark with relation == NULL.  See
 * PlanRowMark for details about most of the fields.  In addition to fields
 * directly derived from PlanRowMark, we store an activity flag (to denote
 * inactive children of inheritance trees), curCtid, which is used by the
 * WHERE CURRENT OF code, and ermExtra, which is available for use by the plan
 * node that sources the relation (e.g., for a foreign table the FDW can use
 * ermExtra to hold information).
 *
 * EState->es_rowmarks is an array of these structs, indexed by RT index,
 * with NULLs for irrelevant RT indexes.  es_rowmarks itself is NULL if
 * there are no rowmarks.
 */
typedef struct ExecRowMark
{
	Relation	relation;		/* opened and suitably locked relation; NULL
								 * for virtual relations such as subqueries */
	Oid			relid;			/* its OID (or InvalidOid, if subquery) */
	Index		rti;			/* its range table index */
	Index		prti;			/* parent range table index, if child */
	Index		rowmarkId;		/* unique identifier for resjunk columns */
	RowMarkType markType;		/* see enum in nodes/plannodes.h */
	LockClauseStrength strength;	/* LockingClause's strength, or LCS_NONE */
	LockWaitPolicy waitPolicy;	/* NOWAIT and SKIP LOCKED */
	bool		ermActive;		/* is this mark relevant for current tuple? */
	ItemPointerData curCtid;	/* ctid of currently locked tuple, if any;
								 * used by the WHERE CURRENT OF code */
	void	   *ermExtra;		/* available for use by relation source node */
} ExecRowMark;

/*
 * ExecAuxRowMark -
 *	   additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
 *
 * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
 * deal with.  In addition to a pointer to the related entry in es_rowmarks,
 * this struct carries the column number(s) of the resjunk columns associated
 * with the rowmark (see comments for PlanRowMark for more detail).  In the
 * case of ModifyTable, there has to be a separate ExecAuxRowMark list for
 * each child plan, because the resjunk columns could be at different physical
 * column positions in different subplans.
 */
typedef struct ExecAuxRowMark
{
	ExecRowMark *rowmark;		/* related entry in es_rowmarks */
	/* resjunk column positions for this rowmark, per the header comment: */
	AttrNumber	ctidAttNo;		/* resno of ctid junk attribute, if any */
	AttrNumber	toidAttNo;		/* resno of tableoid junk attribute, if any */
	AttrNumber	wholeAttNo;		/* resno of whole-row junk attribute, if any */
} ExecAuxRowMark;


/* ----------------------------------------------------------------
 *				 Tuple Hash Tables
 *
 * All-in-memory tuple hash tables are used for a number of purposes.
 *
 * Note: tab_hash_funcs are for the key datatype(s) stored in the table,
 * and tab_eq_funcs are non-cross-type equality operators for those types.
 * Normally these are the only functions used, but FindTupleHashEntry()
 * supports searching a hashtable using cross-data-type hashing.  For that,
 * the caller must supply hash functions for the LHS datatype as well as
 * the cross-type equality operators to use.  in_hash_funcs and cur_eq_func
 * are set to point to the caller's function arrays while doing such a search.
 * During LookupTupleHashEntry(), they point to tab_hash_funcs and
 * tab_eq_func respectively.
 * ----------------------------------------------------------------
 */
/* Pointer typedefs; the structs themselves are defined below. */
typedef struct TupleHashEntryData *TupleHashEntry;
typedef struct TupleHashTableData *TupleHashTable;

/* One entry of a tuple hash table; also the simplehash element type below. */
typedef struct TupleHashEntryData
{
	MinimalTuple firstTuple;	/* copy of first tuple in this group */
	void	   *additional;		/* user data */
	uint32		status;			/* hash status */
	uint32		hash;			/* hash value (cached) */
} TupleHashEntryData;

/*
 * define parameters necessary to generate the tuple hash table interface
 *
 * The generated names use the "tuplehash" prefix (e.g. tuplehash_hash,
 * tuplehash_iterator, as used below).  NOTE(review): with SH_DECLARE and
 * no SH_DEFINE, presumably only declarations are emitted here and the
 * definitions are generated in a .c file -- see lib/simplehash.h.
 */
#define SH_PREFIX tuplehash
#define SH_ELEMENT_TYPE TupleHashEntryData
#define SH_KEY_TYPE MinimalTuple
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"

/*
 * State of one tuple hash table: the underlying simplehash table, the
 * key-column and function setup fixed for the table, and a group of
 * members that are set transiently for each search.
 */
typedef struct TupleHashTableData
{
	tuplehash_hash *hashtab;	/* underlying hash table */
	int			numCols;		/* number of columns in lookup key */
	AttrNumber *keyColIdx;		/* attr numbers of key columns */
	FmgrInfo   *tab_hash_funcs; /* hash functions for table datatype(s) */
	ExprState  *tab_eq_func;	/* comparator for table datatype(s) */
	Oid		   *tab_collations; /* collations for hash and comparison */
	MemoryContext tablecxt;		/* memory context containing table */
	MemoryContext tempcxt;		/* context for function evaluations */
	Size		entrysize;		/* actual size to make each hash entry */
	TupleTableSlot *tableslot;	/* slot for referencing table entries */
	/* The following fields are set transiently for each table search: */
	TupleTableSlot *inputslot;	/* current input tuple's slot */
	FmgrInfo   *in_hash_funcs;	/* hash functions for input datatype(s) */
	ExprState  *cur_eq_func;	/* comparator for input vs. table */
	uint32		hash_iv;		/* hash-function IV */
	ExprContext *exprcontext;	/* expression context */
}			TupleHashTableData;

/* Iterator type generated by simplehash for the "tuplehash" prefix. */
typedef tuplehash_iterator TupleHashIterator;

/*
 * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
 * Use ResetTupleHashIterator if the table can be frozen (in this case no
 * explicit scan termination is needed).
 *
 * htable is a TupleHashTable (pointer to TupleHashTableData); iter is passed
 * through unchanged to the tuplehash_* iteration functions.  The htable
 * argument is parenthesized before "->" is applied, so that expressions such
 * as "&tables[i]" or "base + 1" expand correctly.
 *
 * TermTupleHashIterator expands to a no-op and does not evaluate its
 * argument.  ResetTupleHashIterator is simply another name for
 * InitTupleHashIterator.
 */
#define InitTupleHashIterator(htable, iter) \
	tuplehash_start_iterate((htable)->hashtab, iter)
#define TermTupleHashIterator(iter) \
	((void) 0)
#define ResetTupleHashIterator(htable, iter) \
	InitTupleHashIterator(htable, iter)
#define ScanTupleHashTable(htable, iter) \
	tuplehash_iterate((htable)->hashtab, iter)

/*
 * Abstraction of different memory management calls: a memory manager
 * instance bundled with the callbacks used to allocate and free memory
 * through it.
 */
typedef struct MemoryManagerContainer
{
	void *manager; /* memory manager instance */
	/* allocation callback; receives the manager and the requested length */
	void *(*alloc)(void *manager, Size len);
	/* release callback; receives the manager and the pointer to release */
	void (*free)(void *manager, void *pointer);
	/*
	 * Growth factor for reallocation: if the existing space is too small,
	 * the reallocated space is realloc_ratio times the size of the existing
	 * one.
	 */
	int realloc_ratio;
} MemoryManagerContainer;

/* ----------------------------------------------------------------
 *				 Expression State Nodes
 *
 * Formerly, there was a separate executor expression state node corresponding
 * to each node in a planned expression tree.  That's no longer the case; for
 * common expression node types, all the execution info is embedded into
 * step(s) in a single ExprState node.  But we still have a few executor state
 * node types for selected expression node types, mostly those in which info
 * has to be shared with other parts of the execution state tree.
 * ----------------------------------------------------------------
 */

/* ----------------
 *		AggrefExprState node
 *
 * Executor state for an Aggref expression: a link to the Aggref plan node
 * plus the ID number of the aggregate within its plan node.
 * ----------------
 */
typedef struct AggrefExprState
{
	NodeTag		type;
	Aggref	   *aggref;			/* expression plan node */
	int			aggno;			/* ID number for agg within its plan node */
} AggrefExprState;

/* ----------------
 *		WindowFuncExprState node
 *
 * Executor state for a WindowFunc expression: the plan node, the expression
 * states of its arguments and FILTER clause, and the ID number of the window
 * function within its plan node.
 * ----------------
 */
typedef struct WindowFuncExprState
{
	NodeTag		type;
	WindowFunc *wfunc;			/* expression plan node */
	List	   *args;			/* ExprStates for argument expressions */
	ExprState  *aggfilter;		/* FILTER expression */
	int			wfuncno;		/* ID number for wfunc within its plan node */
} WindowFuncExprState;


/* ----------------
 *		SetExprState node
 *
 * State for evaluating a potentially set-returning expression (like FuncExpr
 * or OpExpr).  In some cases, like some of the expressions in ROWS FROM(...)
 * the expression might not be a SRF, but nonetheless it uses the same
 * machinery as SRFs; it will be treated as a SRF returning a single row.
 * ----------------
 */
typedef struct SetExprState
{
	NodeTag		type;
	Expr	   *expr;			/* expression plan node */
	List	   *args;			/* ExprStates for argument expressions */

	/*
	 * In ROWS FROM, functions can be inlined, removing the FuncExpr normally
	 * inside.  In such a case this is the compiled expression (which cannot
	 * return a set), which'll be evaluated using regular ExecEvalExpr().
	 */
	ExprState  *elidedFuncState;

	/*
	 * Function manager's lookup info for the target function.  If func.fn_oid
	 * is InvalidOid, we haven't initialized it yet (nor any of the following
	 * fields, except funcReturnsSet).
	 */
	FmgrInfo	func;

	/*
	 * For a set-returning function (SRF) that returns a tuplestore, we keep
	 * the tuplestore here and dole out the result rows one at a time. The
	 * slot holds the row currently being returned.
	 */
	Tuplestorestate *funcResultStore;
	TupleTableSlot *funcResultSlot;

	/*
	 * In some cases we need to compute a tuple descriptor for the function's
	 * output.  If so, it's stored here.
	 */
	TupleDesc	funcResultDesc;
	bool		funcReturnsTuple;	/* valid when funcResultDesc isn't NULL */

	/*
	 * Remember whether the function is declared to return a set.  This is set
	 * by ExecInitExpr, and is valid even before the FmgrInfo is set up.
	 */
	bool		funcReturnsSet;

	/*
	 * setArgsValid is true when we are evaluating a set-returning function
	 * that uses value-per-call mode and we are in the middle of a call
	 * series; we want to pass the same argument values to the function again
	 * (and again, until it returns ExprEndResult).  This indicates that
	 * fcinfo (below) already contains valid argument data.
	 */
	bool		setArgsValid;

	/*
	 * Flag to remember whether we have registered a shutdown callback for
	 * this SetExprState.  We do so only if funcResultStore or setArgsValid
	 * has been set at least once (since all the callback is for is to release
	 * the tuplestore or clear setArgsValid).
	 */
	bool		shutdown_reg;	/* a shutdown callback is registered */

	/*
	 * Call parameter structure for the function.  This has been initialized
	 * (by InitFunctionCallInfoData) if func.fn_oid is valid.  It also saves
	 * argument values between calls, when setArgsValid is true.
	 */
	FunctionCallInfo fcinfo;
} SetExprState;

/* ----------------
 *		SubPlanState node
 *
 * Executor state for a SubPlan expression.  Holds the subselect's own plan
 * state tree, a link to the parent node's state, the combining ("test")
 * expression and argument states, and, when the subselect's output is
 * hashed, the hash tables and their supporting lookup data.
 * ----------------
 */
typedef struct SubPlanState
{
	NodeTag		type;
	SubPlan    *subplan;		/* expression plan node */
	struct PlanState *planstate;	/* subselect plan's state tree */
	struct PlanState *parent;	/* parent plan node's state tree */
	ExprState  *testexpr;		/* state of combining expression */
	List	   *args;			/* states of argument expression(s) */
	HeapTuple	curTuple;		/* copy of most recent tuple from subplan */
	Datum		curArray;		/* most recent array from ARRAY() subplan */
	/* these are used when hashing the subselect's output: */
	TupleDesc	descRight;		/* subselect desc after projection */
	ProjectionInfo *projLeft;	/* for projecting lefthand exprs */
	ProjectionInfo *projRight;	/* for projecting subselect output */
	TupleHashTable hashtable;	/* hash table for no-nulls subselect rows */
	TupleHashTable hashnulls;	/* hash table for rows with null(s) */
	bool		havehashrows;	/* true if hashtable is not empty */
	bool		havenullrows;	/* true if hashnulls is not empty */
	MemoryContext hashtablecxt; /* memory context containing hash tables */
	MemoryContext hashtempcxt;	/* temp memory context for hash tables */
	ExprContext *innerecontext; /* econtext for computing inner tuples */
	AttrNumber *keyColIdx;		/* control data for hash tables */
	Oid		   *tab_eq_funcoids;	/* equality func oids for table
									 * datatype(s) */
	Oid		   *tab_collations; /* collations for hash and comparison */
	FmgrInfo   *tab_hash_funcs; /* hash functions for table datatype(s) */
	FmgrInfo   *tab_eq_funcs;	/* equality functions for table datatype(s) */
	FmgrInfo   *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
	FmgrInfo   *cur_eq_funcs;	/* equality functions for LHS vs. table */
	ExprState  *cur_eq_comp;	/* equality comparator for LHS vs. table */

	/*
	 * NOTE(review): undocumented tuplestore handle; its producer and
	 * consumer are not visible in this header -- confirm intended use.
	 */
	Tuplestorestate *ts_state;
} SubPlanState;

/* ----------------
 *		AlternativeSubPlanState node
 *
 * Executor state for an AlternativeSubPlan expression: the states of the
 * alternative subplans plus the list index of the one in use.
 * ----------------
 */
typedef struct AlternativeSubPlanState
{
	NodeTag		type;
	AlternativeSubPlan *subplan;	/* expression plan node */
	List	   *subplans;		/* SubPlanStates of alternative subplans */
	int			active;			/* list index of the one we're using */
} AlternativeSubPlanState;

/*
 * DomainConstraintState - one item to check during CoerceToDomain
 *
 * Note: we consider this to be part of an ExprState tree, so we give it
 * a name following the xxxState convention.  But there's no directly
 * associated plan-tree node.
 */
/* Kinds of domain constraint that a DomainConstraintState item can check. */
typedef enum DomainConstraintType
{
	DOM_CONSTRAINT_NOTNULL = 0,	/* NOT NULL constraint */
	DOM_CONSTRAINT_CHECK = 1	/* CHECK constraint (has a check expression) */
} DomainConstraintType;

/*
 * One constraint to be checked during CoerceToDomain.  check_expr is used
 * for CHECK constraints; check_exprstate is its evaluation state and may be
 * NULL.
 */
typedef struct DomainConstraintState
{
	NodeTag		type;
	DomainConstraintType constrainttype;	/* constraint type */
	char	   *name;			/* name of constraint (for error msgs) */
	Expr	   *check_expr;		/* for CHECK, a boolean expression */
	ExprState  *check_exprstate;	/* check_expr's eval state, or NULL */
} DomainConstraintState;


/* ----------------------------------------------------------------
 *				 Executor State Trees
 *
 * An executing query has a PlanState tree paralleling the Plan tree
 * that describes the plan.
 * ----------------------------------------------------------------
 */

/* ----------------
 *	 ExecProcNodeMtd
 *
 * This is the method called by ExecProcNode to return the next tuple
 * from an executor node.  It receives the node's PlanState and returns
 * NULL, or an empty TupleTableSlot, if no more tuples are available.
 * ----------------
 */
typedef TupleTableSlot *(*ExecProcNodeMtd) (struct PlanState *pstate);

/* ----------------
 *		PlanState node
 *
 * We never actually instantiate any PlanState nodes; this is just the common
 * abstract superclass for all PlanState-type nodes.  Concrete node state
 * structs embed a PlanState (directly, or via ScanState) as their first
 * field.
 * ----------------
 */
typedef struct PlanState
{
	NodeTag		type;

	Plan	   *plan;			/* associated Plan node */

	EState	   *state;			/* at execution time, states of individual
								 * nodes point to one EState for the whole
								 * top-level plan */

	ExecProcNodeMtd ExecProcNode;	/* function to return next tuple */
	ExecProcNodeMtd ExecProcNodeReal;	/* actual function, if above is a
										 * wrapper */

	Instrumentation *instrument;	/* Optional runtime stats for this node */
	WorkerInstrumentation *worker_instrument;	/* per-worker instrumentation */
	struct StringInfoData  *cdbexplainbuf;  /* EXPLAIN ANALYZE report buf */
	void      (*cdbexplainfun)(struct PlanState *planstate, struct StringInfoData *buf);
	/* callback before ExecutorEnd (describes cdbexplainfun above) */

	/* Per-worker JIT instrumentation */
	struct SharedJitInstrumentation *worker_jit_instrument;

	/*
	 * Common structural data for all Plan types.  These links to subsidiary
	 * state trees parallel links in the associated plan tree (except for the
	 * subPlan list, which does not exist in the plan tree).
	 */
	ExprState  *qual;			/* boolean qual condition */
	struct PlanState *lefttree; /* input plan tree(s) */
	struct PlanState *righttree;

	List	   *initPlan;		/* Init SubPlanState nodes (un-correlated expr
								 * subselects) */
	List	   *subPlan;		/* SubPlanState nodes in my expressions */

	/*
	 * State for management of parameter-change-driven rescanning
	 */
	Bitmapset  *chgParam;		/* set of IDs of changed Params */

	/*
	 * Other run-time state needed by most if not all node types.
	 */
	TupleDesc	ps_ResultTupleDesc; /* node's return type */
	TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
	ExprContext *ps_ExprContext;	/* node's expression-evaluation context */
	ProjectionInfo *ps_ProjInfo;	/* info for doing tuple projection */

	/*
	 * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
	 * it's hard for expression compilation to optimize based on the
	 * descriptor, without encoding knowledge about all executor nodes.
	 */
	TupleDesc	scandesc;

	/*
	 * Define the slot types for inner, outer and scanslots for expression
	 * contexts with this state as a parent.  If *opsset is set, then
	 * *opsfixed indicates whether *ops is guaranteed to be the type of slot
	 * used. That means that every slot in the corresponding
	 * ExprContext.ecxt_*tuple will point to a slot of that type, while
	 * evaluating the expression.  If *opsfixed is false, but *ops is set,
	 * that indicates the most likely type of slot.
	 *
	 * The scan* fields are set by ExecInitScanTupleSlot(). If that's not
	 * called, nodes can initialize the fields themselves.
	 *
	 * If outer/inneropsset is false, the information is inferred on-demand
	 * using ExecGetResultSlotOps() on ->lefttree/righttree (outer is the
	 * left tree, inner is the right tree), using the corresponding node's
	 * resultops* fields.
	 *
	 * The result* fields are automatically set when ExecInitResultSlot is
	 * used (be it directly or when the slot is created by
	 * ExecAssignScanProjectionInfo() /
	 * ExecConditionalAssignProjectionInfo()).  If no projection is necessary
	 * ExecConditionalAssignProjectionInfo() defaults those fields to the scan
	 * operations.
	 */
	const TupleTableSlotOps *scanops;
	const TupleTableSlotOps *outerops;
	const TupleTableSlotOps *innerops;
	const TupleTableSlotOps *resultops;
	bool		scanopsfixed;
	bool		outeropsfixed;
	bool		inneropsfixed;
	bool		resultopsfixed;
	bool		scanopsset;
	bool		outeropsset;
	bool		inneropsset;
	bool		resultopsset;

	/*
	 * NOTE(review): undocumented; presumably a memory context owned by this
	 * node -- confirm against the executor code that sets it.
	 */
	MemoryContext node_context;

	/*
	 * NOTE(review): undocumented flag; its meaning is not visible in this
	 * header.
	 */
	bool		fHadSentNodeStart;

	bool		squelched;		/* has ExecSquelchNode() been called already? */
} PlanState;

/*
 * NOTE(review): judging by the name, returns the operator memory for the
 * given plan state node in kilobytes -- confirm against the definition.
 */
extern uint64 PlanStateOperatorMemKB(const PlanState *ps);

/* ----------------
 *	These are defined to avoid confusion problems with "left"
 *	and "right" and "inner" and "outer".  The convention is that
 *	the "left" plan is the "outer" plan and the "right" plan is
 *	the "inner" plan, but these make the code more readable.
 *
 *	Both macros cast their argument to PlanState *, so any node state
 *	struct whose first member is a PlanState may be passed.
 * ----------------
 */
#define innerPlanState(node)		(((PlanState *)(node))->righttree)
#define outerPlanState(node)		(((PlanState *)(node))->lefttree)

/*
 * Macros for inline access to certain instrumentation counters.
 *
 * Each macro casts "node" to PlanState * and, if the node has an
 * Instrumentation struct attached, adds "delta" to the named counter.  When
 * no instrumentation is attached the macro does nothing and "delta" is not
 * evaluated.
 *
 * "node" is evaluated exactly once (the instrument pointer is fetched into a
 * block-local variable), so an argument with side effects behaves as it
 * would with a function call.
 */
#define InstrCountTuples2(node, delta) \
	do { \
		Instrumentation *instr_count_target_ = ((PlanState *)(node))->instrument; \
		if (instr_count_target_ != NULL) \
			instr_count_target_->ntuples2 += (delta); \
	} while (0)
#define InstrCountFiltered1(node, delta) \
	do { \
		Instrumentation *instr_count_target_ = ((PlanState *)(node))->instrument; \
		if (instr_count_target_ != NULL) \
			instr_count_target_->nfiltered1 += (delta); \
	} while (0)
#define InstrCountFiltered2(node, delta) \
	do { \
		Instrumentation *instr_count_target_ = ((PlanState *)(node))->instrument; \
		if (instr_count_target_ != NULL) \
			instr_count_target_->nfiltered2 += (delta); \
	} while (0)

/*
 * EPQState is state for executing an EvalPlanQual recheck on a candidate
 * tuple in ModifyTable or LockRows.  The estate and planstate fields are
 * NULL if inactive.
 *
 * Note this is a plain struct, not a node: it has no NodeTag and is embedded
 * by value (see ModifyTableState.mt_epqstate).
 */
typedef struct EPQState
{
	EState	   *estate;			/* subsidiary EState */
	PlanState  *planstate;		/* plan state tree ready to be executed */
	TupleTableSlot *origslot;	/* original output tuple to be rechecked */
	Plan	   *plan;			/* plan tree to be executed */
	List	   *arowMarks;		/* ExecAuxRowMarks (non-locking only) */
	int			epqParam;		/* ID of Param to force scan node re-eval */
} EPQState;


/* ----------------
 *	 ResultState information
 *
 *		resconstantqual		expression state of the node's constant qual
 *							(NOTE(review): inferred from the name -- confirm)
 *		rs_done				are we done?
 *		rs_checkqual		do we need to check the qual?
 *		hashFilter			NOTE(review): undocumented CdbHash pointer;
 *							purpose not visible in this header
 * ----------------
 */
typedef struct ResultState
{
	PlanState	ps;				/* its first field is NodeTag */
	ExprState  *resconstantqual;
	bool		rs_done;		/* are we done? */
	bool		rs_checkqual;	/* do we need to check the qual? */

	struct CdbHash *hashFilter;
} ResultState;

/* ----------------
 *	 ProjectSetState information
 *
 * Note: at least one of the "elems" will be a SetExprState; the rest are
 * regular ExprStates.  That is why elems is declared as an array of generic
 * Node pointers.
 * ----------------
 */
typedef struct ProjectSetState
{
	PlanState	ps;				/* its first field is NodeTag */
	Node	  **elems;			/* array of expression states */
	ExprDoneCond *elemdone;		/* array of per-SRF is-done states */
	int			nelems;			/* length of elemdone[] array */
	bool		pending_srf_tuples; /* still evaluating srfs in tlist? */
	MemoryContext argcontext;	/* context for SRF arguments */
} ProjectSetState;

/* ----------------
 *	 ModifyTableState information
 *
 * Executor state for a ModifyTable node (INSERT, UPDATE, or DELETE).  The
 * per-subplan arrays (mt_plans, mt_scans, mt_arowmarks, mt_isSplitUpdates,
 * mt_per_subplan_tupconv_maps) are described as one entry per subplan;
 * mt_nplans is the number of plans in mt_plans.
 * ----------------
 */
typedef struct ModifyTableState
{
	PlanState	ps;				/* its first field is NodeTag */
	CmdType		operation;		/* INSERT, UPDATE, or DELETE */
	bool		canSetTag;		/* do we set the command tag/es_processed? */
	bool		mt_done;		/* are we done? */
	PlanState **mt_plans;		/* subplans (one per target rel) */
	int			mt_nplans;		/* number of plans in the array */
	int			mt_whichplan;	/* which one is being executed (0..n-1) */
	TupleTableSlot **mt_scans;	/* input tuple corresponding to underlying
								 * plans */
	ResultRelInfo *resultRelInfo;	/* per-subplan target relations */
	ResultRelInfo *rootResultRelInfo;	/* root target relation (partitioned
										 * table root) */
	List	  **mt_arowmarks;	/* per-subplan ExecAuxRowMark lists */
	EPQState	mt_epqstate;	/* for evaluating EvalPlanQual rechecks */
	bool		fireBSTriggers; /* do we need to fire stmt triggers? */
	bool	   *mt_isSplitUpdates; /* per-subplan flag to indicate if it's a split update */
	List	   *mt_excludedtlist;	/* the excluded pseudo relation's tlist  */

	/*
	 * Slot for storing tuples in the root partitioned table's rowtype during
	 * an UPDATE of a partitioned table.
	 */
	TupleTableSlot *mt_root_tuple_slot;

	/* Tuple-routing support info */
	struct PartitionTupleRouting *mt_partition_tuple_routing;

	/* controls transition table population for specified operation */
	struct TransitionCaptureState *mt_transition_capture;

	/* controls transition table population for INSERT...ON CONFLICT UPDATE */
	struct TransitionCaptureState *mt_oc_transition_capture;

	/* Per plan map for tuple conversion from child to root */
	TupleConversionMap **mt_per_subplan_tupconv_maps;
} ModifyTableState;

/* ----------------
 *	 AppendState information
 *
 *		nplans				how many plans are in the array
 *		whichplan			which plan is being executed (0 .. n-1), or a
 *							special negative value. See nodeAppend.c.
 *		prune_state			details required to allow partitions to be
 *							eliminated from the scan, or NULL if not possible.
 *		valid_subplans		for runtime pruning, valid appendplans indexes to
 *							scan.
 *
 * (The fields above carry an "as_" prefix in the struct.)
 * ----------------
 */

/*
 * The struct and its typedef are declared ahead of the definition because
 * the choose_next_subplan member takes an AppendState pointer.
 */
struct AppendState;
typedef struct AppendState AppendState;
struct ParallelAppendState;
typedef struct ParallelAppendState ParallelAppendState;
struct PartitionPruneState;

struct AppendState
{
	PlanState	ps;				/* its first field is NodeTag */
	PlanState **appendplans;	/* array of PlanStates for my inputs */
	int			as_nplans;
	int			as_whichplan;
	int			as_first_partial_plan;	/* Index of 'appendplans' containing
										 * the first partial plan */
	ParallelAppendState *as_pstate; /* parallel coordination info */
	Size		pstate_len;		/* size of parallel coordination info */
	struct PartitionPruneState *as_prune_state;
	Bitmapset  *as_valid_subplans;
	/*
	 * Callback taking this AppendState and returning bool.  NOTE(review):
	 * presumably advances as_whichplan and reports whether another subplan
	 * is available -- confirm in nodeAppend.c.
	 */
	bool		(*choose_next_subplan) (AppendState *);
};

/*
 * SequenceState
 *
 * Executor state for a node with an array of subplans.
 *
 *		subplans		array of PlanStates for the subplans
 *		numSubplans		NOTE(review): presumably the length of subplans[]
 *						-- confirm
 *		initState		true if no subplan has been executed
 */
typedef struct SequenceState
{
	PlanState	ps;
	PlanState **subplans;
	int			numSubplans;

	/*
	 * True if no subplan has been executed.
	 */
	bool		initState;
} SequenceState;

/* ----------------
 *	 MergeAppendState information
 *
 *		nplans			how many plans are in the array
 *		nkeys			number of sort key columns
 *		sortkeys		sort keys in SortSupport representation
 *		slots			current output tuple of each subplan
 *		heap			heap of active tuples
 *		initialized		true if we have fetched first tuple from each subplan
 *		noopscan		true if partition pruning proved that none of the
 *						mergeplans can contain a record to satisfy this query.
 *		prune_state		details required to allow partitions to be
 *						eliminated from the scan, or NULL if not possible.
 *		valid_subplans	for runtime pruning, valid mergeplans indexes to
 *						scan.
 *
 * (The fields above carry an "ms_" prefix in the struct.)
 * ----------------
 */
typedef struct MergeAppendState
{
	PlanState	ps;				/* its first field is NodeTag */
	PlanState **mergeplans;		/* array of PlanStates for my inputs */
	int			ms_nplans;
	int			ms_nkeys;
	SortSupport ms_sortkeys;	/* array of length ms_nkeys */
	TupleTableSlot **ms_slots;	/* array of length ms_nplans */
	struct binaryheap *ms_heap; /* binary heap of slot indices */
	bool		ms_initialized; /* are subplans started? */
	bool		ms_noopscan;
	struct PartitionPruneState *ms_prune_state;
	Bitmapset  *ms_valid_subplans;
} MergeAppendState;

/* ----------------
 *	 RecursiveUnionState information
 *
 *		RecursiveUnionState is used for performing a recursive union.
 *
 *		recursing			T when we're done scanning the non-recursive term
 *		intermediate_empty	T if intermediate_table is currently empty
 *		working_table		working table (to be scanned by recursive term)
 *		intermediate_table	current recursive output (next generation of WT)
 *
 *		The hashing-related fields at the end of the struct are unused in
 *		the UNION ALL case.
 * ----------------
 */
typedef struct RecursiveUnionState
{
	PlanState	ps;				/* its first field is NodeTag */
	bool		recursing;
	bool		intermediate_empty;
	Tuplestorestate *working_table;
	Tuplestorestate *intermediate_table;
	/* Remaining fields are unused in UNION ALL case */
	Oid		   *eqfuncoids;		/* per-grouping-field equality fns */
	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
	MemoryContext tempContext;	/* short-term context for comparisons */
	TupleHashTable hashtable;	/* hash table for tuples already seen */
	MemoryContext tableContext; /* memory context containing hash table */
} RecursiveUnionState;

/* ----------------
 *	 BitmapAndState information
 *
 *		bitmapplans		array of PlanStates for the input plans
 *		nplans			number of input plans
 *		bitmap			output stream bitmap
 * ----------------
 */
typedef struct BitmapAndState
{
	PlanState	ps;				/* its first field is NodeTag */
	PlanState **bitmapplans;	/* array of PlanStates for my inputs */
	int			nplans;			/* number of input plans */
	Node	   *bitmap;			/* output stream bitmap */
} BitmapAndState;

/* ----------------
 *	 BitmapOrState information
 *
 *		bitmapplans		array of PlanStates for the input plans
 *		nplans			number of input plans
 *		bitmap			output bitmap
 * ----------------
 */
typedef struct BitmapOrState
{
	PlanState	ps;				/* its first field is NodeTag */
	PlanState **bitmapplans;	/* array of PlanStates for my inputs */
	int			nplans;			/* number of input plans */
	Node	   *bitmap;			/* output bitmap */
} BitmapOrState;

/* ----------------------------------------------------------------
 *				 Scan State Information
 * ----------------------------------------------------------------
 */

/*
 * ScanStatus - the stage a scan node is currently in.
 *
 *	SCAN_INIT	the scan state is being initialized.
 *	SCAN_SCAN	all initialization needed for reading tuples is done; we
 *				are reading tuples, or are ready to read them.
 *	SCAN_DONE	all relations/partitions have been processed, but the scan
 *				state has not been destroyed yet, so a ReScan is still
 *				possible.
 *	SCAN_END	completely done; a ReScan would require redoing the whole
 *				initialization phase.
 */
typedef enum
{
	SCAN_INIT = 0,
	SCAN_SCAN = 1,
	SCAN_DONE = 2,
	SCAN_END = 3
} ScanStatus;

/* ----------------
 *	 ScanState information
 *
 *		ScanState extends PlanState for node types that represent
 *		scans of an underlying relation.  It can also be used for nodes
 *		that scan the output of an underlying plan node --- in that case,
 *		only ScanTupleSlot is actually useful, and it refers to the tuple
 *		retrieved from the subplan.
 *
 *		currentRelation    relation being scanned (NULL if none)
 *		currentScanDesc    current scan descriptor for scan (NULL if none)
 *		ScanTupleSlot	   pointer to slot in tuple table holding scan tuple
 *
 *		(The fields above carry an "ss_" prefix in the struct.)
 * ----------------
 */
typedef struct ScanState
{
	PlanState	ps;				/* its first field is NodeTag */
	Relation	ss_currentRelation;
	struct TableScanDescData *ss_currentScanDesc;
	TupleTableSlot *ss_ScanTupleSlot;
} ScanState;

/* ----------------
 *	 SeqScanState information
 *
 *		pscan_len		size of parallel heap scan descriptor
 * ----------------
 */
typedef struct SeqScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	Size		pscan_len;		/* size of parallel heap scan descriptor */
} SeqScanState;

/* ----------------
 *	 SampleScanState information
 *
 * Executor state for a TABLESAMPLE scan: expression states for the
 * TABLESAMPLE and REPEATABLE parameters, the tablesample method's routine
 * descriptor and private state, and progress flags for the scan.
 * ----------------
 */
typedef struct SampleScanState
{
	ScanState	ss;
	List	   *args;			/* expr states for TABLESAMPLE params */
	ExprState  *repeatable;		/* expr state for REPEATABLE expr */
	/* use struct pointer to avoid including tsmapi.h here */
	struct TsmRoutine *tsmroutine;	/* descriptor for tablesample method */
	void	   *tsm_state;		/* tablesample method can keep state here */
	bool		use_bulkread;	/* use bulkread buffer access strategy? */
	bool		use_pagemode;	/* use page-at-a-time visibility checking? */
	bool		begun;			/* false means need to call BeginSampleScan */
	uint32		seed;			/* random seed */
	int64		donetuples;		/* number of tuples already returned */
	bool		haveblock;		/* has a block for sampling been determined */
	bool		done;			/* exhausted all tuples? */
} SampleScanState;

/*
 * These structs store information about index quals that don't have simple
 * constant right-hand sides.  See comments for ExecIndexBuildScanKeys()
 * for discussion.
 *
 * IndexRuntimeKeyInfo pairs a scankey with the expression whose value is to
 * be put into it.
 */
typedef struct
{
	struct ScanKeyData *scan_key;	/* scankey to put value into */
	ExprState  *key_expr;		/* expr to evaluate to get value */
	bool		key_toastable;	/* is expr's result a toastable datatype? */
} IndexRuntimeKeyInfo;

/*
 * IndexArrayKeyInfo pairs a scankey with an expression yielding an array
 * value, and tracks the array's elements and the next element to use.
 */
typedef struct
{
	struct ScanKeyData *scan_key;	/* scankey to put value into */
	ExprState  *array_expr;		/* expr to evaluate to get array value */
	int			next_elem;		/* next array element to use */
	int			num_elems;		/* number of elems in current array value */
	Datum	   *elem_values;	/* array of num_elems Datums */
	bool	   *elem_nulls;		/* array of num_elems is-null flags */
} IndexArrayKeyInfo;

/* ----------------
 *	 IndexScanState information
 *
 *		indexqualorig	   execution state for indexqualorig expressions
 *		indexorderbyorig   execution state for indexorderbyorig expressions
 *		ScanKeys		   Skey structures for index quals
 *		NumScanKeys		   number of ScanKeys
 *		OrderByKeys		   Skey structures for index ordering operators
 *		NumOrderByKeys	   number of OrderByKeys
 *		RuntimeKeys		   info about Skeys that must be evaluated at runtime
 *		NumRuntimeKeys	   number of RuntimeKeys
 *		RuntimeKeysReady   true if runtime Skeys have been computed
 *		RuntimeContext	   expr context for evaling runtime Skeys
 *		RelationDesc	   index relation descriptor
 *		ScanDesc		   index scan descriptor
 *
 *		ReorderQueue	   tuples that need reordering due to re-check
 *		ReachedEnd		   have we fetched all tuples from index already?
 *		OrderByValues	   values of ORDER BY exprs of last fetched tuple
 *		OrderByNulls	   null flags for OrderByValues
 *		SortSupport		   for reordering ORDER BY exprs
 *		OrderByTypByVals   is the datatype of order by expression pass-by-value?
 *		OrderByTypLens	   typlens of the datatypes of order by expressions
 *		PscanLen		   size of parallel index scan descriptor
 *
 *		tableOid		   oid of the partition or relation on which the
 *						   current index relation is defined
 *
 *		(Most fields above carry an "iss_" prefix in the struct.)
 * ----------------
 */
typedef struct IndexScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprState  *indexqualorig;
	List	   *indexorderbyorig;
	struct ScanKeyData *iss_ScanKeys;
	int			iss_NumScanKeys;
	struct ScanKeyData *iss_OrderByKeys;
	int			iss_NumOrderByKeys;
	IndexRuntimeKeyInfo *iss_RuntimeKeys;
	int			iss_NumRuntimeKeys;
	bool		iss_RuntimeKeysReady;
	ExprContext *iss_RuntimeContext;
	Relation	iss_RelationDesc;
	struct IndexScanDescData *iss_ScanDesc;

	/* These are needed for re-checking ORDER BY expr ordering */
	pairingheap *iss_ReorderQueue;
	bool		iss_ReachedEnd;
	Datum	   *iss_OrderByValues;
	bool	   *iss_OrderByNulls;
	SortSupport iss_SortSupport;
	bool	   *iss_OrderByTypByVals;
	int16	   *iss_OrderByTypLens;
	Size		iss_PscanLen;

	/*
	 * tableOid is the oid of the partition or relation on which our current
	 * index relation is defined.
	 */
	Oid			tableOid;
} IndexScanState;

/*
 * DynamicIndexScanState
 *
 * Executor state for an index scan over a set of partitions of a
 * partitioned table, identified by the oids in partOids.
 *
 * NOTE(review): indexScanState is presumably the regular index scan state
 * for the partition currently being scanned -- confirm against the node's
 * implementation.
 */
typedef struct DynamicIndexScanState
{
	ScanState	ss;

	/* NOTE(review): declared int; presumably holds a ScanStatus value */
	int			scan_state; /* the stage of scanning */

	int			eflags;
	IndexScanState *indexScanState;
	List	   *tuptable;
	ExprContext *outer_exprContext;

	/*
	 * This memory context will be reset per-partition to free
	 * up previous partition's memory
	 */
	MemoryContext partitionMemoryContext;

	int			nOids; /* number of oids to scan in partitioned table */
	Oid		   *partOids; /* list of oids to scan in partitioned table */
	int			whichPart; /* index of current partition in partOids */
	/* The partition oid for which the current varnos are mapped */
	Oid columnLayoutOid;

	struct PartitionPruneState *as_prune_state; /* partition dynamic pruning state */
	Bitmapset  *as_valid_subplans; /* used to determine partitions during dynamic pruning*/
	/*
	 * NOTE(review): the original comment here was truncated ("flag that is
	 * set when"); presumably set once dynamic pruning has been performed --
	 * confirm.
	 */
	bool 		did_pruning;
} DynamicIndexScanState;

/* ----------------
 *	 IndexOnlyScanState information
 *
 *		indexqual		   execution state for indexqual expressions
 *		ScanKeys		   Skey structures for index quals
 *		NumScanKeys		   number of ScanKeys
 *		OrderByKeys		   Skey structures for index ordering operators
 *		NumOrderByKeys	   number of OrderByKeys
 *		RuntimeKeys		   info about Skeys that must be evaluated at runtime
 *		NumRuntimeKeys	   number of RuntimeKeys
 *		RuntimeKeysReady   true if runtime Skeys have been computed
 *		RuntimeContext	   expr context for evaling runtime Skeys
 *		RelationDesc	   index relation descriptor
 *		ScanDesc		   index scan descriptor
 *		TableSlot		   slot for holding tuples fetched from the table
 *		VMBuffer		   buffer in use for visibility map testing, if any
 *		PscanLen		   size of parallel index-only scan descriptor
 *
 *		(Most fields above carry an "ioss_" prefix in the struct.)
 * ----------------
 */
typedef struct IndexOnlyScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprState  *indexqual;
	struct ScanKeyData *ioss_ScanKeys;
	int			ioss_NumScanKeys;
	struct ScanKeyData *ioss_OrderByKeys;
	int			ioss_NumOrderByKeys;
	IndexRuntimeKeyInfo *ioss_RuntimeKeys;
	int			ioss_NumRuntimeKeys;
	bool		ioss_RuntimeKeysReady;
	ExprContext *ioss_RuntimeContext;
	Relation	ioss_RelationDesc;
	struct IndexScanDescData *ioss_ScanDesc;
	TupleTableSlot *ioss_TableSlot;
	Buffer		ioss_VMBuffer;
	Size		ioss_PscanLen;
} IndexOnlyScanState;

/* ----------------
 *	 BitmapIndexScanState information
 *
 *		result			   bitmap to return output into, or NULL
 *		ScanKeys		   Skey structures for index quals
 *		NumScanKeys		   number of ScanKeys
 *		RuntimeKeys		   info about Skeys that must be evaluated at runtime
 *		NumRuntimeKeys	   number of RuntimeKeys
 *		ArrayKeys		   info about Skeys that come from ScalarArrayOpExprs
 *		NumArrayKeys	   number of ArrayKeys
 *		RuntimeKeysReady   true if runtime Skeys have been computed
 *		RuntimeContext	   expr context for evaling runtime Skeys
 *		RelationDesc	   index relation descriptor
 *		ScanDesc		   index scan descriptor
 *
 *		(The fields above carry a "biss_" prefix in the struct.)
 * ----------------
 */
typedef struct BitmapIndexScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	Node       *biss_result;	/* output bitmap */
	struct ScanKeyData *biss_ScanKeys;
	int			biss_NumScanKeys;
	IndexRuntimeKeyInfo *biss_RuntimeKeys;
	int			biss_NumRuntimeKeys;
	IndexArrayKeyInfo *biss_ArrayKeys;
	int			biss_NumArrayKeys;
	bool		biss_RuntimeKeysReady;
	ExprContext *biss_RuntimeContext;
	Relation	biss_RelationDesc;
	struct IndexScanDescData *biss_ScanDesc;
} BitmapIndexScanState;

/*
 * DynamicBitmapIndexScanState
 *
 * Executor state for the dynamic (per-partition) variant of a bitmap index
 * scan.
 *
 * NOTE(review): bitmapIndexScanState is presumably the regular bitmap index
 * scan state for the partition currently being scanned -- confirm against
 * the node's implementation.
 */
typedef struct DynamicBitmapIndexScanState
{
	ScanState	ss;

	/* NOTE(review): declared int; presumably holds a ScanStatus value */
	int			scan_state; /* the stage of scanning */

	int			eflags;
	BitmapIndexScanState *bitmapIndexScanState;
	ExprContext *outer_exprContext;

	/*
	 * This memory context will be reset per-partition to free
	 * up previous partition's memory
	 */
	MemoryContext partitionMemoryContext;

	/* The partition oid for which the current varnos are mapped */
	Oid columnLayoutOid;

	List	   *tuptable;
} DynamicBitmapIndexScanState;

/* ----------------
 *	 SharedBitmapState information
 *
 *	 Progress of TIDBitmap creation as seen by cooperating workers.
 *
 *		BM_INITIAL		Creation of the TIDBitmap has not started.  The
 *						first worker that observes this state switches it to
 *						BM_INPROGRESS and takes responsibility for building
 *						the TIDBitmap.
 *		BM_INPROGRESS	The TIDBitmap is being built; other workers must
 *						sleep until it is finished.
 *		BM_FINISHED		The TIDBitmap is complete, so every worker may now
 *						iterate over it.
 * ----------------
 */
typedef enum
{
	BM_INITIAL = 0,
	BM_INPROGRESS = 1,
	BM_FINISHED = 2
} SharedBitmapState;

/* ----------------
 *	 ParallelBitmapHeapState information
 *		tbmiterator				iterator for scanning current pages
 *		prefetch_iterator		iterator for prefetching ahead of current page
 *		mutex					mutual exclusion for the prefetching variable
 *								and state
 *		prefetch_pages			# pages prefetch iterator is ahead of current
 *		prefetch_target			current target prefetch distance
 *		state					current state of the TIDBitmap
 *		cv						conditional wait variable
 *		phs_snapshot_data		snapshot data shared to workers
 *
 *	 phs_snapshot_data is a flexible array member and therefore must remain
 *	 the last field of the struct.
 * ----------------
 */
typedef struct ParallelBitmapHeapState
{
	dsa_pointer tbmiterator;
	dsa_pointer prefetch_iterator;
	slock_t		mutex;
	int			prefetch_pages;
	int			prefetch_target;
	SharedBitmapState state;
	ConditionVariable cv;
	char		phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
} ParallelBitmapHeapState;

/* ----------------
 *	 BitmapHeapScanState information
 *
 *		bitmapqualorig	   execution state for bitmapqualorig expressions
 *		tbm				   bitmap obtained from child index scan(s)
 *		tbmiterator		   iterator for scanning current pages
 *		tbmres			   current-page data
 *		can_skip_fetch	   can we potentially skip tuple fetches in this scan?
 *		return_empty_tuples number of empty tuples to return
 *		vmbuffer		   buffer for visibility-map lookups
 *		pvmbuffer		   ditto, for prefetched pages
 *		exact_pages		   total number of exact pages retrieved
 *		lossy_pages		   total number of lossy pages retrieved
 *		prefetch_iterator  iterator for prefetching ahead of current page
 *		prefetch_pages	   # pages prefetch iterator is ahead of current
 *		prefetch_target    current target prefetch distance
 *		prefetch_maximum   maximum value for prefetch_target
 *		pscan_len		   size of the shared memory for parallel bitmap
 *		initialized		   is the node ready to iterate?
 *		shared_tbmiterator	   shared iterator
 *		shared_prefetch_iterator shared iterator for prefetching
 *		pstate			   shared state for parallel bitmap scan
 * ----------------
 */
typedef struct BitmapHeapScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprState  *bitmapqualorig;
	Node	   *tbm;
	GenericBMIterator *tbmiterator;
	TBMIterateResult *tbmres;
	bool		can_skip_fetch;
	int			return_empty_tuples;
	Buffer		vmbuffer;
	Buffer		pvmbuffer;
	long		exact_pages;
	long		lossy_pages;
	GenericBMIterator *prefetch_iterator;
	int			prefetch_pages;
	int			prefetch_target;
	int			prefetch_maximum;
	Size		pscan_len;
	bool		initialized;
	TBMSharedIterator *shared_tbmiterator;
	TBMSharedIterator *shared_prefetch_iterator;
	ParallelBitmapHeapState *pstate;
} BitmapHeapScanState;

/*
 * DynamicBitmapHeapScanState
 *
 * Executor state for a bitmap heap scan over a set of partitions of a
 * partitioned table, identified by the oids in partOids.
 *
 * NOTE(review): bhsState is presumably the regular bitmap heap scan state
 * for the partition currently being scanned -- confirm against the node's
 * implementation.
 */
typedef struct DynamicBitmapHeapScanState
{
	ScanState	ss;				/* its first field is NodeTag */

	/* NOTE(review): declared int; presumably holds a ScanStatus value */
	int			scan_state; /* the stage of scanning */

	int			eflags;
	BitmapHeapScanState *bhsState;

	/*
	 * The first partition requires initialization of expression states,
	 * such as qual, regardless of whether we need to re-map varattno
	 */
	bool		firstPartition;
	/*
	 * lastRelOid is the last relation that corresponds to the
	 * varattno mapping of qual and target list. Each time we open a new partition, we will
	 * compare the last relation with current relation by using varattnos_map()
	 * and then convert the varattno to the new varattno
	 */
	Oid			lastRelOid;

	/*
	 * scanrelid is the RTE index for this scan node. It will be used to select
	 * varno whose varattno will be remapped, if necessary
	 */
	Index		scanrelid;

	/*
	 * This memory context will be reset per-partition to free
	 * up previous partition's memory
	 */
	MemoryContext partitionMemoryContext;


	int			nOids; /* number of oids to scan in partitioned table */
	Oid		   *partOids; /* list of oids to scan in partitioned table */
	int			whichPart; /* index of current partition in partOids */

	struct PartitionPruneState *as_prune_state; /* partition dynamic pruning state */
	Bitmapset  *as_valid_subplans; /* used to determine partitions during dynamic pruning*/
	/*
	 * NOTE(review): the original comment here was truncated ("flag that is
	 * set when"); presumably set once dynamic pruning has been performed --
	 * confirm.
	 */
	bool 		did_pruning;
} DynamicBitmapHeapScanState;

/* ----------------
 *	 TidScanState information
 *
 *		tidexprs	   list of TidExpr structs (see nodeTidscan.c)
 *		isCurrentOf    scan has a CurrentOfExpr qual
 *		NumTids		   number of tids in this scan
 *		TidPtr		   index of currently fetched tid
 *		TidList		   evaluated item pointers (array of size NumTids)
 *		htup		   currently-fetched tuple, if any
 *
 *		(The fields above carry a "tss_" prefix in the struct.)
 * ----------------
 */
typedef struct TidScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	List	   *tss_tidexprs;
	bool		tss_isCurrentOf;
	int			tss_NumTids;
	int			tss_TidPtr;
	ItemPointerData *tss_TidList;
	HeapTupleData tss_htup;
} TidScanState;

/* ----------------
 *	 SubqueryScanState information
 *
 *		SubqueryScanState is used for scanning a sub-query in the range table.
 *		ScanTupleSlot references the current output tuple of the sub-query.
 *
 *		subplan			plan state tree of the sub-query
 * ----------------
 */
typedef struct SubqueryScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	PlanState  *subplan;
} SubqueryScanState;

/* ----------------
 *	 FunctionScanState information
 *
 *		Function nodes are used to scan the results of a
 *		function appearing in FROM (typically a function returning set).
 *
 *		eflags				node's capability flags
 *		ordinality			is this scan WITH ORDINALITY?
 *		simple				true if we have 1 function and no ordinality
 *		ordinal				current ordinal column value
 *		nfuncs				number of functions being executed
 *		funcstates			per-function execution states (private in
 *							nodeFunctionscan.c)
 *		argcontext			memory context to evaluate function arguments in
 * ----------------
 */
/* per-function state; only used through a pointer in this header */
struct FunctionScanPerFuncState;

typedef struct FunctionScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	int			eflags;			/* node's capability flags */
	bool		ordinality;		/* is this scan WITH ORDINALITY? */
	bool		simple;			/* true if we have 1 function and no
								 * ordinality */
	int64		ordinal;		/* current ordinal column value */
	int			nfuncs;			/* number of functions being executed */
	struct FunctionScanPerFuncState *funcstates;	/* array of length nfuncs */
	MemoryContext argcontext;	/* memory context to evaluate function
								 * arguments in */

	bool		delayEagerFree;		/* is it safe to free memory used by this node,
									 * when this node has outputted its last row? */

	/* tuplestore info, used when the function scan runs as an initplan */
	bool		resultInTupleStore; /* function result stored in tuplestore */
	struct Tuplestorestate *ts_state;	/* tuple store state */
	int			initplanId;			/* id of the initplan the function is executed
									 * in (NOTE(review): wording reconstructed
									 * from a garbled original comment) */
} FunctionScanState;

/*
 * Prototype only; the definition is not in this header.
 * NOTE(review): judging by the name and signature, this presumably writes a
 * buffer-file name prefix derived from initplan_id into p (capacity "size")
 * -- confirm against the definition.
 */
extern void function_scan_create_bufname_prefix(char *p, int size, int initplan_id);

/* ----------------
 * TableFunctionState information
 *
 *   Table Function nodes are used to scan the results of a table function
 *   operating over a table as input.
 * ----------------
 */
typedef struct TableFunctionState
{
	ScanState	ss;				/* Table Function is a Scan */
	struct AnyTableData *inputscan;		/* subquery scan data */
	TupleDesc	resultdesc;		/* Function Result descriptor */
	HeapTupleData tuple;		/* Returned tuple */

	FmgrInfo	flinfo;			/* NOTE(review): presumably the fmgr lookup
								 * info for the table function -- confirm */
	FunctionCallInfo fcinfo;	/* Function Call Context */
	ReturnSetInfo rsinfo;		/* Resultset Context */
	List	   *args;			/* ExprStates for all the arguments */

	bool		is_rowtype;		/* Function returns records */
	bool		is_firstcall;	/* NOTE(review): presumably true until the
								 * function has been called once -- confirm */
	bytea	   *userdata;		/* bytea given by describe func */
} TableFunctionState;


/* ----------------
 *	 ValuesScanState information
 *
 *		ValuesScan nodes are used to scan the results of a VALUES list
 *
 *		rowcontext			per-expression-list context
 *		exprlists			array of expression lists being evaluated
 *		array_len			size of array
 *		curr_idx			current array index (0-based)
 *
 *	Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection
 *	expressions attached to the node.  We create a second ExprContext,
 *	rowcontext, in which to build the executor expression state for each
 *	Values sublist.  Resetting this context lets us get rid of expression
 *	state for each row, avoiding major memory leakage over a long values list.
 * ----------------
 */
typedef struct ValuesScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprContext *rowcontext;	/* per-expression-list context; reset to
								 * discard each row's expression state */
	List	  **exprlists;		/* array of expression lists being evaluated */
	int			array_len;		/* size of the exprlists array */
	int			curr_idx;		/* current array index (0-based) */
} ValuesScanState;

/* ----------------
 *		TableFuncScanState node
 *
 * Used in table-expression functions like XMLTABLE.
 * ----------------
 */
typedef struct TableFuncScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprState  *docexpr;		/* state for document expression */
	ExprState  *rowexpr;		/* state for row-generating expression */
	List	   *colexprs;		/* states for column-generating expressions */
	List	   *coldefexprs;	/* states for column default expressions */
	List	   *ns_names;		/* same as TableFunc.ns_names */
	List	   *ns_uris;		/* list of states of namespace URI exprs */
	Bitmapset  *notnulls;		/* nullability flag for each output column */
	void	   *opaque;			/* table builder private space */
	const struct TableFuncRoutine *routine; /* table builder methods */
	FmgrInfo   *in_functions;	/* input function for each column */
	Oid		   *typioparams;	/* typioparam for each column */
	int64		ordinal;		/* row number to be output next */
	MemoryContext perTableCxt;	/* per-table context */
	Tuplestorestate *tupstore;	/* output tuple store */
} TableFuncScanState;

/* ----------------
 *	 CteScanState information
 *
 *		CteScan nodes are used to scan a CommonTableExpr query.
 *
 * Multiple CteScan nodes can read out from the same CTE query.  We use
 * a tuplestore to hold rows that have been read from the CTE query but
 * not yet consumed by all readers.
 * ----------------
 */
typedef struct CteScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	int			eflags;			/* capability flags to pass to tuplestore */
	int			readptr;		/* index of my tuplestore read pointer
								 * (NOTE(review): presumably into the
								 * leader's cte_table -- confirm) */
	PlanState  *cteplanstate;	/* PlanState for the CTE query itself */
	/*
	 * Link to the "leader" CteScanState (possibly this same node).  Several
	 * CteScan nodes can read the same CTE query; the shared state below is
	 * kept in the leader only.
	 */
	struct CteScanState *leader;
	/* The remaining fields are only valid in the "leader" CteScanState */
	Tuplestorestate *cte_table; /* rows already read from the CTE query */
	bool		eof_cte;		/* reached end of CTE query? */
} CteScanState;

/* ----------------
 *	 NamedTuplestoreScanState information
 *
 *		NamedTuplestoreScan nodes are used to scan a Tuplestore created and
 *		named prior to execution of the query.  An example is a transition
 *		table for an AFTER trigger.
 *
 * Multiple NamedTuplestoreScan nodes can read out from the same Tuplestore.
 * ----------------
 */
typedef struct NamedTuplestoreScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	int			readptr;		/* index of my tuplestore read pointer */
	TupleDesc	tupdesc;		/* format of the tuples in the tuplestore */
	Tuplestorestate *relation;	/* the named tuplestore holding the rows */
} NamedTuplestoreScanState;

/* ----------------
 *	 WorkTableScanState information
 *
 *		WorkTableScan nodes are used to scan the work table created by
 *		a RecursiveUnion node.  We locate the RecursiveUnion node
 *		during executor startup.
 * ----------------
 */
typedef struct WorkTableScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	RecursiveUnionState *rustate;	/* the RecursiveUnion node whose work table
									 * is scanned; located during executor
									 * startup */
} WorkTableScanState;

/* ----------------
 *	 ForeignScanState information
 *
 *		ForeignScan nodes are used to scan foreign-data tables.
 * ----------------
 */
typedef struct ForeignScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	ExprState  *fdw_recheck_quals;	/* original quals not in ss.ps.qual */
	Size		pscan_len;		/* size of parallel coordination information */
	/*
	 * The wrapper's FdwRoutine.  Declared via a struct pointer so that this
	 * header does not have to include fdwapi.h.
	 */
	struct FdwRoutine *fdwroutine;
	void	   *fdw_state;		/* foreign-data wrapper can keep state here */
} ForeignScanState;

/*
 * DynamicSeqScanState
 */
typedef struct DynamicSeqScanState
{
	ScanState	ss;				/* its first field is NodeTag */

	int			scan_state; /* the stage of scanning */

	int			eflags;			/* NOTE(review): presumably the executor
								 * flags saved for the per-partition scans
								 * -- confirm */
	SeqScanState *seqScanState;	/* NOTE(review): presumably the SeqScanState
								 * of the partition currently being scanned
								 * -- confirm */

	/*
	 * The first partition requires initialization of expression states,
	 * such as qual and targetlist, regardless of whether we need to re-map varattno
	 */
	bool		firstPartition;
	/*
	 * lastRelOid is the last relation that corresponds to the
	 * varattno mapping of qual and target list. Each time we open a new partition, we will
	 * compare the last relation with current relation by using varattnos_map()
	 * and then convert the varattno to the new varattno
	 */
	Oid			lastRelOid;

	/*
	 * scanrelid is the RTE index for this scan node. It will be used to select
	 * varno whose varattno will be remapped, if necessary
	 */
	Index		scanrelid;

	/*
	 * This memory context will be reset per-partition to free
	 * up previous partition's memory
	 */
	MemoryContext partitionMemoryContext;

	int			nOids; /* number of oids to scan in partitioned table */
	Oid		   *partOids; /* list of oids to scan in partitioned table */
	int			whichPart; /* index of current partition in partOids */

	struct PartitionPruneState *as_prune_state; /* partition dynamic pruning state */
	Bitmapset  *as_valid_subplans; /* used to determine partitions during dynamic pruning */
	bool 		did_pruning; /* NOTE(review): original comment was truncated
							  * ("flag that is set when"); presumably set once
							  * dynamic partition pruning has been performed
							  * -- confirm */
} DynamicSeqScanState;

/* ----------------
 *	 CustomScanState information
 *
 *		CustomScan nodes are used to execute custom code within executor.
 *
 * Core code must avoid assuming that the CustomScanState is only as large as
 * the structure declared here; providers are allowed to make it the first
 * element in a larger structure, and typically would need to do so.  The
 * struct is actually allocated by the CreateCustomScanState method associated
 * with the plan node.  Any additional fields can be initialized there, or in
 * the BeginCustomScan method.
 * ----------------
 */
/* only used through a pointer in this header */
struct CustomExecMethods;

typedef struct CustomScanState
{
	ScanState	ss;				/* its first field is NodeTag */
	uint32		flags;			/* mask of CUSTOMPATH_* flags, see
								 * nodes/extensible.h */
	List	   *custom_ps;		/* list of child PlanState nodes, if any */
	Size		pscan_len;		/* size of parallel coordination information */
	const struct CustomExecMethods *methods;	/* NOTE(review): presumably the
												 * provider's execution
												 * callbacks -- confirm */
} CustomScanState;

/* ----------------------------------------------------------------
 *				 Join State Information
 * ----------------------------------------------------------------
 */

/* ----------------
 *	 JoinState information
 *
 *		Superclass for state nodes of join plans.
 * ----------------
 */
typedef struct JoinState
{
	PlanState	ps;				/* its first field is NodeTag */
	JoinType	jointype;		/* kind of join, see JoinType */
	bool		single_match;	/* True if we should skip to next outer tuple
								 * after finding one inner match */
	ExprState  *joinqual;		/* JOIN quals (in addition to ps.qual) */
} JoinState;

/* ----------------
 *	 NestLoopState information
 *
 *		NeedNewOuter	   true if need new outer tuple on next call
 *		MatchedOuter	   true if found a join match for current outer tuple
 *		NullInnerTupleSlot prepared null tuple for left outer joins
 * ----------------
 */
typedef struct NestLoopState
{
	JoinState	js;				/* its first field is NodeTag */
	bool		nl_NeedNewOuter;	/* true if need new outer tuple on next call */
	bool		nl_MatchedOuter;	/* true if found a join match for current
									 * outer tuple */
	bool		shared_outer;
	bool		prefetch_inner;
	bool		prefetch_joinqual;
	bool		prefetch_qual;
	bool		reset_inner; /*CDB-OLAP*/
	bool		require_inner_reset; /*CDB-OLAP*/

	TupleTableSlot *nl_NullInnerTupleSlot;	/* prepared null tuple for left
											 * outer joins */

	List	   *nl_InnerJoinKeys;        /* list of ExprState nodes */
	List	   *nl_OuterJoinKeys;        /* list of ExprState nodes */
	bool		nl_innerSideScanned;      /* set to true once we've scanned all inner tuples the first time */
	bool		nl_qualResultForNull;     /* the value of the join condition when one of the sides contains a NULL */
} NestLoopState;

/* ----------------
 *	 MergeJoinState information
 *
 *		NumClauses		   number of mergejoinable join clauses
 *		Clauses			   info for each mergejoinable clause
 *		JoinState		   current state of ExecMergeJoin state machine
 *		SkipMarkRestore    true if we may skip Mark and Restore operations
 *		ExtraMarks		   true to issue extra Mark operations on inner scan
 *		ConstFalseJoin	   true if we have a constant-false joinqual
 *		FillOuter		   true if should emit unjoined outer tuples anyway
 *		FillInner		   true if should emit unjoined inner tuples anyway
 *		MatchedOuter	   true if found a join match for current outer tuple
 *		MatchedInner	   true if found a join match for current inner tuple
 *		OuterTupleSlot	   slot in tuple table for cur outer tuple
 *		InnerTupleSlot	   slot in tuple table for cur inner tuple
 *		MarkedTupleSlot    slot in tuple table for marked tuple
 *		NullOuterTupleSlot prepared null tuple for right outer joins
 *		NullInnerTupleSlot prepared null tuple for left outer joins
 *		OuterEContext	   workspace for computing outer tuple's join values
 *		InnerEContext	   workspace for computing inner tuple's join values
 * ----------------
 */
/* private in nodeMergejoin.c: */
typedef struct MergeJoinClauseData *MergeJoinClause;

typedef struct MergeJoinState
{
	JoinState	js;				/* its first field is NodeTag */
	int			mj_NumClauses;	/* number of mergejoinable join clauses */
	MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */
	int			mj_JoinState;	/* current state of ExecMergeJoin state
								 * machine */
	bool		mj_SkipMarkRestore;	/* true if we may skip Mark and Restore
									 * operations */
	bool		mj_ExtraMarks;	/* true to issue extra Mark operations on
								 * inner scan */
	bool		mj_ConstFalseJoin;	/* true if we have a constant-false
									 * joinqual */
	bool		mj_FillOuter;	/* true if should emit unjoined outer tuples
								 * anyway */
	bool		mj_FillInner;	/* true if should emit unjoined inner tuples
								 * anyway */
	bool		mj_MatchedOuter;	/* true if found a join match for current
									 * outer tuple */
	bool		mj_MatchedInner;	/* true if found a join match for current
									 * inner tuple */
	TupleTableSlot *mj_OuterTupleSlot;	/* slot for cur outer tuple */
	TupleTableSlot *mj_InnerTupleSlot;	/* slot for cur inner tuple */
	TupleTableSlot *mj_MarkedTupleSlot;	/* slot for marked tuple */
	TupleTableSlot *mj_NullOuterTupleSlot;	/* prepared null tuple for right
											 * outer joins */
	TupleTableSlot *mj_NullInnerTupleSlot;	/* prepared null tuple for left
											 * outer joins */
	ExprContext *mj_OuterEContext;	/* workspace for computing outer tuple's
									 * join values */
	ExprContext *mj_InnerEContext;	/* workspace for computing inner tuple's
									 * join values */
	bool		prefetch_inner; /* MPP-3300 */
	bool		prefetch_joinqual;
	bool		prefetch_qual;
} MergeJoinState;

/* ----------------
 *	 HashJoinState information
 *
 *		hashclauses				original form of the hashjoin condition
 *		hj_OuterHashKeys		the outer hash keys in the hashjoin condition
 *		hj_InnerHashKeys		the inner hash keys in the hashjoin condition
 *		hj_HashOperators		the join operators in the hashjoin condition
 *		hj_HashTable			hash table for the hashjoin
 *								(NULL if table not built yet)
 *		hj_CurHashValue			hash value for current outer tuple
 *		hj_CurBucketNo			regular bucket# for current outer tuple
 *		hj_CurSkewBucketNo		skew bucket# for current outer tuple
 *		hj_CurTuple				last inner tuple matched to current outer
 *								tuple, or NULL if starting search
 *								(hj_CurXXX variables are undefined if
 *								OuterTupleSlot is empty!)
 *		hj_OuterTupleSlot		tuple slot for outer tuples
 *		hj_HashTupleSlot		tuple slot for inner (hashed) tuples
 *		hj_NullOuterTupleSlot	prepared null tuple for right/full outer joins
 *		hj_NullInnerTupleSlot	prepared null tuple for left/full outer joins
 *		hj_FirstOuterTupleSlot	first tuple retrieved from outer plan
 *		hj_JoinState			current state of ExecHashJoin state machine
 *		hj_MatchedOuter			true if found a join match for current outer
 *		hj_OuterNotEmpty		true if outer relation known not empty
 *		hj_nonequijoin			true to force hash table to keep nulls
 * ----------------
 */

/* these structs are defined in executor/hashjoin.h: */
typedef struct HashJoinTupleData *HashJoinTuple;
typedef struct HashJoinTableData *HashJoinTable;

typedef struct HashJoinState
{
	JoinState	js;				/* its first field is NodeTag */
	ExprState  *hashclauses;	/* original form of the hashjoin condition */
	ExprState  *hashqualclauses;	/* CDB: ExprState node (match) */
	List	   *hj_OuterHashKeys;	/* list of ExprState nodes */
	List	   *hj_InnerHashKeys;	/* list of ExprState nodes */
	List	   *hj_HashOperators;	/* list of operator OIDs */
	List	   *hj_Collations;	/* NOTE(review): presumably the collation
								 * OIDs matching the hash keys -- confirm */
	HashJoinTable hj_HashTable;	/* hash table for the hashjoin (NULL if table
								 * not built yet) */
	uint32		hj_CurHashValue;	/* hash value for current outer tuple */
	int			hj_CurBucketNo;	/* regular bucket# for current outer tuple */
	int			hj_CurSkewBucketNo;	/* skew bucket# for current outer tuple */
	HashJoinTuple hj_CurTuple;	/* last inner tuple matched to current outer
								 * tuple, or NULL if starting search */
	TupleTableSlot *hj_OuterTupleSlot;	/* tuple slot for outer tuples */
	TupleTableSlot *hj_HashTupleSlot;	/* tuple slot for inner (hashed)
										 * tuples */
	TupleTableSlot *hj_NullOuterTupleSlot;	/* prepared null tuple for
											 * right/full outer joins */
	TupleTableSlot *hj_NullInnerTupleSlot;	/* prepared null tuple for
											 * left/full outer joins */
	TupleTableSlot *hj_FirstOuterTupleSlot;	/* first tuple retrieved from
											 * outer plan */
	int			hj_JoinState;	/* current state of ExecHashJoin state
								 * machine */
	bool		hj_MatchedOuter;	/* true if found a join match for current
									 * outer */
	bool		hj_OuterNotEmpty;	/* true if outer relation known not empty */
	bool		hj_InnerEmpty;  /* set to true if inner side is empty */
	bool		prefetch_inner;
	bool		prefetch_joinqual;
	bool		prefetch_qual;
	bool		hj_nonequijoin;	/* true to force hash table to keep nulls */

	/* set if the operator created workfiles */
	bool workfiles_created;
	bool reuse_hashtable; /* Do we need to preserve hash table to support rescan */
} HashJoinState;


/* ----------------------------------------------------------------
 *				 Materialization State Information
 * ----------------------------------------------------------------
 */


/* ----------------
 *	 MaterialState information
 *
 *		materialize nodes are used to materialize the results
 *		of a subplan into a temporary file.
 *
 *		ss.ss_ScanTupleSlot refers to output of underlying plan.
 * ----------------
 */
typedef struct MaterialState
{
	ScanState	ss;				/* its first field is NodeTag */
	int			eflags;			/* capability flags to pass to tuplestore */
	bool		eof_underlying; /* reached end of underlying plan? */
	Tuplestorestate *tuplestorestate;	/* holds the materialized rows of the
										 * subplan */

	bool		ts_destroyed;	/* called destroy tuple store? */
	bool		delayEagerFree;	/* is it safe to free memory used by this node,
								 * when this node has outputted its last row? */
} MaterialState;

/* ----------------
 *	  ShareInputScanState information
 *
 *		State of each scanner of the ShareInput node
 * ----------------
 */
/*
 * Forward declarations; only pointers to these are needed here.
 * NOTE(review): NTupleStore and NTupleStoreAccessor are not referenced by
 * ShareInputScanState below -- possibly leftovers; confirm before removing.
 */
struct shareinput_local_state;
struct shareinput_Xslice_reference;
struct NTupleStore;
struct NTupleStoreAccessor;

typedef struct ShareInputScanState
{
	ScanState	ss;				/* its first field is NodeTag */

	Tuplestorestate *ts_state;	/* NOTE(review): presumably the tuplestore
								 * holding the shared input -- confirm */
	int			ts_pos;			/* NOTE(review): presumably this scanner's
								 * position/read pointer in ts_state --
								 * confirm */

	struct shareinput_local_state *local_state;
	struct shareinput_Xslice_reference *ref;

	bool		isready;		/* NOTE(review): presumably set once the
								 * shared input is ready to be read --
								 * confirm */
} ShareInputScanState;

/*
 * XXX Should move into buf file
 *
 * Prototype only; the definition is not in this header.
 * NOTE(review): judging by the name and signature, this presumably writes a
 * buffer-file name prefix derived from share_id into p (capacity "size")
 * -- confirm against the definition.
 */
extern void shareinput_create_bufname_prefix(char* p, int size, int share_id);

/* ----------------
 *	 Shared memory container for per-worker sort information
 * ----------------
 */
typedef struct SharedSortInfo
{
	int			num_workers;	/* NOTE(review): presumably the number of
								 * entries in sinstrument[] -- confirm */
	TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];	/* per-worker
																	 * sort
																	 * information */
} SharedSortInfo;

/* ----------------
 *	 SortState information
 * ----------------
 */
typedef struct SortState
{
	ScanState	ss;				/* its first field is NodeTag */
	bool		randomAccess;	/* need random access to sort output? */
	bool		bounded;		/* is the result set bounded? */
	int64		bound;			/* if bounded, how many tuples are needed */
	bool		sort_Done;		/* sort completed yet? */
	bool		bounded_Done;	/* value of bounded we did the sort with */
	int64		bound_Done;		/* value of bound we did the sort with */
	void	   *tuplesortstate; /* private state of tuplesort.c */
	bool		am_worker;		/* are we a worker? */
	SharedSortInfo *shared_info;	/* one entry per worker */

	bool		noduplicates;	/* true if duplicate rows are to be discarded */

	bool		delayEagerFree;		/* is it safe to free memory used by this node,
									 * when this node has outputted its last row? */
	TuplesortInstrumentation sortstats; /* holds stats, if the Sort is eagerly freed */

} SortState;

/* ---------------------
 *	AggState information
 *
 *	ss.ss_ScanTupleSlot refers to output of underlying plan.
 *
 *	Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and
 *	ecxt_aggnulls arrays, which hold the computed agg values for the current
 *	input group during evaluation of an Agg node's output tuple(s).  We
 *	create a second ExprContext, tmpcontext, in which to evaluate input
 *	expressions and run the aggregate transition functions.
 * ---------------------
 */
/* these structs are private in nodeAgg.c: */
typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
typedef struct AggStatePerHashData *AggStatePerHash;

/*
 * Each FIELDNO_AGGSTATE_* macro below equals the zero-based index, within
 * this struct, of the member declared right after it.  When adding, removing
 * or reordering members, re-count and update every FIELDNO_* that follows.
 * (NOTE(review): presumably consumed by code that addresses members by
 * position -- confirm the users before changing the layout.)
 */
typedef struct AggState
{
	ScanState	ss;				/* its first field is NodeTag */
	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
	int			numaggs;		/* length of list (could be zero!) */
	int			numtrans;		/* number of pertrans items */
	AggStrategy aggstrategy;	/* strategy mode */
	AggSplit	aggsplit;		/* agg-splitting mode, see nodes.h */
	AggStatePerPhase phase;		/* pointer to current phase data */
	int			numphases;		/* number of phases (including phase 0) */
	int			current_phase;	/* current phase number */
	AggStatePerAgg peragg;		/* per-Aggref information */
	AggStatePerTrans pertrans;	/* per-Trans state information */
	ExprContext *hashcontext;	/* econtexts for long-lived data (hashtable) */
	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
	ExprContext *tmpcontext;	/* econtext for input expressions */
#define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
	ExprContext *curaggcontext; /* currently active aggcontext */
	AggStatePerAgg curperagg;	/* currently active aggregate, if any */
#define FIELDNO_AGGSTATE_CURPERTRANS 16
	AggStatePerTrans curpertrans;	/* currently active trans state, if any */
	bool		input_done;		/* indicates end of input */
	bool		agg_done;		/* indicates completion of Agg scan */
	int			projected_set;	/* The last projected grouping set */
#define FIELDNO_AGGSTATE_CURRENT_SET 20
	int			current_set;	/* The current grouping set being evaluated */
	Bitmapset  *grouped_cols;	/* grouped cols in current projection */
	List	   *all_grouped_cols;	/* list of all grouped cols in DESC order */
	/* These fields are for grouping set phase data */
	int			maxsets;		/* The max number of sets in any phase */
	AggStatePerPhase phases;	/* array of all phases */
	Tuplesortstate *sort_in;	/* sorted input to phases > 1 */
	Tuplesortstate *sort_out;	/* input is copied here for next phase */
	TupleTableSlot *sort_slot;	/* slot for sort results */
	/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
	AggStatePerGroup *pergroups;	/* grouping set indexed array of per-group
									 * pointers */
	HeapTuple	grp_firstTuple; /* copy of first tuple of current group */
	/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
	bool		table_filled;	/* hash table filled yet? */
	int			num_hashes;		/* NOTE(review): presumably the number of
								 * hash tables, i.e. the length of perhash[]
								 * -- confirm */
	MemoryContext	hash_metacxt;	/* memory for hash table itself */
	struct HashTapeInfo *hash_tapeinfo; /* metadata for spill tapes */
	struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set,
										 exists only during first pass */
	TupleTableSlot *hash_spill_slot; /* slot for reading from spill files */
	List	   *hash_batches;	/* hash batches remaining to be processed */
	bool		hash_ever_spilled;	/* ever spilled during this execution? */
	bool		hash_spill_mode;	/* we hit a limit during the current batch
									   and we must not create new groups */
	Size		hash_mem_limit;	/* memory limit before spilling hash table */
	uint64		hash_ngroups_limit;	/* group-count limit before spilling hash
									   table */
	int			hash_planned_partitions; /* number of partitions planned
											for first pass */
	double		hashentrysize;	/* estimate revised during execution */
	Size		hash_mem_peak;	/* peak hash table memory usage */
	uint64		hash_ngroups_current;	/* number of groups currently in
										   memory in all hash tables */
	uint64		hash_disk_used; /* kB of disk space used */
	int			hash_batches_used;	/* batches used during entire execution */

	AggStatePerHash perhash;	/* array of per-hashtable data */
	AggStatePerGroup *hash_pergroup;	/* grouping set indexed array of
										 * per-group pointers */

	/* support for evaluation of agg input expressions: */
#define FIELDNO_AGGSTATE_ALL_PERGROUPS 49
	AggStatePerGroup *all_pergroups;	/* array of first ->pergroups, then
										 * ->hash_pergroup */
	ProjectionInfo *combinedproj;	/* projection machinery */

	int			group_id;		/* GROUP_ID in current projection. This is passed
								 * to GroupingSetId expressions, similar to the
								 * 'grouped_cols' value. */
	int			gset_id;		/* NOTE(review): undocumented; presumably the
								 * grouping set id for the current projection
								 * -- confirm */

	/* if input tuple has an AggExprId, save the Attribute Number */
	Index       AggExprId_AttrNum;
} AggState;

/*
 * TupleSplitState
 *
 * NOTE(review): per the field comments, each input tuple is split into
 * numDisDQAs output tuples, one per DQA (presumably "distinct-qualified
 * aggregate") -- confirm against the TupleSplit executor node.
 */
typedef struct TupleSplitState
{
	ScanState	    ss;				/* its first field is NodeTag */

	bool		    *isnull_orig;   /* original isnull array of each input tuple */

	TupleTableSlot  *outerslot;    /* holds the input tuple across the split loop */
	Index           currentExprId; /* current AggExprId value */

	AttrNumber      maxAttrNum;    /* the maximum AttrNum needed for projection */
	int             numDisDQAs;    /* number of splits for each input tuple */

	/*
	 * Each split tuple maps to a bitmap set selected by its AggExprId.  Only
	 * the input AttrNums in that bitmap set are kept; the other columns are
	 * set to null.
	 */
	Bitmapset       **dqa_split_bms;

	ExprState       **agg_filter_array; /* DQA filters pushed down from the Aggrefs */
	int             *dqa_id_array; /* DQA id for each split tuple */
} TupleSplitState;

/*
 * AggExprIdState
 *
 * NOTE(review): presumably the expression state for an AggExprId expression
 * -- confirm against its users.
 */
typedef struct AggExprIdState
{
	ExprState	xprstate;		/* embedded ExprState; must stay the first
								 * member */

	PlanState   *parent;		/* NOTE(review): presumably the plan state
								 * node this expression is evaluated for --
								 * confirm */
} AggExprIdState;

/*
 * RowIdExprState
 *
 * NOTE(review): presumably the expression state for a RowIdExpr expression
 * -- confirm against its users.
 */
typedef struct RowIdExprState
{
	ExprState	xprstate;		/* embedded ExprState; must stay the first
								 * member */

	uint64		rowcounter;		/* NOTE(review): presumably a running counter
								 * used to generate row ids -- confirm */
} RowIdExprState;

/* ----------------
 *	WindowAggState information
 * ----------------
 */
/* these structs are private in nodeWindowAgg.c: */
typedef struct WindowStatePerFuncData *WindowStatePerFunc;
typedef struct WindowStatePerAggData *WindowStatePerAgg;

typedef struct WindowAggState
{
	ScanState	ss;				/* its first field is NodeTag */

	/* these fields are filled in by ExecInitExpr: */
	List	   *funcs;			/* all WindowFunc nodes in targetlist */
	int			numfuncs;		/* total number of window functions */
	int			numaggs;		/* number that are plain aggregates */

	WindowStatePerFunc perfunc; /* per-window-function information */
	WindowStatePerAgg peragg;	/* per-plain-aggregate information */
	ExprState  *partEqfunction; /* equality funcs for partition columns */
	ExprState  *ordEqfunction;	/* equality funcs for ordering columns */
	Tuplestorestate *buffer;	/* stores rows of current partition */
	int			current_ptr;	/* read pointer # for current row */
	int			framehead_ptr;	/* read pointer # for frame head, if used */
	int			frametail_ptr;	/* read pointer # for frame tail, if used */
	int			grouptail_ptr;	/* read pointer # for group tail, if used */
	int64		spooled_rows;	/* total # of rows in buffer */
	int64		currentpos;		/* position of current row in partition */
	int64		frameheadpos;	/* current frame head position */
	int64		frametailpos;	/* current frame tail position (frame end+1) */
	/* use struct pointer to avoid including windowapi.h here */
	struct WindowObjectData *agg_winobj;	/* winobj for aggregate fetches */
	int64		aggregatedbase; /* start row for current aggregates */
	int64		aggregatedupto; /* rows before this one are aggregated */

	int			frameOptions;	/* frame_clause options, see WindowDef */
	ExprState  *startOffset;	/* expression for starting bound offset */
	ExprState  *endOffset;		/* expression for ending bound offset */
	Datum		startOffsetValue;	/* result of startOffset evaluation */
	Datum		endOffsetValue; /* result of endOffset evaluation */

	/* these fields are used with RANGE offset PRECEDING/FOLLOWING: */
	FmgrInfo	startInRangeFunc;	/* in_range function for startOffset */
	FmgrInfo	endInRangeFunc; /* in_range function for endOffset */
	Oid			inRangeColl;	/* collation for in_range tests */
	bool		inRangeAsc;		/* use ASC sort order for in_range tests? */
	bool		inRangeNullsFirst;	/* nulls sort first for in_range tests? */
	/*
	 * In GPDB, we support RANGE/ROWS start/end expressions to contain
	 * variables. You lose on some optimizations in that case, so we use
	 * these flags to indicate if they don't contain any variables, to allow
	 * those optimizations in the usual case that they don't.
	 */
	bool		start_offset_var_free;
	bool		end_offset_var_free;

	bool		start_offset_valid;		/* is startOffsetValue valid for current row? */
	bool		end_offset_valid;		/* is endOffsetValue valid for current row? */

	/* these fields are used in GROUPS mode: */
	int64		currentgroup;	/* peer group # of current row in partition */
	int64		frameheadgroup; /* peer group # of frame head row */
	int64		frametailgroup; /* peer group # of frame tail row */
	int64		groupheadpos;	/* current row's peer group head position */
	int64		grouptailpos;	/* current row's peer group tail position
								 * (group end+1) */

	MemoryContext partcontext;	/* context for partition-lifespan data */
	MemoryContext aggcontext;	/* shared context for aggregate working data */
	MemoryContext curaggcontext;	/* current aggregate's working data */
	ExprContext *tmpcontext;	/* short-term evaluation context */

	bool		all_first;		/* true if the scan is starting */
	bool		all_done;		/* true if the scan is finished */
	bool		partition_spooled;	/* true if all tuples in current partition
									 * have been spooled into tuplestore */
	bool		more_partitions;	/* true if there's more partitions after
									 * this one */
	bool		framehead_valid;	/* true if frameheadpos is known up to
									 * date for current row */
	bool		frametail_valid;	/* true if frametailpos is known up to
									 * date for current row */
	bool		grouptail_valid;	/* true if grouptailpos is known up to
									 * date for current row */

	TupleTableSlot *first_part_slot;	/* first tuple of current or next
										 * partition */
	TupleTableSlot *framehead_slot; /* first tuple of current frame */
	TupleTableSlot *frametail_slot; /* first tuple after current frame */

	/* temporary slots for tuples fetched back from tuplestore */
	TupleTableSlot *agg_row_slot;
	TupleTableSlot *temp_slot_1;
	TupleTableSlot *temp_slot_2;
} WindowAggState;

/* ----------------
 *	 UniqueState information
 *
 *		Unique nodes are used "on top of" sort nodes to discard
 *		duplicate tuples returned from the sort phase.  Basically
 *		all it does is compare the current tuple from the subplan
 *		with the previously fetched tuple (stored in its result slot).
 *		If the two are identical in all interesting fields, then
 *		we just fetch another tuple from the sort and try again.
 * ----------------
 */
typedef struct UniqueState
{
	PlanState	ps;				/* its first field is NodeTag */
	ExprState  *eqfunction;		/* tuple equality qual: compares the current
								 * subplan tuple with the previously fetched
								 * one */
} UniqueState;

/* ----------------
 * GatherState information
 *
 *		Gather nodes launch 1 or more parallel workers, run a subplan
 *		in those workers, and collect the results.
 * ----------------
 */
typedef struct GatherState
{
	PlanState	ps;				/* its first field is NodeTag */
	bool		initialized;	/* workers launched? */
	bool		need_to_scan_locally;	/* need to read from local plan? */
	int64		tuples_needed;	/* tuple bound, see ExecSetTupleBound */
	/* these fields are set up once: */
	TupleTableSlot *funnel_slot;	/* NOTE(review): presumably the slot that
									 * receives tuples read from the workers
									 * -- confirm */
	struct ParallelExecutorInfo *pei;	/* NOTE(review): presumably the
										 * parallel-execution context for the
										 * launched workers -- confirm */
	/* all remaining fields are reinitialized during a rescan: */
	int			nworkers_launched;	/* original number of workers */
	int			nreaders;		/* number of still-active workers */
	int			nextreader;		/* next one to try to read from */
	struct TupleQueueReader **reader;	/* array with nreaders active entries */
} GatherState;

/* ----------------
 * GatherMergeState information
 *
 *		Gather merge nodes launch 1 or more parallel workers, run a
 *		subplan which produces sorted output in each worker, and then
 *		merge the results into a single sorted stream.
 * ----------------
 */
/* Forward declaration only; the struct body is private in nodeGatherMerge.c */
struct GMReaderTupleBuffer;

typedef struct GatherMergeState
{
	PlanState	ps;				/* its first field is NodeTag */
	bool		initialized;	/* workers launched? */
	bool		gm_initialized; /* gather_merge_init() done? */
	bool		need_to_scan_locally;	/* need to read from local plan? */
	int64		tuples_needed;	/* tuple bound, see ExecSetTupleBound */
	/* these fields are set up once: */
	TupleDesc	tupDesc;		/* descriptor for subplan result tuples */
	int			gm_nkeys;		/* number of sort columns */
	SortSupport gm_sortkeys;	/* array of length gm_nkeys */
	struct ParallelExecutorInfo *pei;	/* parallel executor info; referenced
										 * here only through its struct tag */
	/* all remaining fields are reinitialized during a rescan */
	/* (but the arrays are not reallocated, just cleared) */
	int			nworkers_launched;	/* original number of workers */
	int			nreaders;		/* number of active workers */
	TupleTableSlot **gm_slots;	/* array with nreaders+1 entries */
	struct TupleQueueReader **reader;	/* array with nreaders active entries */
	struct GMReaderTupleBuffer *gm_tuple_buffers;	/* nreaders tuple buffers */
	struct binaryheap *gm_heap; /* binary heap of slot indices (indices refer
								 * to gm_slots -- NOTE(review): confirm) */
} GatherMergeState;

/* ----------------
 *	 Values displayed by EXPLAIN ANALYZE
 * ----------------
 */
typedef struct HashInstrumentation
{
	int			nbuckets;		/* number of buckets at end of execution */
	int			nbuckets_original;	/* planned number of buckets */
	int			nbatch;			/* number of batches at end of execution */
	int			nbatch_original;	/* planned number of batches */
	size_t		space_peak;		/* peak memory usage in bytes */
} HashInstrumentation;

/* ----------------
 *	 Shared memory container for per-worker hash information
 * ----------------
 */
typedef struct SharedHashInfo
{
	int			num_workers;	/* NOTE(review): presumably the number of
								 * entries in hinstrument[] -- confirm */
	/* per-worker instrumentation; flexible array, so it must stay last */
	HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER];
} SharedHashInfo;

/* ----------------
 *	 HashState information
 * ----------------
 */
typedef struct HashState
{
	PlanState	ps;				/* its first field is NodeTag */
	HashJoinTable hashtable;	/* hash table for the hashjoin */
	List	   *hashkeys;		/* list of ExprState nodes; same as parent's
								 * hj_InnerHashKeys */
	bool		hs_keepnull;	/* Keep nulls */
	bool		hs_quit_if_hashkeys_null;	/* quit building hash table if hashkeys are all null */
	bool		hs_hashkeys_null;	/* found an instance wherein hashkeys are all null */

	/* Instrumentation (see HashInstrumentation / SharedHashInfo) */
	SharedHashInfo *shared_info;	/* one entry per worker */
	HashInstrumentation *hinstrument;	/* this worker's entry */

	/* Parallel hash state. */
	struct ParallelHashJoinState *parallel_state;
} HashState;

/* ----------------
 *	 SetOpState information
 *
 *		Even in "sorted" mode, SetOp nodes are more complex than a simple
 *		Unique, since we have to count how many duplicates to return.  But
 *		we also support hashing, so this is really more like a cut-down
 *		form of Agg.
 * ----------------
 */
/*
 * Pointer to per-group working state.  The pointed-to struct is private in
 * nodeSetOp.c, so it is only usable as an opaque handle from this header.
 */
typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;

typedef struct SetOpState
{
	PlanState	ps;				/* its first field is NodeTag */
	ExprState  *eqfunction;		/* equality comparator */
	Oid		   *eqfuncoids;		/* per-grouping-field equality fns */
	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
	bool		setop_done;		/* indicates completion of output scan */
	long		numOutput;		/* number of dups left to output */
	/* these fields are used in SETOP_SORTED mode: */
	SetOpStatePerGroup pergroup;	/* per-group working state */
	HeapTuple	grp_firstTuple; /* copy of first tuple of current group */
	/* these fields are used in SETOP_HASHED mode: */
	TupleHashTable hashtable;	/* hash table with one entry per group */
	MemoryContext tableContext; /* memory context containing hash table */
	bool		table_filled;	/* hash table filled yet? */
	TupleHashIterator hashiter; /* for iterating through hash table */
} SetOpState;

/* ----------------
 *	 LockRowsState information
 *
 *		LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking.
 * ----------------
 */
typedef struct LockRowsState
{
	PlanState	ps;				/* its first field is NodeTag */
	List	   *lr_arowMarks;	/* List of ExecAuxRowMarks */
	EPQState	lr_epqstate;	/* for evaluating EvalPlanQual rechecks;
								 * embedded by value, not a pointer */
} LockRowsState;

/* ----------------
 *	 LimitState information
 *
 *		Limit nodes are used to enforce LIMIT/OFFSET clauses.
 *		They just select the desired subrange of their subplan's output.
 *
 * offset is the number of initial tuples to skip (0 does nothing).
 * count is the number of tuples to return after skipping the offset tuples.
 * If no limit count was specified, count is undefined and noCount is true.
 * When lstate == LIMIT_INITIAL, offset/count/noCount haven't been set yet.
 * ----------------
 */
typedef enum
{
	/* Values of LimitState.lstate, the Limit node's state machine status */
	LIMIT_INITIAL,				/* initial state for LIMIT node */
	LIMIT_RESCAN,				/* rescan after recomputing parameters */
	LIMIT_EMPTY,				/* there are no returnable rows */
	LIMIT_INWINDOW,				/* have returned a row in the window */
	LIMIT_SUBPLANEOF,			/* at EOF of subplan (within window) */
	LIMIT_WINDOWEND,			/* stepped off end of window */
	LIMIT_WINDOWSTART			/* stepped off beginning of window */
} LimitStateCond;

typedef struct LimitState
{
	PlanState	ps;				/* its first field is NodeTag */
	ExprState  *limitOffset;	/* OFFSET parameter, or NULL if none */
	ExprState  *limitCount;		/* COUNT parameter, or NULL if none */
	int64		offset;			/* current OFFSET value */
	int64		count;			/* current COUNT, if any */
	bool		noCount;		/* if true, ignore count */
	LimitStateCond lstate;		/* state machine status, as above */
	int64		position;		/* 1-based index of last tuple returned */
	TupleTableSlot *subSlot;	/* tuple last obtained from subplan */

	bool		expect_rescan;	/* NOTE(review): undocumented in this header;
								 * presumably records that a rescan of this
								 * node is anticipated -- confirm against
								 * nodeLimit.c */
} LimitState;

/*
 * DML Operations
 */

/*
 * ExecNode for Split.
 * This operator contains a Plannode in PlanState.
 * The Plannode contains indexes to the ctid, insert, delete, resjunk columns
 * needed for adding the action (Insert/Delete).
 * A MemoryContext and TupleTableSlot are maintained to keep the INSERT
 * tuple until requested.
 */
typedef struct SplitUpdateState
{
	/*
	 * NOTE(review): the header comment above mentions a MemoryContext, but
	 * this struct has no MemoryContext member; only the two tuple slots
	 * below are kept here.  The header comment may be stale -- confirm.
	 */
	PlanState	ps;				/* common plan-state header */
	bool		processInsert;	/* flag that specifies the operator's next
								 * action. */
	TupleTableSlot *insertTuple;	/* tuple to Insert */
	TupleTableSlot *deleteTuple;	/* tuple to Delete */

	AttrNumber	input_segid_attno;		/* attribute number of "gp_segment_id" in subplan's target list */
	AttrNumber	output_segid_attno;		/* attribute number of "gp_segment_id" in output target list */

	struct CdbHash *cdbhash;	/* hash api object */

} SplitUpdateState;

/*
 * ExecNode for AssertOp.
 * This operator contains a Plannode that contains the expressions
 * to execute.
 */
typedef struct AssertOpState
{
	PlanState	ps;				/* common plan-state header; this node keeps no
								 * additional run-time fields of its own */
} AssertOpState;


typedef enum MotionStateType
{
	/* Role of a Motion node within the current slice (see MotionState.mstype) */
	MOTIONSTATE_NONE,			/* The motion state is not decided, or not
								 * active in a slice (neither send nor recv) */
	MOTIONSTATE_SEND,			/* The motion is the sender */
	MOTIONSTATE_RECV,			/* The motion is the receiver */
} MotionStateType;

/* ----------------
 *         MotionState information
 * ----------------
 */
typedef struct MotionState
{
	PlanState	ps;				/* its first field is NodeTag */
	MotionStateType mstype;		/* Motion state type */
	bool		stopRequested;	/* set when we want transfer to stop */

	/* For motion send */
	bool		sentEndOfStream;	/* set when end-of-stream has successfully been sent */
	List	   *hashExprs;		/* state struct used for evaluating the hash expressions */
	struct CdbHash *cdbhash;	/* hash api object */
	int			numHashSegments;	/* number of segments to use when calculating hash */

	/* For Motion recv */
	int			routeIdNext;	/* for a sorted motion node, the routeId to get next (same as
								 * the routeId last returned ) */
	bool		tupleheapReady; /* for a sorted motion node, false until we have a tuple from
								 * each source segindex */

	/* For sorted Motion recv */
	int			numSortCols;	/* number of sort columns */
	SortSupport sortKeys;		/* NOTE(review): presumably an array of length
								 * numSortCols -- confirm */
	TupleTableSlot **slots;		/* NOTE(review): presumably the slots that the
								 * indices stored in tupleheap refer to --
								 * confirm */
	struct binaryheap *tupleheap; /* binary heap of slot indices */
	int			lastSortColIdx; /* NOTE(review): undocumented here; confirm
								 * meaning against nodeMotion.c */

	/* The following can be used for debugging, usage stats, etc.  */
	int			numTuplesFromChild;	/* Number of tuples received from child */
	int			numTuplesToAMS;		/* Number of tuples from child that were sent to AMS */
	int			numTuplesFromAMS;	/* Number of tuples received from AMS */
	int			numTuplesToParent;	/* Number of tuples either from child or AMS that were sent to parent */

	struct timeval otherTime;   /* time accumulator used in sending motion node to keep track of time
								 * spent getting the next tuple (not sending). this could mean time spent
								 * in another motion node receiving. */

	struct timeval motionTime;  /* time accumulator for time spent in motion node.  For sending motion node
								 * it is just the amount of time actually sending the tuple thru the
								 * interconnect.  For receiving motion node, it is the time spent waiting
								 * and processing of the next incoming tuple.
								 */

	Oid		   *outputFunArray;	/* output functions for each column (debug only) */

	int			numInputSegs;	/* the number of segments on the sending slice */
} MotionState;

/* ----------------
 *	 PartitionSelectorState information
 *
 *		A PartitionSelector is used to affect which partitions are scanned
 *		on the "other" side of a join.
 *
 * This is a GPDB mechanism, used for runtime partition pruning based on
 * actual values seen in a join. It is in addition to the partition pruning
 * done at plan-time and at executor startup.
 * ----------------
 */
typedef struct PartitionSelectorState
{
	PlanState	ps;				/* its first field is NodeTag */

	/*
	 * NOTE(review): neither field is documented in this header.  Presumably
	 * prune_state drives the run-time pruning and part_prune_result holds
	 * the resulting set of partitions -- confirm against
	 * nodePartitionSelector.c.
	 */
	struct PartitionPruneState *prune_state;
	Bitmapset *part_prune_result;
} PartitionSelectorState;

#endif							/* EXECNODES_H */

相关信息

greenplumn 源码目录

相关文章

greenplumn altertablenodes 源码

greenplumn bitmapset 源码

greenplumn extensible 源码

greenplumn lockoptions 源码

greenplumn makefuncs 源码

greenplumn memnodes 源码

greenplumn nodeFuncs 源码

greenplumn nodes 源码

greenplumn params 源码

greenplumn parsenodes 源码

0 赞

所属分类： 大数据
本文标签： greenplum
版权声明： 原创文章如转载，请注明本文链接: https://www.seaxiang.com/blog/6b036f8d14cf42d98a2beb9c3be841b9

热门推荐

1、直接访问google.com
2、 - 优质文章
3、 gate.io
4、 harmony 鸿蒙hdc使用指导
5、 harmony 鸿蒙ArkUI组件（ArkTS）开发常见问题
6、 harmony 鸿蒙初识ArkTS语言
7、 golang
8、 flink kafka connector scan.startup.mode 的几个选项
9、 openharmony
10、 Vue中input框自动聚焦

Loading...