greenplumn nodeModifyTable 源码
greenplumn nodeModifyTable 代码
文件路径:/src/backend/executor/nodeModifyTable.c
/*-------------------------------------------------------------------------
*
* nodeModifyTable.c
* routines to handle ModifyTable nodes.
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/executor/nodeModifyTable.c
*
*-------------------------------------------------------------------------
*/
/* INTERFACE ROUTINES
* ExecInitModifyTable - initialize the ModifyTable node
* ExecModifyTable - retrieve the next tuple from the node
* ExecEndModifyTable - shut down the ModifyTable node
* ExecReScanModifyTable - rescan the ModifyTable node
*
* NOTES
* Each ModifyTable node contains a list of one or more subplans,
* much like an Append node. There is one subplan per result relation.
* The key reason for this is that in an inherited UPDATE command, each
* result relation could have a different schema (more or different
* columns) requiring a different plan tree to produce it. In an
* inherited DELETE, all the subplans should produce the same output
* rowtype, but we might still find that different plans are appropriate
* for different child relations.
*
* If the query specifies RETURNING, then the ModifyTable returns a
* RETURNING tuple after completing each row insert, update, or delete.
* It must be called again to continue the operation. Without RETURNING,
* we just loop within the node until all the work is done, then
* return NULL. This avoids useless call/return overhead.
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/tableam.h"
#include "access/tupconvert.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "access/transam.h"
#include "cdb/cdbaocsam.h"
#include "cdb/cdbappendonlyam.h"
#include "cdb/cdbvars.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/snapmgr.h"
static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
ResultRelInfo *resultRelInfo,
ItemPointer conflictTid,
TupleTableSlot *planSlot,
TupleTableSlot *excludedSlot,
EState *estate,
bool canSetTag,
TupleTableSlot **returning);
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
EState *estate,
PartitionTupleRouting *proute,
ResultRelInfo *targetRelInfo,
TupleTableSlot *slot);
static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
int whichplan);
/*
* Verify that the tuples to be produced by INSERT or UPDATE match the
* target relation's rowtype
*
* We do this to guard against stale plans. If plan invalidation is
* functioning properly then we should never get a failure here, but better
* safe than sorry. Note that this is called after we have obtained lock
* on the target rel, so the rowtype can't change underneath us.
*
* The plan output is represented by its targetlist, because that makes
* handling the dropped-column case easier.
*/
static void
ExecCheckPlanOutput(Relation resultRel, List *targetList)
{
TupleDesc resultDesc = RelationGetDescr(resultRel);
int attno = 0;
ListCell *lc;
foreach(lc, targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
Form_pg_attribute attr;
if (tle->resjunk)
continue; /* ignore junk tlist items */
if (attno >= resultDesc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Query has too many columns.")));
attr = TupleDescAttr(resultDesc, attno);
attno++;
if (!attr->attisdropped)
{
/* Normal case: demand type match */
if (exprType((Node *) tle->expr) != attr->atttypid)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Table has type %s at ordinal position %d, but query expects %s.",
format_type_be(attr->atttypid),
attno,
format_type_be(exprType((Node *) tle->expr)))));
}
else
{
/*
* For a dropped column, we can't check atttypid (it's likely 0).
* In any case the planner has most likely inserted an INT4 null.
* What we insist on is just *some* NULL constant.
*/
/* GPDB_96_MERGE_FIXME: the subplan can be a Motion, so that the NULLs
* are transferred through the Motion node.
*/
#if 0
if (!IsA(tle->expr, Const) ||
!((Const *) tle->expr)->constisnull)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Query provides a value for a dropped column at ordinal position %d.",
attno)));
#endif
}
}
if (attno != resultDesc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("table row type and query-specified row type do not match"),
errdetail("Query has too few columns.")));
}
/*
* ExecProcessReturning --- evaluate a RETURNING list
*
* resultRelInfo: current result rel
* tupleSlot: slot holding tuple actually inserted/updated/deleted
* planSlot: slot holding tuple returned by top subplan node
*
* Note: If tupleSlot is NULL, the FDW should have already provided econtext's
* scan tuple.
*
* Returns a slot holding the result tuple
*/
static TupleTableSlot *
ExecProcessReturning(ResultRelInfo *resultRelInfo,
TupleTableSlot *tupleSlot,
TupleTableSlot *planSlot)
{
ProjectionInfo *projectReturning = resultRelInfo->ri_projectReturning;
ExprContext *econtext = projectReturning->pi_exprContext;
/* Make tuple and any needed join variables available to ExecProject */
if (tupleSlot)
econtext->ecxt_scantuple = tupleSlot;
econtext->ecxt_outertuple = planSlot;
/*
* RETURNING expressions might reference the tableoid column, so
* reinitialize tts_tableOid before evaluating them.
*/
econtext->ecxt_scantuple->tts_tableOid =
RelationGetRelid(resultRelInfo->ri_RelationDesc);
/* Compute the RETURNING expressions */
return ExecProject(projectReturning);
}
/*
* ExecCheckTupleVisible -- verify tuple is visible
*
* It would not be consistent with guarantees of the higher isolation levels to
* proceed with avoiding insertion (taking speculative insertion's alternative
* path) on the basis of another tuple that is not visible to MVCC snapshot.
* Check for the need to raise a serialization failure, and do so as necessary.
*/
static void
ExecCheckTupleVisible(EState *estate,
Relation rel,
TupleTableSlot *slot)
{
if (!IsolationUsesXactSnapshot())
return;
if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
{
Datum xminDatum;
TransactionId xmin;
bool isnull;
xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
Assert(!isnull);
xmin = DatumGetTransactionId(xminDatum);
/*
* We should not raise a serialization failure if the conflict is
* against a tuple inserted by our own transaction, even if it's not
* visible to our snapshot. (This would happen, for example, if
* conflicting keys are proposed for insertion in a single command.)
*/
if (!TransactionIdIsCurrentTransactionId(xmin))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
}
}
/*
* ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
*/
static void
ExecCheckTIDVisible(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tid,
TupleTableSlot *tempSlot)
{
Relation rel = relinfo->ri_RelationDesc;
/* Redundantly check isolation level */
if (!IsolationUsesXactSnapshot())
return;
if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
ExecCheckTupleVisible(estate, rel, tempSlot);
ExecClearTuple(tempSlot);
}
/*
* Compute stored generated columns for a tuple
*/
void
ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot)
{
ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
Relation rel = resultRelInfo->ri_RelationDesc;
TupleDesc tupdesc = RelationGetDescr(rel);
int natts = tupdesc->natts;
MemoryContext oldContext;
Datum *values;
bool *nulls;
Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);
/*
* If first time through for this result relation, build expression
* nodetrees for rel's stored generation expressions. Keep them in the
* per-query memory context so they'll survive throughout the query.
*/
if (resultRelInfo->ri_GeneratedExprs == NULL)
{
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
resultRelInfo->ri_GeneratedExprs =
(ExprState **) palloc(natts * sizeof(ExprState *));
for (int i = 0; i < natts; i++)
{
if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
{
Expr *expr;
expr = (Expr *) build_column_default(rel, i + 1);
if (expr == NULL)
elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
i + 1, RelationGetRelationName(rel));
resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
}
}
MemoryContextSwitchTo(oldContext);
}
oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
values = palloc(sizeof(*values) * natts);
nulls = palloc(sizeof(*nulls) * natts);
slot_getallattrs(slot);
memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);
for (int i = 0; i < natts; i++)
{
Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED)
{
ExprContext *econtext;
Datum val;
bool isnull;
econtext = GetPerTupleExprContext(estate);
econtext->ecxt_scantuple = slot;
val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);
values[i] = val;
nulls[i] = isnull;
}
else
{
if (!nulls[i])
values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
}
}
ExecClearTuple(slot);
memcpy(slot->tts_values, values, sizeof(*values) * natts);
memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
ExecStoreVirtualTuple(slot);
ExecMaterializeSlot(slot);
MemoryContextSwitchTo(oldContext);
}
/* ----------------------------------------------------------------
* ExecInsert
*
* For INSERT, we have to insert the tuple into the target relation
* and insert appropriate tuples into the index relations.
*
* Returns RETURNING result if any, otherwise NULL.
*
* If the target table is partitioned, the input tuple in 'parentslot'
* is in the shape required for the parent table. This function will
* look up the ResultRelInfo of the target partition, and form a
* tuple suitable for the target partition. (It can be different, if
* there are dropped columns in the parent, but not the partition,
* for example.)
*
* In GPDB, the INSERT can be part of an update operation when
* there is a preceding SplitUpdate node. 'splitUpdate' is true in
* that case.
*
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
TupleTableSlot *slot,
TupleTableSlot *planSlot,
EState *estate,
bool canSetTag,
bool splitUpdate)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
List *recheckIndexes = NIL;
TupleTableSlot *result = NULL;
TransitionCaptureState *ar_insert_trig_tcs;
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
OnConflictAction onconflict = node->onConflictAction;
ExecMaterializeSlot(slot);
/*
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/*
* BEFORE ROW INSERT Triggers.
*
* Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
* INSERT ... ON CONFLICT statement. We cannot check for constraint
* violations before firing these triggers, because they can change the
* values to insert. Also, they can run arbitrary user-defined code with
* side-effects that we can't cancel by just not inserting the tuple.
*/
/*
* GPDB_12_MERGE_FIXME: PostgreSQL *does* fire INSERT and DELETE
* triggers on an UPDATE that moves tuples from one partition to another.
* Should we follow that example with cross-segment UPDATEs too?
*/
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row &&
!splitUpdate)
{
if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
return NULL; /* "do nothing" */
}
/* INSTEAD OF ROW INSERT Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
{
if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
return NULL; /* "do nothing" */
}
else if (resultRelInfo->ri_FdwRoutine)
{
/*
* Compute stored generated columns
*/
if (resultRelationDesc->rd_att->constr &&
resultRelationDesc->rd_att->constr->has_generated_stored)
ExecComputeStoredGenerated(estate, slot);
/*
* insert into foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
resultRelInfo,
slot,
planSlot);
if (slot == NULL) /* "do nothing" */
return NULL;
/*
* AFTER ROW Triggers or RETURNING expressions might reference the
* tableoid column, so (re-)initialize tts_tableOid before evaluating
* them.
*/
slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
}
else
{
WCOKind wco_kind;
/*
* Constraints might reference the tableoid column, so (re-)initialize
* tts_tableOid before evaluating them.
*/
slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
/*
* Compute stored generated columns
*/
if (resultRelationDesc->rd_att->constr &&
resultRelationDesc->rd_att->constr->has_generated_stored)
ExecComputeStoredGenerated(estate, slot);
/*
* Check any RLS WITH CHECK policies.
*
* Normally we should check INSERT policies. But if the insert is the
* result of a partition key update that moved the tuple to a new
* partition, we should instead check UPDATE policies, because we are
* executing policies defined on the target table, and not those
* defined on the child partitions.
*/
wco_kind = (mtstate->operation == CMD_UPDATE) ?
WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
/*
* ExecWithCheckOptions() will skip any WCOs which are not of the kind
* we are looking for at this point.
*/
if (resultRelInfo->ri_WithCheckOptions != NIL)
ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
/*
* Check the constraints of the tuple.
*/
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
/*
* Also check the tuple against the partition constraint, if there is
* one; except that if we got here via tuple-routing, we don't need to
* if there's no BR trigger defined on the partition.
*/
if (resultRelInfo->ri_PartitionCheck &&
(resultRelInfo->ri_PartitionRoot == NULL ||
(resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
ExecPartitionCheck(resultRelInfo, slot, estate, true);
if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
{
/* Perform a speculative insertion. */
uint32 specToken;
ItemPointerData conflictTid;
bool specConflict;
List *arbiterIndexes;
arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
/*
* Do a non-conclusive check for conflicts first.
*
* We're not holding any locks yet, so this doesn't guarantee that
* the later insert won't conflict. But it avoids leaving behind
* a lot of canceled speculative insertions, if you run a lot of
* INSERT ON CONFLICT statements that do conflict.
*
* We loop back here if we find a conflict below, either during
* the pre-check, or when we re-check after inserting the tuple
* speculatively.
*/
vlock:
specConflict = false;
if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
arbiterIndexes))
{
/* committed conflict tuple found */
if (onconflict == ONCONFLICT_UPDATE)
{
/*
* In case of ON CONFLICT DO UPDATE, execute the UPDATE
* part. Be prepared to retry if the UPDATE fails because
* of another concurrent UPDATE/DELETE to the conflict
* tuple.
*/
TupleTableSlot *returning = NULL;
if (ExecOnConflictUpdate(mtstate, resultRelInfo,
&conflictTid, planSlot, slot,
estate, canSetTag, &returning))
{
InstrCountTuples2(&mtstate->ps, 1);
return returning;
}
else
goto vlock;
}
else
{
/*
* In case of ON CONFLICT DO NOTHING, do nothing. However,
* verify that the tuple is visible to the executor's MVCC
* snapshot at higher isolation levels.
*
* Using ExecGetReturningSlot() to store the tuple for the
* recheck isn't that pretty, but we can't trivially use
* the input slot, because it might not be of a compatible
* type. As there's no conflicting usage of
* ExecGetReturningSlot() in the DO NOTHING case...
*/
Assert(onconflict == ONCONFLICT_NOTHING);
ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
ExecGetReturningSlot(estate, resultRelInfo));
InstrCountTuples2(&mtstate->ps, 1);
return NULL;
}
}
/*
* Before we start insertion proper, acquire our "speculative
* insertion lock". Others can use that to wait for us to decide
* if we're going to go ahead with the insertion, instead of
* waiting for the whole transaction to complete.
*/
specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
/* insert the tuple, with the speculative token */
table_tuple_insert_speculative(resultRelationDesc, slot,
estate->es_output_cid,
0,
NULL,
specToken);
/* insert index entries for tuple */
recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
&specConflict,
arbiterIndexes);
/* adjust the tuple's state accordingly */
table_tuple_complete_speculative(resultRelationDesc, slot,
specToken, !specConflict);
/*
* Wake up anyone waiting for our decision. They will re-check
* the tuple, see that it's no longer speculative, and wait on our
* XID as if this was a regularly inserted tuple all along. Or if
* we killed the tuple, they will see it's dead, and proceed as if
* the tuple never existed.
*/
SpeculativeInsertionLockRelease(GetCurrentTransactionId());
/*
* If there was a conflict, start from the beginning. We'll do
* the pre-check again, which will now find the conflicting tuple
* (unless it aborts before we get there).
*/
if (specConflict)
{
list_free(recheckIndexes);
goto vlock;
}
/* Since there was no insertion conflict, we're done */
}
else
{
/* insert the tuple normally */
table_tuple_insert(resultRelationDesc, slot,
estate->es_output_cid,
0, NULL);
/* insert index entries for tuple */
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
NIL);
}
}
if (canSetTag)
{
(estate->es_processed)++;
setLastTid(&slot->tts_tid);
}
/*
* If this insert is the result of a partition key update that moved the
* tuple to a new partition, put this row into the transition NEW TABLE,
* if there is one. We need to do this separately for DELETE and INSERT
* because they happen on different tables.
*/
ar_insert_trig_tcs = mtstate->mt_transition_capture;
if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
&& mtstate->mt_transition_capture->tcs_update_new_table)
{
ExecARUpdateTriggers(estate, resultRelInfo, NULL,
NULL,
slot,
NULL,
mtstate->mt_transition_capture);
/*
* We've already captured the NEW TABLE row, so make sure any AR
* INSERT trigger fired below doesn't capture it again.
*/
ar_insert_trig_tcs = NULL;
}
slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
/* AFTER ROW INSERT Triggers */
/*
* GPDB: Don't fire DELETE triggers on a split UPDATE.
*
* GPDB_12_MERGE_FIXME: PostgreSQL *does* fire INSERT and DELETE
* triggers on an UPDATE that moves tuples from one partition to another.
* Should we follow that example with cross-segment UPDATEs too?
*/
if (!splitUpdate)
ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
ar_insert_trig_tcs);
list_free(recheckIndexes);
/*
* Check any WITH CHECK OPTION constraints from parent views. We are
* required to do this after testing all constraints and uniqueness
* violations per the SQL spec, so we do it after actually inserting the
* record into the heap and all indexes.
*
* ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
* tuple will never be seen, if it violates the WITH CHECK OPTION.
*
* ExecWithCheckOptions() will skip any WCOs which are not of the kind we
* are looking for at this point.
*/
if (resultRelInfo->ri_WithCheckOptions != NIL)
ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
result = ExecProcessReturning(resultRelInfo, slot, planSlot);
return result;
}
/* ----------------------------------------------------------------
* ExecDelete
*
* DELETE is like UPDATE, except that we delete the tuple and no
* index modifications are needed.
*
* When deleting from a table, tupleid identifies the tuple to
* delete and oldtuple is NULL. When deleting from a view,
* oldtuple is passed to the INSTEAD OF triggers and identifies
* what to delete, and tupleid is invalid. When deleting from a
* foreign table, tupleid is invalid; the FDW has to figure out
* which row to delete using data from the planSlot. oldtuple is
* passed to foreign table triggers; it is NULL when the foreign
* table has no relevant triggers. We use tupleDeleted to indicate
* whether the tuple is actually deleted, callers can use it to
* decide whether to continue the operation. When this DELETE is a
* part of an UPDATE of partition-key, then the slot returned by
* EvalPlanQual() is passed back using output parameter epqslot.
*
* In GPDB, DELETE can be part of an update operation when
* there is a preceding SplitUpdate node.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecDelete(ModifyTableState *mtstate,
ItemPointer tupleid,
int32 segid,
HeapTuple oldtuple,
TupleTableSlot *planSlot,
EPQState *epqstate,
EState *estate,
bool processReturning,
bool canSetTag,
bool changingPart,
bool splitUpdate,
bool *tupleDeleted,
TupleTableSlot **epqreturnslot)
{
/* PlanGenerator planGen = estate->es_plannedstmt->planGen; */
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
TM_Result result;
TM_FailureData tmfd;
TupleTableSlot *slot = NULL;
TransitionCaptureState *ar_delete_trig_tcs;
if (tupleDeleted)
*tupleDeleted = false;
/*
* Sanity check the distribution of the tuple to prevent
* potential data corruption in case users manipulate data
* incorrectly (e.g. insert data on incorrect segment through
* utility mode) or there is bug in code, etc.
*/
if (segid != GpIdentity.segindex)
elog(ERROR,
"distribution key of the tuple (%u, %u) doesn't belong to "
"current segment (actually from seg%d)",
BlockIdGetBlockNumber(&(tupleid->ip_blkid)),
tupleid->ip_posid,
segid);
/*
* get information on the (current) result relation
*/
// GPDB_12_MERGE_FIXME: How to do this in new partitioning implementation?
// Can we change the way ORCA deletion plans work?
#if 0
if (estate->es_result_partitions && planGen == PLANGEN_OPTIMIZER)
{
Assert(estate->es_result_partitions->part->parrelid);
#ifdef USE_ASSERT_CHECKING
Oid parent = estate->es_result_partitions->part->parrelid;
#endif
/* Obtain part for current tuple. */
resultRelInfo = slot_get_partition(planSlot, estate, true);
estate->es_result_relation_info = resultRelInfo;
#ifdef USE_ASSERT_CHECKING
Oid part = RelationGetRelid(resultRelInfo->ri_RelationDesc);
#endif
Assert(parent != part);
}
else
{
resultRelInfo = estate->es_result_relation_info;
}
#else
resultRelInfo = estate->es_result_relation_info;
#endif
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/* BEFORE ROW DELETE Triggers */
/*
* GPDB_12_MERGE_FIXME: PostgreSQL *does* fire INSERT and DELETE
* triggers on an UPDATE that moves tuples from one partition to another.
* Should we follow that example with cross-segment UPDATEs too?
*/
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_before_row &&
!splitUpdate)
{
bool dodelete;
dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
tupleid, oldtuple, epqreturnslot);
if (!dodelete) /* "do nothing" */
return NULL;
}
/* INSTEAD OF ROW DELETE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
{
bool dodelete;
Assert(oldtuple != NULL);
dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);
if (!dodelete) /* "do nothing" */
return NULL;
}
else if (resultRelInfo->ri_FdwRoutine)
{
/*
* delete from foreign table: let the FDW do it
*
* We offer the returning slot as a place to store RETURNING data,
* although the FDW can return some other slot if it wants.
*/
slot = ExecGetReturningSlot(estate, resultRelInfo);
slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
resultRelInfo,
slot,
planSlot);
if (slot == NULL) /* "do nothing" */
return NULL;
/*
* RETURNING expressions might reference the tableoid column, so
* (re)initialize tts_tableOid before evaluating them.
*/
if (TTS_EMPTY(slot))
ExecStoreAllNullTuple(slot);
slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
}
else
{
/*
* delete the tuple
*
* Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
* that the row to be deleted is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior
* needed for referential integrity updates in transaction-snapshot
* mode transactions.
*/
ldelete:;
result = table_tuple_delete(resultRelationDesc, tupleid,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&tmfd,
changingPart || splitUpdate);
switch (result)
{
case TM_SelfModified:
/*
* The target tuple was already updated or deleted by the
* current command, or by a later command in the current
* transaction. The former case is possible in a join DELETE
* where multiple tuples join to the same target tuple. This
* is somewhat questionable, but Postgres has always allowed
* it: we just ignore additional deletion attempts.
*
* The latter case arises if the tuple is modified by a
* command in a BEFORE trigger, or perhaps by a command in a
* volatile function used in the query. In such situations we
* should not ignore the deletion, but it is equally unsafe to
* proceed. We don't want to discard the original DELETE
* while keeping the triggered actions based on its deletion;
* and it would be no better to allow the original DELETE
* while discarding updates that it triggered. The row update
* carries some information that might be important according
* to business rules; so throwing an error is the only safe
* course.
*
* If a trigger actually intends this type of interaction, it
* can re-execute the DELETE and then return NULL to cancel
* the outer delete.
*/
if (tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
/* Else, already deleted by self; nothing to do */
/*-------
* In an scenario in which R(a,b) and S(a,b) have
* R S
* ________ ________
* (1, 1) (1, 2)
* (1, 7)
*
* An update query such as:
* UPDATE R SET a = S.b FROM S WHERE R.b = S.a;
*
* will have an non-deterministic output. The tuple in R
* can be updated to (2,1) or (7,1).
* Since the introduction of SplitUpdate, these queries will
* send multiple requests to delete the same tuple. One of them
* will pass, but others will not. But there will also be
* multiple requests to insert a new version of the tuple, and
* we cannot cancel out those if the Delete cannot be
* performed. An error is reported in such scenario; otherwise
* you end up with multiple copies of the same row.
*-------
*/
if (splitUpdate)
{
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION ),
errmsg("multiple updates to a row by the same query is not allowed")));
}
return NULL;
case TM_Ok:
break;
case TM_Updated:
{
TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
*/
EvalPlanQualBegin(epqstate, estate);
inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
LockTupleExclusive, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&tmfd);
switch (result)
{
case TM_Ok:
Assert(tmfd.traversed);
epqslot = EvalPlanQual(estate,
epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/*
* If requested, skip delete and pass back the
* updated row.
*/
if (epqreturnslot)
{
*epqreturnslot = epqslot;
return NULL;
}
else
goto ldelete;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously updated by this
* command, ignore the delete, otherwise error
* out.
*
* See also TM_SelfModified response to
* table_tuple_delete() above.
*/
if (tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
default:
/*
* TM_Invisible should be impossible because we're
* waiting for updated row versions, and would
* already have errored out if the first version
* is invisible.
*
* TM_Updated should be impossible, because we're
* locking the latest version via
* TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
*/
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
}
Assert(false);
break;
}
case TM_Deleted:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent delete")));
/* tuple already deleted; nothing to do */
return NULL;
default:
elog(ERROR, "unrecognized table_tuple_delete status: %u",
result);
return NULL;
}
/*
* Note: Normally one would think that we have to delete index tuples
* associated with the heap tuple now...
*
* ... but in POSTGRES, we have no need to do this because VACUUM will
* take care of it later. We can't delete index tuples immediately
* anyway, since the tuple is still visible to other transactions.
*/
}
if (canSetTag)
(estate->es_processed)++;
/* Tell caller that the delete actually happened. */
if (tupleDeleted)
*tupleDeleted = true;
/*
* If this delete is the result of a partition key update that moved the
* tuple to a new partition, put this row into the transition OLD TABLE,
* if there is one. We need to do this separately for DELETE and INSERT
* because they happen on different tables.
*/
ar_delete_trig_tcs = mtstate->mt_transition_capture;
if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
&& mtstate->mt_transition_capture->tcs_update_old_table)
{
ExecARUpdateTriggers(estate, resultRelInfo,
tupleid,
oldtuple,
NULL,
NULL,
mtstate->mt_transition_capture);
/*
* We've already captured the NEW TABLE row, so make sure any AR
* DELETE trigger fired below doesn't capture it again.
*/
ar_delete_trig_tcs = NULL;
}
/* AFTER ROW DELETE Triggers */
/*
* GPDB_12_MERGE_FIXME: PostgreSQL *does* fire INSERT and DELETE
* triggers on an UPDATE that moves tuples from one partition to another.
* Should we follow that example with cross-segment UPDATEs too?
*/
if (!RelationIsAppendOptimized(resultRelationDesc) && !splitUpdate)
{
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
ar_delete_trig_tcs);
}
/* Process RETURNING if present and if requested */
/*
* In a split update, the processed rows are returned by the INSERT
* of the new row, not the DELETE of the old one.
*/
if (processReturning && resultRelInfo->ri_projectReturning)
{
/*
* We have to put the target tuple into a slot, which means first we
* gotta fetch it. We can use the trigger tuple slot.
*/
TupleTableSlot *rslot;
if (resultRelInfo->ri_FdwRoutine)
{
/* FDW must have provided a slot containing the deleted row */
Assert(!TupIsNull(slot));
}
else
{
slot = ExecGetReturningSlot(estate, resultRelInfo);
if (oldtuple != NULL)
{
ExecForceStoreHeapTuple(oldtuple, slot, false);
}
else
{
if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
SnapshotAny, slot))
elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
}
}
rslot = ExecProcessReturning(resultRelInfo, slot, planSlot);
/*
* Before releasing the target tuple again, make sure rslot has a
* local copy of any pass-by-reference values.
*/
ExecMaterializeSlot(rslot);
ExecClearTuple(slot);
return rslot;
}
return NULL;
}
/* ----------------------------------------------------------------
* ExecUpdate
*
* note: we can't run UPDATE queries with transactions
* off because UPDATEs are actually INSERTs and our
* scan will mistakenly loop forever, updating the tuple
* it just inserted.. This should be fixed but until it
* is, we don't want to get stuck in an infinite loop
* which corrupts your database..
*
* When updating a table, tupleid identifies the tuple to
* update and oldtuple is NULL. When updating a view, oldtuple
* is passed to the INSTEAD OF triggers and identifies what to
* update, and tupleid is invalid. When updating a foreign table,
* tupleid is invalid; the FDW has to figure out which row to
* update using data from the planSlot. oldtuple is passed to
* foreign table triggers; it is NULL when the foreign table has
* no relevant triggers.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecUpdate(ModifyTableState *mtstate,
ItemPointer tupleid,
HeapTuple oldtuple,
TupleTableSlot *slot,
TupleTableSlot *planSlot,
int32 segid, /* gpdb specific parameter, check if tuple to update is from local */
EPQState *epqstate,
EState *estate,
bool canSetTag)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
TM_Result result;
TM_FailureData tmfd;
List *recheckIndexes = NIL;
TupleConversionMap *saved_tcs_map = NULL;
/*
* abort the operation if not running transactions
*/
if (IsBootstrapProcessingMode())
elog(ERROR, "cannot UPDATE during bootstrap");
/*
* Sanity check the distribution of the tuple to prevent
* potential data corruption in case users manipulate data
* incorrectly (e.g. insert data on incorrect segment through
* utility mode) or there is bug in code, etc.
*/
if (segid != GpIdentity.segindex)
elog(ERROR,
"distribution key of the tuple (%u, %u) doesn't belong to "
"current segment (actually from seg%d)",
BlockIdGetBlockNumber(&(tupleid->ip_blkid)),
tupleid->ip_posid,
segid);
ExecMaterializeSlot(slot);
/*
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/* BEFORE ROW UPDATE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_before_row)
{
if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
tupleid, oldtuple, slot))
return NULL; /* "do nothing" */
}
/* INSTEAD OF ROW UPDATE Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_instead_row)
{
if (!ExecIRUpdateTriggers(estate, resultRelInfo,
oldtuple, slot))
return NULL; /* "do nothing" */
}
else if (resultRelInfo->ri_FdwRoutine)
{
/*
* Compute stored generated columns
*/
if (resultRelationDesc->rd_att->constr &&
resultRelationDesc->rd_att->constr->has_generated_stored)
ExecComputeStoredGenerated(estate, slot);
/*
* update in foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
resultRelInfo,
slot,
planSlot);
if (slot == NULL) /* "do nothing" */
return NULL;
/*
* AFTER ROW Triggers or RETURNING expressions might reference the
* tableoid column, so (re-)initialize tts_tableOid before evaluating
* them.
*/
slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
}
else
{
LockTupleMode lockmode;
bool partition_constraint_failed;
bool update_indexes;
/*
* Constraints might reference the tableoid column, so (re-)initialize
* tts_tableOid before evaluating them.
*/
slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
/*
* Compute stored generated columns
*/
if (resultRelationDesc->rd_att->constr &&
resultRelationDesc->rd_att->constr->has_generated_stored)
ExecComputeStoredGenerated(estate, slot);
/*
* Check any RLS UPDATE WITH CHECK policies
*
* If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here and recheck any RLS policies and constraints.
* (We don't need to redo triggers, however. If there are any BEFORE
* triggers then trigger.c will have done table_tuple_lock to lock the
* correct tuple, so there's no need to do them again.)
*/
lreplace:;
/* ensure slot is independent, consider e.g. EPQ */
ExecMaterializeSlot(slot);
/*
* If partition constraint fails, this row might get moved to another
* partition, in which case we should check the RLS CHECK policy just
* before inserting into the new partition, rather than doing it here.
* This is because a trigger on that partition might again change the
* row. So skip the WCO checks if the partition constraint fails.
*/
partition_constraint_failed =
resultRelInfo->ri_PartitionCheck &&
!ExecPartitionCheck(resultRelInfo, slot, estate, false);
if (!partition_constraint_failed &&
resultRelInfo->ri_WithCheckOptions != NIL)
{
/*
* ExecWithCheckOptions() will skip any WCOs which are not of the
* kind we are looking for at this point.
*/
ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
resultRelInfo, slot, estate);
}
/*
* If a partition check failed, try to move the row into the right
* partition.
*/
if (partition_constraint_failed)
{
bool tuple_deleted;
TupleTableSlot *ret_slot;
TupleTableSlot *epqslot = NULL;
PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
int map_index;
TupleConversionMap *tupconv_map;
/*
* Disallow an INSERT ON CONFLICT DO UPDATE that causes the
* original row to migrate to a different partition. Maybe this
* can be implemented some day, but it seems a fringe feature with
* little redeeming value.
*/
if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("invalid ON UPDATE specification"),
errdetail("The result tuple would appear in a different partition than the original tuple.")));
/*
* When an UPDATE is run on a leaf partition, we will not have
* partition tuple routing set up. In that case, fail with
* partition constraint violation error.
*/
if (proute == NULL)
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
/*
* Row movement, part 1. Delete the tuple, but skip RETURNING
* processing. We want to return rows from INSERT.
*/
ExecDelete(mtstate, tupleid, segid, oldtuple, planSlot, epqstate,
estate, false, false /* canSetTag */ ,
true /* changingPart */ ,
false /* splitUpdate */ ,
&tuple_deleted, &epqslot);
/*
* For some reason if DELETE didn't happen (e.g. trigger prevented
* it, or it was already deleted by self, or it was concurrently
* deleted by another transaction), then we should skip the insert
* as well; otherwise, an UPDATE could cause an increase in the
* total number of rows across all partitions, which is clearly
* wrong.
*
* For a normal UPDATE, the case where the tuple has been the
* subject of a concurrent UPDATE or DELETE would be handled by
* the EvalPlanQual machinery, but for an UPDATE that we've
* translated into a DELETE from this partition and an INSERT into
* some other partition, that's not available, because CTID chains
* can't span relation boundaries. We mimic the semantics to a
* limited extent by skipping the INSERT if the DELETE fails to
* find a tuple. This ensures that two concurrent attempts to
* UPDATE the same tuple at the same time can't turn one tuple
* into two, and that an UPDATE of a just-deleted tuple can't
* resurrect it.
*/
if (!tuple_deleted)
{
/*
* epqslot will be typically NULL. But when ExecDelete()
* finds that another transaction has concurrently updated the
* same row, it re-fetches the row, skips the delete, and
* epqslot is set to the re-fetched tuple slot. In that case,
* we need to do all the checks again.
*/
if (TupIsNull(epqslot))
return NULL;
else
{
slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
goto lreplace;
}
}
/*
* Updates set the transition capture map only when a new subplan
* is chosen. But for inserts, it is set for each row. So after
* INSERT, we need to revert back to the map created for UPDATE;
* otherwise the next UPDATE will incorrectly use the one created
* for INSERT. So first save the one created for UPDATE.
*/
if (mtstate->mt_transition_capture)
saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
/*
* resultRelInfo is one of the per-subplan resultRelInfos. So we
* should convert the tuple into root's tuple descriptor, since
* ExecInsert() starts the search from root. The tuple conversion
* map list is in the order of mtstate->resultRelInfo[], so to
* retrieve the one for this resultRel, we need to know the
* position of the resultRel in mtstate->resultRelInfo[].
*/
map_index = resultRelInfo - mtstate->resultRelInfo;
Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
if (tupconv_map != NULL)
slot = execute_attr_map_slot(tupconv_map->attrMap,
slot,
mtstate->mt_root_tuple_slot);
/*
* Prepare for tuple routing, making it look like we're inserting
* into the root.
*/
Assert(mtstate->rootResultRelInfo != NULL);
slot = ExecPrepareTupleRouting(mtstate, estate, proute,
mtstate->rootResultRelInfo, slot);
ret_slot = ExecInsert(mtstate, slot, planSlot,
estate, canSetTag, false /* splitUpdate */);
/* Revert ExecPrepareTupleRouting's node change. */
estate->es_result_relation_info = resultRelInfo;
if (mtstate->mt_transition_capture)
{
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
}
return ret_slot;
}
/*
* Check the constraints of the tuple. We've already checked the
* partition constraint above; however, we must still ensure the tuple
* passes all other constraints, so we will call ExecConstraints() and
* have it validate all remaining checks.
*/
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
/*
* replace the heap tuple
*
* Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
* that the row to be updated is visible to that snapshot, and throw a
* can't-serialize error if not. This is a special-case behavior
* needed for referential integrity updates in transaction-snapshot
* mode transactions.
*/
result = table_tuple_update(resultRelationDesc, tupleid, slot,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
&tmfd, &lockmode, &update_indexes);
switch (result)
{
case TM_SelfModified:
/*
* The target tuple was already updated or deleted by the
* current command, or by a later command in the current
* transaction. The former case is possible in a join UPDATE
* where multiple tuples join to the same target tuple. This
* is pretty questionable, but Postgres has always allowed it:
* we just execute the first update action and ignore
* additional update attempts.
*
* The latter case arises if the tuple is modified by a
* command in a BEFORE trigger, or perhaps by a command in a
* volatile function used in the query. In such situations we
* should not ignore the update, but it is equally unsafe to
* proceed. We don't want to discard the original UPDATE
* while keeping the triggered actions based on it; and we
* have no principled way to merge this update with the
* previous ones. So throwing an error is the only safe
* course.
*
* If a trigger actually intends this type of interaction, it
* can re-execute the UPDATE (assuming it can figure out how)
* and then return NULL to cancel the outer update.
*
* In GPDB, for AO tables TM_SelfModified is returned only
* in case of same command tuple update based on visimap dirty
* list checking. Also, tmfd is not initialized and can't be for
* AO case, as visimap update within same command happens at end
* of command.
*/
if (!RelationIsAppendOptimized(resultRelationDesc) &&
tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
/* Else, already updated by self; nothing to do */
return NULL;
case TM_Ok:
break;
case TM_Updated:
{
TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
*/
EvalPlanQualBegin(epqstate, estate);
inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
lockmode, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&tmfd);
switch (result)
{
case TM_Ok:
Assert(tmfd.traversed);
epqslot = EvalPlanQual(estate,
epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
goto lreplace;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously modified by
* this command, ignore the redundant update,
* otherwise error out.
*
* See also TM_SelfModified response to
* table_tuple_update() above.
*/
if (tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
default:
/* see table_tuple_lock call in ExecDelete() */
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
}
}
break;
case TM_Deleted:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent delete")));
/* tuple already deleted; nothing to do */
return NULL;
default:
elog(ERROR, "unrecognized table_tuple_update status: %u",
result);
return NULL;
}
/* insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
}
if (canSetTag)
(estate->es_processed)++;
/* AFTER ROW UPDATE Triggers */
/* GPDB: AO and AOCO tables don't support triggers */
if (!RelationIsAppendOptimized(resultRelationDesc))
ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
recheckIndexes,
mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture :
mtstate->mt_transition_capture);
list_free(recheckIndexes);
/*
* Check any WITH CHECK OPTION constraints from parent views. We are
* required to do this after testing all constraints and uniqueness
* violations per the SQL spec, so we do it after actually updating the
* record in the heap and all indexes.
*
* ExecWithCheckOptions() will skip any WCOs which are not of the kind we
* are looking for at this point.
*/
if (resultRelInfo->ri_WithCheckOptions != NIL)
ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
return ExecProcessReturning(resultRelInfo, slot, planSlot);
return NULL;
}
/*
* Insert the new tuple version of a Split Update
*
* We have to check if this UPDATE also moves the row to
* a different partition, much like ExecUpdate() does.
*/
static TupleTableSlot *
ExecSplitUpdate_Insert(ModifyTableState *mtstate,
TupleTableSlot *slot,
TupleTableSlot *planSlot,
EState *estate,
bool canSetTag)
{
ResultRelInfo *resultRelInfo;
Relation resultRelationDesc;
bool partition_constraint_failed;
TupleConversionMap *saved_tcs_map = NULL;
PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
int map_index;
TupleConversionMap *tupconv_map;
/*
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/* ensure slot is independent, consider e.g. EPQ */
ExecMaterializeSlot(slot);
/*
* If partition constraint fails, this row might get moved to another
* partition, in which case we should check the RLS CHECK policy just
* before inserting into the new partition, rather than doing it here.
* This is because a trigger on that partition might again change the
* row. So skip the WCO checks if the partition constraint fails.
*/
partition_constraint_failed =
resultRelInfo->ri_PartitionCheck &&
!ExecPartitionCheck(resultRelInfo, slot, estate, false);
if (!partition_constraint_failed &&
resultRelInfo->ri_WithCheckOptions != NIL)
{
/*
* ExecWithCheckOptions() will skip any WCOs which are not of the
* kind we are looking for at this point.
*/
ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
resultRelInfo, slot, estate);
}
/*
* Updates set the transition capture map only when a new subplan
* is chosen. But for inserts, it is set for each row. So after
* INSERT, we need to revert back to the map created for UPDATE;
* otherwise the next UPDATE will incorrectly use the one created
* for INSERT. So first save the one created for UPDATE.
*/
if (mtstate->mt_transition_capture)
saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
if (partition_constraint_failed)
{
/*
* When an UPDATE is run on a leaf partition, we will not have
* partition tuple routing set up. In that case, fail with
* partition constraint violation error.
*/
if (proute == NULL)
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
/*
* resultRelInfo is one of the per-subplan resultRelInfos. So we
* should convert the tuple into root's tuple descriptor, since
* ExecInsert() starts the search from root. The tuple conversion
* map list is in the order of mtstate->resultRelInfo[], so to
* retrieve the one for this resultRel, we need to know the
* position of the resultRel in mtstate->resultRelInfo[].
*/
map_index = resultRelInfo - mtstate->resultRelInfo;
Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
if (tupconv_map != NULL)
slot = execute_attr_map_slot(tupconv_map->attrMap,
slot,
mtstate->mt_root_tuple_slot);
/*
* Prepare for tuple routing, making it look like we're inserting
* into the root.
*/
Assert(mtstate->rootResultRelInfo != NULL);
slot = ExecPrepareTupleRouting(mtstate, estate, proute,
mtstate->rootResultRelInfo, slot);
slot = ExecInsert(mtstate, slot, planSlot,
estate, mtstate->canSetTag,
true /* splitUpdate */);
/* Revert ExecPrepareTupleRouting's node change. */
estate->es_result_relation_info = resultRelInfo;
if (mtstate->mt_transition_capture)
{
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
}
}
else
{
slot = ExecInsert(mtstate, slot, planSlot,
estate, mtstate->canSetTag,
true /* splitUpdate */);
}
return slot;
}
/*
* ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
*
* Try to lock tuple for update as part of speculative insertion. If
* a qual originating from ON CONFLICT DO UPDATE is satisfied, update
* (but still lock row, even though it may not satisfy estate's
* snapshot).
*
* Returns true if we're done (with or without an update), or false if
* the caller must retry the INSERT from scratch.
*/
static bool
ExecOnConflictUpdate(ModifyTableState *mtstate,
ResultRelInfo *resultRelInfo,
ItemPointer conflictTid,
TupleTableSlot *planSlot,
TupleTableSlot *excludedSlot,
EState *estate,
bool canSetTag,
TupleTableSlot **returning)
{
ExprContext *econtext = mtstate->ps.ps_ExprContext;
Relation relation = resultRelInfo->ri_RelationDesc;
ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
TM_FailureData tmfd;
LockTupleMode lockmode;
TM_Result test;
Datum xminDatum;
TransactionId xmin;
bool isnull;
/* Determine lock mode to use */
lockmode = ExecUpdateLockMode(estate, resultRelInfo);
/*
* Lock tuple for update. Don't follow updates when tuple cannot be
* locked without doing so. A row locking conflict here means our
* previous conclusion that the tuple is conclusively committed is not
* true anymore.
*/
test = table_tuple_lock(relation, conflictTid,
estate->es_snapshot,
existing, estate->es_output_cid,
lockmode, LockWaitBlock, 0,
&tmfd);
switch (test)
{
case TM_Ok:
/* success! */
break;
case TM_Invisible:
/*
* This can occur when a just inserted tuple is updated again in
* the same command. E.g. because multiple rows with the same
* conflicting key values are inserted.
*
* This is somewhat similar to the ExecUpdate() TM_SelfModified
* case. We do not want to proceed because it would lead to the
* same row being updated a second time in some unspecified order,
* and in contrast to plain UPDATEs there's no historical behavior
* to break.
*
* It is the user's responsibility to prevent this situation from
* occurring. These problems are why SQL-2003 similarly specifies
* that for SQL MERGE, an exception must be raised in the event of
* an attempt to update the same row twice.
*/
xminDatum = slot_getsysattr(existing,
MinTransactionIdAttributeNumber,
&isnull);
Assert(!isnull);
xmin = DatumGetTransactionId(xminDatum);
if (TransactionIdIsCurrentTransactionId(xmin))
ereport(ERROR,
(errcode(ERRCODE_CARDINALITY_VIOLATION),
errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
/* This shouldn't happen */
elog(ERROR, "attempted to lock invisible tuple");
break;
case TM_SelfModified:
/*
* This state should never be reached. As a dirty snapshot is used
* to find conflicting tuples, speculative insertion wouldn't have
* seen this row to conflict with.
*/
elog(ERROR, "unexpected self-updated tuple");
break;
case TM_Updated:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/*
* As long as we don't support an UPDATE of INSERT ON CONFLICT for
* a partitioned table we shouldn't reach to a case where tuple to
* be lock is moved to another partition due to concurrent update
* of the partition key.
*/
Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
/*
* Tell caller to try again from the very start.
*
* It does not make sense to use the usual EvalPlanQual() style
* loop here, as the new version of the row might not conflict
* anymore, or the conflicting tuple has actually been deleted.
*/
ExecClearTuple(existing);
return false;
case TM_Deleted:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent delete")));
/* see TM_Updated case */
Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
ExecClearTuple(existing);
return false;
default:
elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
}
/* Success, the tuple is locked. */
/*
* Verify that the tuple is visible to our MVCC snapshot if the current
* isolation level mandates that.
*
* It's not sufficient to rely on the check within ExecUpdate() as e.g.
* CONFLICT ... WHERE clause may prevent us from reaching that.
*
* This means we only ever continue when a new command in the current
* transaction could see the row, even though in READ COMMITTED mode the
* tuple will not be visible according to the current statement's
* snapshot. This is in line with the way UPDATE deals with newer tuple
* versions.
*/
ExecCheckTupleVisible(estate, relation, existing);
/*
* Make tuple and any needed join variables available to ExecQual and
* ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while
* the target's existing tuple is installed in the scantuple. EXCLUDED
* has been made to reference INNER_VAR in setrefs.c, but there is no
* other redirection.
*/
econtext->ecxt_scantuple = existing;
econtext->ecxt_innertuple = excludedSlot;
econtext->ecxt_outertuple = NULL;
if (!ExecQual(onConflictSetWhere, econtext))
{
ExecClearTuple(existing); /* see return below */
InstrCountFiltered1(&mtstate->ps, 1);
return true; /* done with the tuple */
}
if (resultRelInfo->ri_WithCheckOptions != NIL)
{
/*
* Check target's existing tuple against UPDATE-applicable USING
* security barrier quals (if any), enforced here as RLS checks/WCOs.
*
* The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
* quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
* but that's almost the extent of its special handling for ON
* CONFLICT DO UPDATE.
*
* The rewriter will also have associated UPDATE applicable straight
* RLS checks/WCOs for the benefit of the ExecUpdate() call that
* follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
* kinds, so there is no danger of spurious over-enforcement in the
* INSERT or UPDATE path.
*/
ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
existing,
mtstate->ps.state);
}
/* Project the new tuple version */
ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
/*
* Note that it is possible that the target tuple has been modified in
* this session, after the above table_tuple_lock. We choose to not error
* out in that case, in line with ExecUpdate's treatment of similar cases.
* This can happen if an UPDATE is triggered from within ExecQual(),
* ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
* wCTE in the ON CONFLICT's SET.
*/
/* Execute UPDATE with projection */
*returning = ExecUpdate(mtstate, conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
planSlot,
GpIdentity.segindex,
&mtstate->mt_epqstate, mtstate->ps.state,
canSetTag);
/*
* Clear out existing tuple, as there might not be another conflict among
* the next input rows. Don't want to hold resources till the end of the
* query.
*/
ExecClearTuple(existing);
return true;
}
/*
* Process BEFORE EACH STATEMENT triggers
*/
static void
fireBSTriggers(ModifyTableState *node)
{
ModifyTable *plan = (ModifyTable *) node->ps.plan;
ResultRelInfo *resultRelInfo = node->resultRelInfo;
/*
* If the node modifies a partitioned table, we must fire its triggers.
* Note that in that case, node->resultRelInfo points to the first leaf
* partition, not the root table.
*/
if (node->rootResultRelInfo != NULL)
resultRelInfo = node->rootResultRelInfo;
switch (node->operation)
{
case CMD_INSERT:
ExecBSInsertTriggers(node->ps.state, resultRelInfo);
if (plan->onConflictAction == ONCONFLICT_UPDATE)
ExecBSUpdateTriggers(node->ps.state,
resultRelInfo);
break;
case CMD_UPDATE:
ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
break;
case CMD_DELETE:
ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
break;
default:
elog(ERROR, "unknown operation");
break;
}
}
/*
* Return the target rel ResultRelInfo.
*
* This relation is the same as :
* - the relation for which we will fire AFTER STATEMENT triggers.
* - the relation into whose tuple format all captured transition tuples must
* be converted.
* - the root partitioned table.
*/
static ResultRelInfo *
getTargetResultRelInfo(ModifyTableState *node)
{
/*
* Note that if the node modifies a partitioned table, node->resultRelInfo
* points to the first leaf partition, not the root table.
*/
if (node->rootResultRelInfo != NULL)
return node->rootResultRelInfo;
else
return node->resultRelInfo;
}
/*
* Process AFTER EACH STATEMENT triggers
*/
static void
fireASTriggers(ModifyTableState *node)
{
ModifyTable *plan = (ModifyTable *) node->ps.plan;
ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);
switch (node->operation)
{
case CMD_INSERT:
if (plan->onConflictAction == ONCONFLICT_UPDATE)
ExecASUpdateTriggers(node->ps.state,
resultRelInfo,
node->mt_oc_transition_capture);
ExecASInsertTriggers(node->ps.state, resultRelInfo,
node->mt_transition_capture);
break;
case CMD_UPDATE:
ExecASUpdateTriggers(node->ps.state, resultRelInfo,
node->mt_transition_capture);
break;
case CMD_DELETE:
ExecASDeleteTriggers(node->ps.state, resultRelInfo,
node->mt_transition_capture);
break;
default:
elog(ERROR, "unknown operation");
break;
}
}
/*
* Set up the state needed for collecting transition tuples for AFTER
* triggers.
*/
static void
ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
{
ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
/* Check for transition tables on the directly targeted relation. */
mtstate->mt_transition_capture =
MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
RelationGetRelid(targetRelInfo->ri_RelationDesc),
mtstate->operation);
if (plan->operation == CMD_INSERT &&
plan->onConflictAction == ONCONFLICT_UPDATE)
mtstate->mt_oc_transition_capture =
MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
RelationGetRelid(targetRelInfo->ri_RelationDesc),
CMD_UPDATE);
/*
* If we found that we need to collect transition tuples then we may also
* need tuple conversion maps for any children that have TupleDescs that
* aren't compatible with the tuplestores. (We can share these maps
* between the regular and ON CONFLICT cases.)
*/
if (mtstate->mt_transition_capture != NULL ||
mtstate->mt_oc_transition_capture != NULL)
{
ExecSetupChildParentMapForSubplan(mtstate);
/*
* Install the conversion map for the first plan for UPDATE and DELETE
* operations. It will be advanced each time we switch to the next
* plan. (INSERT operations set it every time, so we need not update
* mtstate->mt_oc_transition_capture here.)
*/
if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
mtstate->mt_transition_capture->tcs_map =
tupconv_map_for_subplan(mtstate, 0);
}
}
/*
* ExecPrepareTupleRouting --- prepare for routing one tuple
*
* Determine the partition in which the tuple in slot is to be inserted,
* and modify mtstate and estate to prepare for it.
*
* Caller must revert the estate changes after executing the insertion!
* In mtstate, transition capture changes may also need to be reverted.
*
* Returns a slot holding the tuple of the partition rowtype.
*/
static TupleTableSlot *
ExecPrepareTupleRouting(ModifyTableState *mtstate,
EState *estate,
PartitionTupleRouting *proute,
ResultRelInfo *targetRelInfo,
TupleTableSlot *slot)
{
ResultRelInfo *partrel;
PartitionRoutingInfo *partrouteinfo;
TupleConversionMap *map;
/*
* Lookup the target partition's ResultRelInfo. If ExecFindPartition does
* not find a valid partition for the tuple in 'slot' then an error is
* raised. An error may also be raised if the found partition is not a
* valid target for INSERTs. This is required since a partitioned table
* UPDATE to another partition becomes a DELETE+INSERT.
*/
partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
partrouteinfo = partrel->ri_PartitionInfo;
Assert(partrouteinfo != NULL);
/*
* Make it look like we are inserting into the partition.
*/
estate->es_result_relation_info = partrel;
/*
* If we're capturing transition tuples, we might need to convert from the
* partition rowtype to root partitioned table's rowtype.
*/
if (mtstate->mt_transition_capture != NULL)
{
if (partrel->ri_TrigDesc &&
partrel->ri_TrigDesc->trig_insert_before_row)
{
/*
* If there are any BEFORE triggers on the partition, we'll have
* to be ready to convert their result back to tuplestore format.
*/
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
mtstate->mt_transition_capture->tcs_map =
partrouteinfo->pi_PartitionToRootMap;
}
else
{
/*
* Otherwise, just remember the original unconverted tuple, to
* avoid a needless round trip conversion.
*/
mtstate->mt_transition_capture->tcs_original_insert_tuple = slot;
mtstate->mt_transition_capture->tcs_map = NULL;
}
}
if (mtstate->mt_oc_transition_capture != NULL)
{
mtstate->mt_oc_transition_capture->tcs_map =
partrouteinfo->pi_PartitionToRootMap;
}
/*
* Convert the tuple, if necessary.
*/
map = partrouteinfo->pi_RootToPartitionMap;
if (map != NULL)
{
TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot;
slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
}
return slot;
}
/*
* Initialize the child-to-root tuple conversion map array for UPDATE subplans.
*
* This map array is required to convert the tuple from the subplan result rel
* to the target table descriptor. This requirement arises for two independent
* scenarios:
* 1. For update-tuple-routing.
* 2. For capturing tuples in transition tables.
*/
static void
ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
{
ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
TupleDesc outdesc;
int numResultRelInfos = mtstate->mt_nplans;
int i;
/*
* Build array of conversion maps from each child's TupleDesc to the one
* used in the target relation. The map pointers may be NULL when no
* conversion is necessary, which is hopefully a common case.
*/
/* Get tuple descriptor of the target rel. */
outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);
mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
palloc(sizeof(TupleConversionMap *) * numResultRelInfos);
for (i = 0; i < numResultRelInfos; ++i)
{
mtstate->mt_per_subplan_tupconv_maps[i] =
convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
outdesc,
gettext_noop("could not convert row type"));
}
}
/*
* For a given subplan index, get the tuple conversion map.
*/
static TupleConversionMap *
tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
{
/* If nobody else set the per-subplan array of maps, do so ourselves. */
if (mtstate->mt_per_subplan_tupconv_maps == NULL)
ExecSetupChildParentMapForSubplan(mtstate);
Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
return mtstate->mt_per_subplan_tupconv_maps[whichplan];
}
/* ----------------------------------------------------------------
* ExecModifyTable
*
* Perform table modifications as required, and return RETURNING results
* if needed.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
ExecModifyTable(PlanState *pstate)
{
ModifyTableState *node = castNode(ModifyTableState, pstate);
PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
EState *estate = node->ps.state;
CmdType operation = node->operation;
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
PlanState *subplanstate;
JunkFilter *junkfilter;
AttrNumber action_attno;
AttrNumber segid_attno;
TupleTableSlot *slot;
TupleTableSlot *planSlot;
ItemPointer tupleid;
ItemPointerData tuple_ctid;
HeapTupleData oldtupdata;
HeapTuple oldtuple;
CHECK_FOR_INTERRUPTS();
/*
* This should NOT get called during EvalPlanQual; we should have passed a
* subplan tree to EvalPlanQual, instead. Use a runtime test not just
* Assert because this condition is easy to miss in testing. (Note:
* although ModifyTable should not get executed within an EvalPlanQual
* operation, we do have to allow it to be initialized and shut down in
* case it is within a CTE subplan. Hence this test must be here, not in
* ExecInitModifyTable.)
*/
if (estate->es_epqTupleSlot != NULL)
elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
/*
* If we've already completed processing, don't try to do more. We need
* this test because ExecPostprocessPlan might call us an extra time, and
* our subplan's nodes aren't necessarily robust against being called
* extra times.
*/
if (node->mt_done)
return NULL;
if (Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer)
{
/*
* Current Greenplum MPP architecture only support one writer gang, and
* only writer gang can execute DML nodes. There is no code path to reach
* here. For writable CTE case as below:
*
* create table t(a int);
* with wcte as (delete from t returning a)
* insert into t select * from wcte;
*
* The above query will error out during parse-analyze so not reach here.
* If reaching here, some bugs must happen.
*/
elog(ERROR, "Reader Gang execute ModifyTable node, some bugs must happen");
}
/*
* On first call, fire BEFORE STATEMENT triggers before proceeding.
*/
if (node->fireBSTriggers)
{
fireBSTriggers(node);
node->fireBSTriggers = false;
}
/* Preload local variables */
resultRelInfo = node->resultRelInfo + node->mt_whichplan;
subplanstate = node->mt_plans[node->mt_whichplan];
junkfilter = resultRelInfo->ri_junkFilter;
action_attno = resultRelInfo->ri_action_attno;
segid_attno = resultRelInfo->ri_segid_attno;
/*
* es_result_relation_info must point to the currently active result
* relation while we are within this ModifyTable node. Even though
* ModifyTable nodes can't be nested statically, they can be nested
* dynamically (since our subplan could include a reference to a modifying
* CTE). So we have to save and restore the caller's value.
*/
saved_resultRelInfo = estate->es_result_relation_info;
estate->es_result_relation_info = resultRelInfo;
/*
* Fetch rows from subplan(s), and execute the required table modification
* for each row.
*/
for (;;)
{
/*
* Reset the per-output-tuple exprcontext. This is needed because
* triggers expect to use that context as workspace. It's a bit ugly
* to do this below the top level of the plan, however. We might need
* to rethink this later.
*/
ResetPerTupleExprContext(estate);
/*
* Reset per-tuple memory context used for processing on conflict and
* returning clauses, to free any expression evaluation storage
* allocated in the previous cycle.
*/
if (pstate->ps_ExprContext)
ResetExprContext(pstate->ps_ExprContext);
planSlot = ExecProcNode(subplanstate);
if (TupIsNull(planSlot))
{
/* advance to next subplan if any */
node->mt_whichplan++;
if (node->mt_whichplan < node->mt_nplans)
{
estate->es_result_relation_info = estate->es_result_relations + node->mt_whichplan;
resultRelInfo = estate->es_result_relation_info;
subplanstate = node->mt_plans[node->mt_whichplan];
junkfilter = estate->es_result_relation_info->ri_junkFilter;
action_attno = estate->es_result_relation_info->ri_action_attno;
segid_attno = estate->es_result_relation_info->ri_segid_attno;
EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
node->mt_arowmarks[node->mt_whichplan]);
/* Prepare to convert transition tuples from this child. */
if (node->mt_transition_capture != NULL)
{
node->mt_transition_capture->tcs_map =
tupconv_map_for_subplan(node, node->mt_whichplan);
}
if (node->mt_oc_transition_capture != NULL)
{
node->mt_oc_transition_capture->tcs_map =
tupconv_map_for_subplan(node, node->mt_whichplan);
}
continue;
}
else
break;
}
/*
* Ensure input tuple is the right format for the target relation.
*/
if (node->mt_scans[node->mt_whichplan]->tts_ops != planSlot->tts_ops)
{
ExecCopySlot(node->mt_scans[node->mt_whichplan], planSlot);
planSlot = node->mt_scans[node->mt_whichplan];
}
/*
* If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
* here is compute the RETURNING expressions.
*/
if (resultRelInfo->ri_usesFdwDirectModify)
{
Assert(resultRelInfo->ri_projectReturning);
/*
* A scan slot containing the data that was actually inserted,
* updated or deleted has already been made available to
* ExecProcessReturning by IterateDirectModify, so no need to
* provide it here.
*/
slot = ExecProcessReturning(resultRelInfo, NULL, planSlot);
estate->es_result_relation_info = saved_resultRelInfo;
return slot;
}
EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
slot = planSlot;
int32 segid = GpIdentity.segindex;
int action = -1;
tupleid = NULL;
oldtuple = NULL;
if (junkfilter != NULL)
{
/*
* extract the 'ctid' or 'wholerow' junk attribute.
*/
if (operation == CMD_UPDATE || operation == CMD_DELETE)
{
char relkind;
Datum datum;
bool isNull;
relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)
{
datum = ExecGetJunkAttribute(slot,
junkfilter->jf_junkAttNo,
&isNull);
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "ctid is NULL");
tupleid = (ItemPointer) DatumGetPointer(datum);
tuple_ctid = *tupleid; /* be sure we don't free ctid!! */
tupleid = &tuple_ctid;
}
/*
* Use the wholerow attribute, when available, to reconstruct
* the old relation tuple.
*
* Foreign table updates have a wholerow attribute when the
* relation has a row-level trigger. Note that the wholerow
* attribute does not carry system columns. Foreign table
* triggers miss seeing those, except that we know enough here
* to set t_tableOid. Quite separately from this, the FDW may
* fetch its own junk attrs to identify the row.
*
* Other relevant relkinds, currently limited to views, always
* have a wholerow attribute.
*/
else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo))
{
datum = ExecGetJunkAttribute(slot,
junkfilter->jf_junkAttNo,
&isNull);
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "wholerow is NULL");
oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
oldtupdata.t_len =
HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
ItemPointerSetInvalid(&(oldtupdata.t_self));
/* Historically, view triggers see invalid t_tableOid. */
oldtupdata.t_tableOid =
(relkind == RELKIND_VIEW) ? InvalidOid :
RelationGetRelid(resultRelInfo->ri_RelationDesc);
oldtuple = &oldtupdata;
}
else
Assert(relkind == RELKIND_FOREIGN_TABLE);
/*
* Extract GPDB-specific junk attributes.
*/
if (AttributeNumberIsValid(segid_attno))
{
datum = ExecGetJunkAttribute(slot,
segid_attno,
&isNull);
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "gp_segment_id is NULL");
segid = DatumGetInt32(datum);
}
if (AttributeNumberIsValid(action_attno))
{
datum = ExecGetJunkAttribute(slot,
action_attno,
&isNull);
/* shouldn't ever get a null result... */
if (isNull)
elog(ERROR, "action is NULL");
action = DatumGetInt32(datum);
}
}
/*
* apply the junkfilter if needed.
*/
if (operation != CMD_DELETE)
slot = ExecFilterJunk(junkfilter, slot);
}
switch (operation)
{
case CMD_INSERT:
/* Prepare for tuple routing if needed. */
if (proute)
slot = ExecPrepareTupleRouting(node, estate, proute,
resultRelInfo, slot);
slot = ExecInsert(node, slot, planSlot,
estate, node->canSetTag, false /* splitUpdate */);
/* Revert ExecPrepareTupleRouting's state change. */
if (proute)
estate->es_result_relation_info = resultRelInfo;
break;
case CMD_UPDATE:
if (!AttributeNumberIsValid(action_attno))
{
/* normal non-split UPDATE */
slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
segid,
&node->mt_epqstate, estate, node->canSetTag);
}
else if (DML_INSERT == action)
{
slot = ExecSplitUpdate_Insert(node, slot, planSlot,
estate, node->canSetTag);
}
else /* DML_DELETE */
{
slot = ExecDelete(node, tupleid, segid, oldtuple, planSlot,
&node->mt_epqstate, estate,
false,
false /* canSetTag */,
true /* changingPart */ ,
true /* splitUpdate */ ,
NULL, NULL);
}
break;
case CMD_DELETE:
slot = ExecDelete(node, tupleid, segid, oldtuple, planSlot,
&node->mt_epqstate, estate,
true, node->canSetTag,
false /* changingPart */ ,
false /* splitUpdate */ ,
NULL, NULL);
break;
default:
elog(ERROR, "unknown operation");
break;
}
/*
* If the target is a partitioned table, ExecInsert / ExecUpdate /
* ExecDelete might have changed es_result_relation_info to point to
* a partition, instead of the top-level table. Reset it. (It would
* be more tidy if those functions cleaned up after themselves, but
* it's more robust to do it here just once.)
*/
estate->es_result_relation_info = resultRelInfo;
/*
* If we got a RETURNING result, return it to caller. We'll continue
* the work on next call.
*/
if (slot)
{
estate->es_result_relation_info = saved_resultRelInfo;
return slot;
}
}
/* Restore es_result_relation_info before exiting */
estate->es_result_relation_info = saved_resultRelInfo;
/*
* We're done, but fire AFTER STATEMENT triggers before exiting.
*/
/* In GPDB, don't fire statement triggers in reader processes */
if (Gp_role != GP_ROLE_EXECUTE || Gp_is_writer)
fireASTriggers(node);
node->mt_done = true;
return NULL;
}
/* ----------------------------------------------------------------
* ExecInitModifyTable
* ----------------------------------------------------------------
*/
ModifyTableState *
ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
ModifyTableState *mtstate;
CmdType operation = node->operation;
int nplans = list_length(node->plans);
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
Plan *subplan;
ListCell *l;
int i;
Relation rel;
bool update_tuple_routing_needed = node->partColsUpdated;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
/*
* create state structure
*/
mtstate = makeNode(ModifyTableState);
mtstate->ps.plan = (Plan *) node;
mtstate->ps.state = estate;
mtstate->ps.ExecProcNode = ExecModifyTable;
mtstate->operation = operation;
mtstate->canSetTag = node->canSetTag;
mtstate->mt_done = false;
mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);
/* If modifying a partitioned table, initialize the root table info */
if (node->rootResultRelIndex >= 0)
mtstate->rootResultRelInfo = estate->es_root_result_relations +
node->rootResultRelIndex;
mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
mtstate->mt_nplans = nplans;
/* set up epqstate with dummy subplan data for the moment */
EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
if (CMD_UPDATE == operation)
{
mtstate->mt_isSplitUpdates = (bool *) palloc0(nplans * sizeof(bool));
if (node->isSplitUpdates)
{
if (list_length(node->isSplitUpdates) != nplans)
elog(ERROR, "ModifyTable node is missing is-split-update information");
i = 0;
foreach(l, node->isSplitUpdates)
mtstate->mt_isSplitUpdates[i++] = (bool) lfirst_int(l);
}
}
/* GPDB: Don't fire statement-triggers in QE reader processes */
if (Gp_role != GP_ROLE_EXECUTE || Gp_is_writer)
mtstate->fireBSTriggers = true;
/*
* call ExecInitNode on each of the plans to be executed and save the
* results into the array "mt_plans". This is also a convenient place to
* verify that the proposed target relations are valid and open their
* indexes for insertion of new index entries. Note we *must* set
* estate->es_result_relation_info correctly while we initialize each
* sub-plan; external modules such as FDWs may depend on that (see
* contrib/postgres_fdw/postgres_fdw.c: postgresBeginDirectModify() as one
* example).
*/
saved_resultRelInfo = estate->es_result_relation_info;
resultRelInfo = mtstate->resultRelInfo;
i = 0;
foreach(l, node->plans)
{
subplan = (Plan *) lfirst(l);
/* Initialize the usesFdwDirectModify flag */
resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
node->fdwDirectModifyPlans);
/*
* Verify result relation is a valid target for the current operation
*/
CheckValidResultRel(resultRelInfo, operation);
/*
* If there are indices on the result relation, open them and save
* descriptors in the result relation info, so that we can add new
* index entries for the tuples we add/update. We need not do this
* for a DELETE, however, since deletion doesn't affect indexes. Also,
* inside an EvalPlanQual operation, the indexes might be open
* already, since we share the resultrel state with the original
* query.
*/
if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
operation != CMD_DELETE &&
resultRelInfo->ri_IndexRelationDescs == NULL)
ExecOpenIndices(resultRelInfo,
node->onConflictAction != ONCONFLICT_NONE);
/*
* If this is an UPDATE and a BEFORE UPDATE trigger is present, the
* trigger itself might modify the partition-key values. So arrange
* for tuple routing.
*/
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_before_row &&
operation == CMD_UPDATE)
update_tuple_routing_needed = true;
/* Now init the plan for this result rel */
estate->es_result_relation_info = resultRelInfo;
mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
mtstate->mt_scans[i] =
ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]),
table_slot_callbacks(resultRelInfo->ri_RelationDesc));
if (RelationIsAoRows(resultRelInfo->ri_RelationDesc))
appendonly_dml_init(resultRelInfo->ri_RelationDesc, operation);
else if (RelationIsAoCols(resultRelInfo->ri_RelationDesc))
aoco_dml_init(resultRelInfo->ri_RelationDesc, operation);
/* Also let FDWs init themselves for foreign-table result rels */
if (!resultRelInfo->ri_usesFdwDirectModify &&
resultRelInfo->ri_FdwRoutine != NULL &&
resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
{
List *fdw_private = (List *) list_nth(node->fdwPrivLists, i);
resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
resultRelInfo,
fdw_private,
i,
eflags);
}
resultRelInfo++;
i++;
}
estate->es_result_relation_info = saved_resultRelInfo;
/* Get the target relation */
rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc;
/*
* If it's not a partitioned table after all, UPDATE tuple routing should
* not be attempted.
*/
if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
update_tuple_routing_needed = false;
/*
* Build state for tuple routing if it's an INSERT or if it's an UPDATE of
* partition key.
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
(operation == CMD_INSERT || update_tuple_routing_needed))
mtstate->mt_partition_tuple_routing =
ExecSetupPartitionTupleRouting(estate, mtstate, rel);
/*
* Build state for collecting transition tuples. This requires having a
* valid trigger query context, so skip it in explain-only mode.
*/
if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
ExecSetupTransitionCaptureState(mtstate, estate);
/*
* Construct mapping from each of the per-subplan partition attnos to the
* root attno. This is required when during update row movement the tuple
* descriptor of a source partition does not match the root partitioned
* table descriptor. In such a case we need to convert tuples to the root
* tuple descriptor, because the search for destination partition starts
* from the root. We'll also need a slot to store these converted tuples.
* We can skip this setup if it's not a partition key update.
*/
if (update_tuple_routing_needed)
{
ExecSetupChildParentMapForSubplan(mtstate);
mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL);
}
/*
* Initialize any WITH CHECK OPTION constraints if needed.
*/
resultRelInfo = mtstate->resultRelInfo;
i = 0;
foreach(l, node->withCheckOptionLists)
{
List *wcoList = (List *) lfirst(l);
List *wcoExprs = NIL;
ListCell *ll;
foreach(ll, wcoList)
{
WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
ExprState *wcoExpr = ExecInitQual((List *) wco->qual,
&mtstate->ps);
wcoExprs = lappend(wcoExprs, wcoExpr);
}
resultRelInfo->ri_WithCheckOptions = wcoList;
resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
resultRelInfo++;
i++;
}
/*
* Initialize RETURNING projections if needed.
*/
if (node->returningLists)
{
TupleTableSlot *slot;
ExprContext *econtext;
/*
* Initialize result tuple slot and assign its rowtype using the first
* RETURNING list. We assume the rest will look the same.
*/
mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);
/* Set up a slot for the output of the RETURNING projection(s) */
ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
slot = mtstate->ps.ps_ResultTupleSlot;
/* Need an econtext too */
if (mtstate->ps.ps_ExprContext == NULL)
ExecAssignExprContext(estate, &mtstate->ps);
econtext = mtstate->ps.ps_ExprContext;
/*
* Build a projection for each result rel.
*/
resultRelInfo = mtstate->resultRelInfo;
foreach(l, node->returningLists)
{
List *rlist = (List *) lfirst(l);
resultRelInfo->ri_returningList = rlist;
resultRelInfo->ri_projectReturning =
ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
resultRelInfo->ri_RelationDesc->rd_att);
resultRelInfo++;
}
}
else
{
/*
* We still must construct a dummy result tuple type, because InitPlan
* expects one (maybe should change that?).
*/
mtstate->ps.plan->targetlist = NIL;
ExecInitResultTypeTL(&mtstate->ps);
mtstate->ps.ps_ExprContext = NULL;
}
/* Set the list of arbiter indexes if needed for ON CONFLICT */
resultRelInfo = mtstate->resultRelInfo;
if (node->onConflictAction != ONCONFLICT_NONE)
resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
/*
* If needed, Initialize target list, projection and qual for ON CONFLICT
* DO UPDATE.
*/
if (node->onConflictAction == ONCONFLICT_UPDATE)
{
ExprContext *econtext;
TupleDesc relationDesc;
TupleDesc tupDesc;
/* insert may only have one plan, inheritance is not expanded */
Assert(nplans == 1);
/* already exists if created by RETURNING processing above */
if (mtstate->ps.ps_ExprContext == NULL)
ExecAssignExprContext(estate, &mtstate->ps);
econtext = mtstate->ps.ps_ExprContext;
relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
/* carried forward solely for the benefit of explain */
mtstate->mt_excludedtlist = node->exclRelTlist;
/* create state for DO UPDATE SET operation */
resultRelInfo->ri_onConflict = makeNode(OnConflictSetState);
/* initialize slot for the existing tuple */
resultRelInfo->ri_onConflict->oc_Existing =
table_slot_create(resultRelInfo->ri_RelationDesc,
&mtstate->ps.state->es_tupleTable);
/* create the tuple slot for the UPDATE SET projection */
tupDesc = ExecTypeFromTL((List *) node->onConflictSet);
resultRelInfo->ri_onConflict->oc_ProjSlot =
ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
&TTSOpsVirtual);
/* build UPDATE SET projection state */
resultRelInfo->ri_onConflict->oc_ProjInfo =
ExecBuildProjectionInfo(node->onConflictSet, econtext,
resultRelInfo->ri_onConflict->oc_ProjSlot,
&mtstate->ps,
relationDesc);
/* initialize state to evaluate the WHERE clause, if any */
if (node->onConflictWhere)
{
ExprState *qualexpr;
qualexpr = ExecInitQual((List *) node->onConflictWhere,
&mtstate->ps);
resultRelInfo->ri_onConflict->oc_WhereClause = qualexpr;
}
}
/*
* If we have any secondary relations in an UPDATE or DELETE, they need to
* be treated like non-locked relations in SELECT FOR UPDATE, ie, the
* EvalPlanQual mechanism needs to be told about them. Locate the
* relevant ExecRowMarks.
*/
foreach(l, node->rowMarks)
{
PlanRowMark *rc = lfirst_node(PlanRowMark, l);
ExecRowMark *erm;
bool isdistributed = false;
/* ignore "parent" rowmarks; they are irrelevant at runtime */
if (rc->isParent)
continue;
/*
* Like in preprocess_targetlist, ignore distributed tables.
*/
/*
* GPDB_91_MERGE_FIXME: we are largely just ignoring locking altogether.
* Perhaps that's OK as long as we take a full table lock on any UPDATEs
* or DELETEs. But sure doesn't seem right. Can we do better?
*/
{
RangeTblEntry *rte = rt_fetch(rc->rti, estate->es_plannedstmt->rtable);
if (rte->rtekind == RTE_RELATION)
{
Relation relation = heap_open(rte->relid, NoLock);
if (GpPolicyIsPartitioned(relation->rd_cdbpolicy))
isdistributed = true;
heap_close(relation, NoLock);
if (isdistributed)
continue;
}
}
if (Gp_role == GP_ROLE_EXECUTE)
{
/*
* In the executor, we don't have information on which tables are
* distributed. Assume that everything is; we wouldn't be running this
* slice on an entry table otherwise.
*/
continue;
}
/* find ExecRowMark (same for all subplans) */
erm = ExecFindRowMark(estate, rc->rti, false);
/* build ExecAuxRowMark for each subplan */
for (i = 0; i < nplans; i++)
{
ExecAuxRowMark *aerm;
subplan = mtstate->mt_plans[i]->plan;
aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
}
}
/* select first subplan */
mtstate->mt_whichplan = 0;
subplan = (Plan *) linitial(node->plans);
EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
mtstate->mt_arowmarks[0]);
/*
* Initialize the junk filter(s) if needed. INSERT queries need a filter
* if there are any junk attrs in the tlist. UPDATE and DELETE always
* need a filter, since there's always at least one junk attribute present
* --- no need to look first. Typically, this will be a 'ctid' or
* 'wholerow' attribute, but in the case of a foreign data wrapper it
* might be a set of junk attributes sufficient to identify the remote
* row.
*
* If there are multiple result relations, each one needs its own junk
* filter. Note multiple rels are only possible for UPDATE/DELETE, so we
* can't be fooled by some needing a filter and some not.
*
* This section of code is also a convenient place to verify that the
* output of an INSERT or UPDATE matches the target table(s).
*/
{
bool junk_filter_needed = false;
switch (operation)
{
case CMD_INSERT:
foreach(l, subplan->targetlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (tle->resjunk)
{
junk_filter_needed = true;
break;
}
}
break;
case CMD_UPDATE:
case CMD_DELETE:
junk_filter_needed = true;
break;
default:
elog(ERROR, "unknown operation");
break;
}
if (junk_filter_needed)
{
resultRelInfo = mtstate->resultRelInfo;
for (i = 0; i < nplans; i++)
{
JunkFilter *j;
TupleTableSlot *junkresslot;
subplan = mtstate->mt_plans[i]->plan;
if (operation == CMD_INSERT || operation == CMD_UPDATE)
ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
subplan->targetlist);
junkresslot =
ExecInitExtraTupleSlot(estate, NULL,
table_slot_callbacks(resultRelInfo->ri_RelationDesc));
j = ExecInitJunkFilter(subplan->targetlist,
junkresslot);
if (operation == CMD_UPDATE || operation == CMD_DELETE)
{
/* For UPDATE/DELETE, find the appropriate junk attr now */
char relkind;
relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
if (relkind == RELKIND_RELATION ||
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_PARTITIONED_TABLE)
{
j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
if (!AttributeNumberIsValid(j->jf_junkAttNo))
elog(ERROR, "could not find junk ctid column");
/* Extra GPDB junk columns */
resultRelInfo->ri_segid_attno = ExecFindJunkAttribute(j, "gp_segment_id");
if (!AttributeNumberIsValid(resultRelInfo->ri_segid_attno))
elog(ERROR, "could not find junk gp_segment_id column");
if (operation == CMD_UPDATE && mtstate->mt_isSplitUpdates[i])
{
resultRelInfo->ri_action_attno = ExecFindJunkAttribute(j, "DMLAction");
if (!AttributeNumberIsValid(resultRelInfo->ri_action_attno))
elog(ERROR, "could not find junk action column");
}
}
else if (relkind == RELKIND_FOREIGN_TABLE)
{
/*
* When there is a row-level trigger, there should be
* a wholerow attribute.
*/
j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
}
else
{
j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
if (!AttributeNumberIsValid(j->jf_junkAttNo))
elog(ERROR, "could not find junk wholerow column");
}
}
resultRelInfo->ri_junkFilter = j;
resultRelInfo++;
}
}
else
{
if (operation == CMD_INSERT)
ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
subplan->targetlist);
}
}
/*
* Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
* to estate->es_auxmodifytables so that it will be run to completion by
* ExecPostprocessPlan. (It'd actually work fine to add the primary
* ModifyTable node too, but there's no need.) Note the use of lcons not
* lappend: we need later-initialized ModifyTable nodes to be shut down
* before earlier ones. This ensures that we don't throw away RETURNING
* rows that need to be seen by a later CTE subplan.
*/
if (Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY)
{
/*
* We do not need this unless in executor or with utility role. Note
* This was added for the data modifying CTE feature but there are other
* cases could run into this also.
*/
if (!mtstate->canSetTag)
estate->es_auxmodifytables = lcons(mtstate,
estate->es_auxmodifytables);
}
return mtstate;
}
/* ----------------------------------------------------------------
* ExecEndModifyTable
*
* Shuts down the plan.
*
* Returns nothing of interest.
* ----------------------------------------------------------------
*/
void
ExecEndModifyTable(ModifyTableState *node)
{
int i;
/*
* Allow any FDWs to shut down
*/
for (i = 0; i < node->mt_nplans; i++)
{
ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
if (!resultRelInfo->ri_usesFdwDirectModify &&
resultRelInfo->ri_FdwRoutine != NULL &&
resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
resultRelInfo);
if (RelationIsAoRows(resultRelInfo->ri_RelationDesc))
appendonly_dml_finish(resultRelInfo->ri_RelationDesc,
node->operation);
else if(RelationIsAoCols(resultRelInfo->ri_RelationDesc))
aoco_dml_finish(resultRelInfo->ri_RelationDesc, node->operation);
}
/*
* Close all the partitioned tables, leaf partitions, and their indices
* and release the slot used for tuple routing, if set.
*/
if (node->mt_partition_tuple_routing)
{
ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
if (node->mt_root_tuple_slot)
ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
}
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->ps);
/*
* clean out the tuple table
*/
if (node->ps.ps_ResultTupleSlot)
ExecClearTuple(node->ps.ps_ResultTupleSlot);
/*
* Terminate EPQ execution if active
*/
EvalPlanQualEnd(&node->mt_epqstate);
/*
* shut down subplans
*/
for (i = 0; i < node->mt_nplans; i++)
ExecEndNode(node->mt_plans[i]);
}
void
ExecReScanModifyTable(ModifyTableState *node)
{
/*
* Currently, we don't need to support rescan on ModifyTable nodes. The
* semantics of that would be a bit debatable anyway.
*/
elog(ERROR, "ExecReScanModifyTable is not implemented");
}
void
ExecSquelchModifyTable(ModifyTableState *node)
{
/*
* ModifyTable nodes must run to completion when asked to Squelch so
* that we don't risk losing modifications which should be performed
* regardless of any LIMIT's or other forms for projections which could
* end up causing a squelch to happen.
*/
for (;;)
{
TupleTableSlot *result;
result = ExecModifyTable(&node->ps);
if (!result)
break;
}
}
相关信息
相关文章
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦