greenplumn bitmaputil 源码
greenplumn bitmaputil 代码
文件路径:/src/backend/access/bitmap/bitmaputil.c
/*-------------------------------------------------------------------------
*
* bitmaputil.c
* Utility routines for on-disk bitmap index access method.
*
* Portions Copyright (c) 2007-2010 Greenplum Inc
* Portions Copyright (c) 2010-2012 EMC Corporation
* Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
* Portions Copyright (c) 2006-2008, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/backend/access/bitmap/bitmaputil.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/bitmap.h"
#include "access/bitmap_private.h"
#include "access/bitmap_xlog.h"
#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
static void _bitmap_findnextword(BMBatchWords* words, uint64 nextReadNo);
static void _bitmap_resetWord(BMBatchWords *words, uint32 prevStartNo);
static uint8 _bitmap_find_bitset(BM_HRL_WORD word, uint8 lastPos);
/*
* _bitmap_formitem() -- construct a LOV entry.
*
* If the given tid number is greater than BM_HRL_WORD_SIZE, we
* construct the first fill word for this bitmap vector.
*/
BMLOVItem
_bitmap_formitem(uint64 currTidNumber)
{
BMLOVItem bmitem;
bmitem = (BMLOVItem) palloc(sizeof(BMLOVItemData));
bmitem->bm_lov_head = bmitem->bm_lov_tail = InvalidBlockNumber;
bmitem->bm_last_setbit = 0;
bmitem->bm_last_compword = LITERAL_ALL_ONE;
bmitem->bm_last_word = LITERAL_ALL_ZERO;
bmitem->lov_words_header = 0;
bmitem->bm_last_tid_location = 0;
/* fill up all existing bits with 0. */
if (currTidNumber <= BM_HRL_WORD_SIZE)
{
bmitem->bm_last_compword = LITERAL_ALL_ONE;
bmitem->bm_last_word = LITERAL_ALL_ZERO;
bmitem->lov_words_header = 0;
bmitem->bm_last_tid_location = 0;
}
else
{
uint64 numOfTotalFillWords;
BM_HRL_WORD numOfFillWords;
numOfTotalFillWords = (currTidNumber-1)/BM_HRL_WORD_SIZE;
numOfFillWords = (numOfTotalFillWords >= MAX_FILL_LENGTH) ?
MAX_FILL_LENGTH : numOfTotalFillWords;
bmitem->bm_last_compword = BM_MAKE_FILL_WORD(0, numOfFillWords);
bmitem->bm_last_word = LITERAL_ALL_ZERO;
bmitem->lov_words_header = 2;
bmitem->bm_last_tid_location = numOfFillWords * BM_HRL_WORD_SIZE;
bmitem->bm_last_setbit = numOfFillWords*BM_HRL_WORD_SIZE;
}
return bmitem;
}
/*
* _bitmap_get_metapage_data() -- return the metadata info stored
* in the given metapage buffer.
*/
BMMetaPage
_bitmap_get_metapage_data(Relation rel, Buffer metabuf)
{
Page page;
BMMetaPage metapage;
page = BufferGetPage(metabuf);
metapage = (BMMetaPage)PageGetContents(page);
/*
* If this metapage is from the pre 3.4 version of the bitmap
* index, we print "require to reindex" message, and error
* out.
*/
if (metapage->bm_version != BITMAP_VERSION)
{
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("the disk format for \"%s\" is not valid for this version of Greenplum Database",
RelationGetRelationName(rel)),
errhint("Use REINDEX to update this index.")));
}
return metapage;
}
/*
* _bitmap_init_batchwords() -- initialize a BMBatchWords in a given
* memory context.
*
* Allocate spaces for bitmap header words and bitmap content words.
*/
void
_bitmap_init_batchwords(BMBatchWords* words,
uint32 maxNumOfWords,
MemoryContext mcxt)
{
uint32 numOfHeaderWords;
MemoryContext oldcxt;
words->nwordsread = 0;
words->nextread = 1;
words->startNo = 0;
words->nwords = 0;
numOfHeaderWords = BM_CALC_H_WORDS(maxNumOfWords);
words->maxNumOfWords = maxNumOfWords;
/* Make sure that we have at least one page of words */
Assert(words->maxNumOfWords >= BM_NUM_OF_HRL_WORDS_PER_PAGE);
oldcxt = MemoryContextSwitchTo(mcxt);
words->hwords = palloc0(sizeof(BM_HRL_WORD)*numOfHeaderWords);
words->cwords = palloc0(sizeof(BM_HRL_WORD)*words->maxNumOfWords);
MemoryContextSwitchTo(oldcxt);
}
/*
* _bitmap_copy_batchwords() -- copy a given BMBatchWords to another
* BMBatchWords.
*/
void
_bitmap_copy_batchwords(BMBatchWords* words, BMBatchWords* copyWords)
{
uint32 numOfHeaderWords;
copyWords->maxNumOfWords = words->maxNumOfWords;
copyWords->nwordsread = words->nwordsread;
copyWords->nextread = words->nextread;
copyWords->firstTid = words->firstTid;
copyWords->startNo = words->startNo;
copyWords->nwords = words->nwords;
numOfHeaderWords = BM_CALC_H_WORDS(copyWords->maxNumOfWords);
memcpy(copyWords->hwords, words->hwords,
sizeof(BM_HRL_WORD)*numOfHeaderWords);
memcpy(copyWords->cwords, words->cwords,
sizeof(BM_HRL_WORD)*copyWords->maxNumOfWords);
}
/*
* _bitmap_reset_batchwords() -- reset the BMBatchWords for re-use.
*/
void
_bitmap_reset_batchwords(BMBatchWords *words)
{
words->startNo = 0;
words->nwords = 0;
MemSet(words->hwords, 0,
sizeof(BM_HRL_WORD) * BM_CALC_H_WORDS(words->maxNumOfWords));
}
/*
* _bitmap_cleanup_batchwords() -- release spaces allocated for the BMBatchWords.
*/
void _bitmap_cleanup_batchwords(BMBatchWords* words)
{
if (words == NULL)
return;
if (words->hwords)
pfree(words->hwords);
if (words->cwords)
pfree(words->cwords);
}
/*
* _bitmap_cleanup_scanpos() -- release space allocated for
* BMVector.
*/
void
_bitmap_cleanup_scanpos(BMVector bmScanPos, uint32 numBitmapVectors)
{
uint32 keyNo;
if (numBitmapVectors == 0)
{
return;
}
for (keyNo=0; keyNo<numBitmapVectors; keyNo++)
{
if (BufferIsValid((bmScanPos[keyNo]).bm_lovBuffer))
ReleaseBuffer((bmScanPos[keyNo]).bm_lovBuffer);
_bitmap_cleanup_batchwords((bmScanPos[keyNo]).bm_batchWords);
if (bmScanPos[keyNo].bm_batchWords != NULL)
pfree((bmScanPos[keyNo]).bm_batchWords);
}
pfree(bmScanPos);
}
/*
* _bitmap_findnexttid() -- find the next tid location in a given batch
* of bitmap words.
*/
uint64
_bitmap_findnexttid(BMBatchWords *words, BMIterateResult *result)
{
/*
* If there is not tids from previous computation, then we
* try to find next set of tids.
*/
if (result->nextTidLoc >= result->numOfTids)
_bitmap_findnexttids(words, result, BM_BATCH_TIDS);
/* if find more tids, then return the first one */
if (result->nextTidLoc < result->numOfTids)
{
result->nextTidLoc++;
return (result->nextTids[result->nextTidLoc-1]);
}
/* no more tids */
return 0;
}
/*
* _bitmap_findnexttids() -- find the next set of tids from a given
* batch of bitmap words.
*
* The maximum number of tids to be found is defined in 'maxTids'.
*/
void
_bitmap_findnexttids(BMBatchWords *words, BMIterateResult *result,
uint32 maxTids)
{
bool done = false;
result->nextTidLoc = result->numOfTids = 0;
/*
* Only in the situation that there have concurrent read/write on two
* adjacent bitmap index pages, and inserting a tid into PAGE_FULL cause expand
* compressed words to new words, and rearrange those words into PAGE_NEXT,
* and we ready to read a new page, we should adjust result-> lastScanWordNo
* to the current position.
*
* The value of words->startNo will always be 0, this value will only used at
* _bitmap_union to union a bunch of bitmaps, the union result will be stored
* at words. result->lastScanWordNo indicates the location in words->cwords that
* BMIterateResult will read the word next, it's start from 0, and will
* self-incrementing during the scan. So if result->lastScanWordNo equals to
* words->startNo, means we will scan a new bitmap index pages.
*/
if (result->lastScanWordNo == words->startNo &&
words->firstTid < result->nextTid)
_bitmap_catchup_to_next_tid(words, result);
while (words->nwords > 0 && result->numOfTids < maxTids && !done)
{
uint8 oldScanPos = result->lastScanPos;
BM_HRL_WORD word = words->cwords[result->lastScanWordNo];
/* new word, zero filled */
if (oldScanPos == 0 &&
((IS_FILL_WORD(words->hwords, result->lastScanWordNo) &&
GET_FILL_BIT(word) == 0) || word == 0))
{
BM_HRL_WORD fillLength;
if (word == 0)
fillLength = 1;
else
fillLength = FILL_LENGTH(word);
/* skip over non-matches */
result->nextTid += fillLength * BM_HRL_WORD_SIZE;
result->lastScanWordNo++;
words->nwords--;
result->lastScanPos = 0;
continue;
}
else if (IS_FILL_WORD(words->hwords, result->lastScanWordNo)
&& GET_FILL_BIT(word) == 1)
{
uint64 nfillwords = FILL_LENGTH(word);
uint8 bitNo;
while (result->numOfTids + BM_HRL_WORD_SIZE <= maxTids &&
nfillwords > 0)
{
/* explain the fill word */
for (bitNo = 0; bitNo < BM_HRL_WORD_SIZE; bitNo++)
result->nextTids[result->numOfTids++] = result->nextTid++;
nfillwords--;
/* update fill word to reflect expansion */
words->cwords[result->lastScanWordNo]--;
}
if (nfillwords == 0)
{
result->lastScanWordNo++;
words->nwords--;
result->lastScanPos = 0;
continue;
}
else
{
done = true;
break;
}
}
else
{
if(oldScanPos == 0)
oldScanPos = BM_HRL_WORD_SIZE + 1;
while (oldScanPos != 0 && result->numOfTids < maxTids)
{
BM_HRL_WORD w;
if (oldScanPos == BM_HRL_WORD_SIZE + 1)
oldScanPos = 0;
w = words->cwords[result->lastScanWordNo];
result->lastScanPos = _bitmap_find_bitset(w, oldScanPos);
/* did we find a bit set in this word? */
if (result->lastScanPos != 0)
{
uint64 tid = result->nextTid + result->lastScanPos -1;
result->nextTids[result->numOfTids++] = tid;
}
else
{
result->nextTid += BM_HRL_WORD_SIZE;
/* start scanning a new word */
words->nwords--;
result->lastScanWordNo++;
result->lastScanPos = 0;
}
oldScanPos = result->lastScanPos;
}
}
}
}
/*
* _bitmap_catchup_to_next_tid - Catch up to the nextTid we need to check
* from last iteration, in the following cases:
*
* 1: When the concurrent insert causes bitmap items from previous full page
* to spill over to current page in the window when we (the read transaction)
* had released the lock on the previous page and not locked the current page.
* More details see read_words in bitmapsearch.c.
* Related to issue: https://github.com/greenplum-db/gpdb/issues/11308
* 2. Or when running bitmap heap scan path on bitmap index, since we always
* try to read from a table block's start tid. See pull_stream.
*/
void
_bitmap_catchup_to_next_tid(BMBatchWords *words, BMIterateResult *result)
{
if (words->firstTid >= result->nextTid)
return;
/*
* Iterate each word until catch up to the next tid to search.
*/
for(; words->nwords > 0 && words->firstTid < result->nextTid;
result->lastScanWordNo++)
{
if (IS_FILL_WORD(words->hwords, result->lastScanWordNo))
{
BM_HRL_WORD word = words->cwords[result->lastScanWordNo];
uint64 fillLength = FILL_LENGTH(word);
/*
* XXX: weird, why the word marks as compresed but the word is 0?
*/
if (word == 0)
{
fillLength = 1;
/* Skip all empty bits, this may cause words->firstTid > result->nextTid */
words->firstTid += fillLength * BM_HRL_WORD_SIZE;
words->nwords--;
/* reset next tid to skip all empty words */
if (words->firstTid > result->nextTid)
result->nextTid = words->firstTid;
continue;
}
else
{
while (fillLength > 0 && words->firstTid < result->nextTid)
{
/* update fill word to reflect expansion */
words->cwords[result->lastScanWordNo]--;
words->firstTid += BM_HRL_WORD_SIZE;
fillLength--;
}
/* comsume all the fill words, try to fetch next words */
if (fillLength == 0)
{
words->nwords--;
continue;
}
/*
* Catch up the next tid to search, but there still fill words.
* Return current state.
*/
if (words->firstTid >= result->nextTid)
return;
}
}
else
{
words->firstTid += BM_HRL_WORD_SIZE;
words->nwords--;
}
}
}
/*
* _bitmap_intesect() is dead code because streaming intersects
* PagetableEntry structures, not raw batch words. It's possible we may
* want to intersect batches later though -- it would definitely improve
* streaming of intersections.
*/
#ifdef NOT_USED
/*
* _bitmap_intersect() -- intersect 'numBatches' bitmap words.
*
* All 'numBatches' bitmap words are HRL compressed. The result
* bitmap words HRL compressed, except that fill set words(1s) may
* be lossily compressed.
*/
void
_bitmap_intersect(BMBatchWords **batches, uint32 numBatches,
BMBatchWords *result)
{
bool done = false;
uint32 *prevStartNos;
uint64 nextReadNo;
uint64 batchNo;
Assert(numBatches > 0);
prevStartNos = (uint32 *)palloc0(numBatches * sizeof(uint32));
nextReadNo = batches[0]->nextread;
while (!done && result->nwords < result->maxNumOfWords)
{
BM_HRL_WORD andWord = LITERAL_ALL_ONE;
BM_HRL_WORD word;
bool andWordIsLiteral = true;
/*
* We walk through the bitmap word in each list one by one
* without de-compress the bitmap words. 'nextReadNo' defines
* the position of the next word that should be read in an
* uncompressed format.
*/
for (batchNo = 0; batchNo < numBatches; batchNo++)
{
uint32 offs;
BMBatchWords *bch = batches[batchNo];
/* skip nextReadNo - nwordsread - 1 words */
_bitmap_findnextword(bch, nextReadNo);
if (bch->nwords == 0)
{
done = true;
break;
}
Assert(bch->nwordsread == nextReadNo - 1);
/* Here, startNo should point to the word to be read. */
offs = bch->startNo;
word = bch->cwords[offs];
if (CUR_WORD_IS_FILL(bch) && (GET_FILL_BIT(word) == 0))
{
uint32 n;
bch->nwordsread += FILL_LENGTH(word);
n = bch->nwordsread - nextReadNo + 1;
andWord = BM_MAKE_FILL_WORD(0, n);
andWordIsLiteral = false;
nextReadNo = bch->nwordsread + 1;
bch->startNo++;
bch->nwords--;
break;
}
else if (CUR_WORD_IS_FILL(bch) && (GET_FILL_BIT(word) == 1))
{
bch->nwordsread++;
prevStartNos[batchNo] = bch->startNo;
if (FILL_LENGTH(word) == 1)
{
bch->startNo++;
bch->nwords--;
}
else
{
uint32 s = bch->startNo;
bch->cwords[s]--;
}
andWordIsLiteral = true;
}
else if (!CUR_WORD_IS_FILL(bch))
{
prevStartNos[batchNo] = bch->startNo;
andWord &= word;
bch->nwordsread++;
bch->startNo++;
bch->nwords--;
andWordIsLiteral = true;
}
}
/* Since there are not enough words in this attribute break this loop */
if (done)
{
uint32 preBatchNo;
/* reset the attributes before batchNo */
for (preBatchNo = 0; preBatchNo < batchNo; preBatchNo++)
{
_bitmap_resetWord(batches[preBatchNo], prevStartNos[preBatchNo]);
}
break;
}
else
{
if (!andWordIsLiteral)
{
uint32 off = result->nwords/BM_HRL_WORD_SIZE;
uint32 w = result->nwords;
result->hwords[off] |= WORDNO_GET_HEADER_BIT(w);
}
result->cwords[result->nwords] = andWord;
result->nwords++;
}
if (andWordIsLiteral)
nextReadNo++;
if (batchNo == 1 && bch->nwords == 0)
done = true;
}
/* set the nextReadNo */
for (batchNo = 0; batchNo < numBatches; batchNo++)
batches[batchNo]->nextread = nextReadNo;
pfree(prevStartNos);
}
#endif /* NOT_USED */
/*
* Fast forward to the next read position by skipping the common compressed
* zeros that appear in all batches. These skipped zeros are also copied into
* the result words.
*/
static uint64
fast_forward(uint32 nbatches, BMBatchWords **batches, BMBatchWords *result)
{
int i;
uint64 min_fill_len = MAX_FILL_LENGTH;
uint64 fast_forward_words = 0;
Assert(result != NULL);
Assert(nbatches == 0 || batches != NULL);
for (i = 0; i < nbatches; i++)
{
BM_HRL_WORD word;
/*
* Fast forward the read batch from a rearrage bitmap index page.
* Since words->nwordsread may get set to a new value in read_words().
* See bitmapsearch.c read_words for more details.
*/
_bitmap_findnextword(batches[i], batches[i]->nextread);
word = batches[i]->cwords[batches[i]->startNo];
/* if we find a matching tid in one of the batches, nothing to do */
if (CUR_WORD_IS_FILL(batches[i]) && GET_FILL_BIT(word) == 1)
return batches[0]->nextread;
else if (!CUR_WORD_IS_FILL(batches[i]))
return batches[0]->nextread;
else if (CUR_WORD_IS_FILL(batches[i]) && GET_FILL_BIT(word) == 0)
{
uint64 fill_len = FILL_LENGTH(word);
/* adjust down */
if (fill_len < min_fill_len)
{
min_fill_len = fill_len;
fast_forward_words = fill_len;
}
}
}
if (fast_forward_words)
{
uint32 offset;
for (i = 0; i < nbatches; i++)
batches[i]->nextread = batches[i]->nwordsread +
fast_forward_words + 1;
/*
* Copy these fast-forwarded words to
* the result.
*/
offset = result->nwords/BM_HRL_WORD_SIZE;
result->hwords[offset] |= WORDNO_GET_HEADER_BIT(result->nwords);
result->cwords[result->nwords] = fast_forward_words;
result->nwords++;
}
return batches[0]->nextread;
}
/*
* _bitmap_union() -- union 'numBatches' bitmaps
*
* All bitmap words are HRL compressed. The result bitmap words are also
* HRL compressed, except that fill unset words may be lossily compressed.
*/
void
_bitmap_union(BMBatchWords **batches, uint32 numBatches, BMBatchWords *result)
{
bool done = false;
uint32 *prevstarts;
uint64 nextReadNo;
uint64 batchNo;
Assert ((int)numBatches >= 0);
if (numBatches == 0)
return;
/* save batch->startNo for each input bitmap vector */
prevstarts = (uint32 *)palloc0(numBatches * sizeof(uint32));
/*
* Update the real firstTid for the bachwords with unioned batches.
* This is required because we may result->firstTid is set to nextTid
* to fetch in _bitmap_nextbatchwords for bitmap index scan, but the
* read words may not reach this position yet, the below calculation
* will set it back to the real first tid of current result batch.
*/
result->firstTid = ((batches[0]->nextread - 1) * BM_HRL_WORD_SIZE) + 1;
/*
* Compute the next read offset. We fast forward compressed
* zero words when possible.
*/
nextReadNo = fast_forward(numBatches, batches, result);
while (!done && result->nwords < result->maxNumOfWords)
{
BM_HRL_WORD orWord = LITERAL_ALL_ZERO;
BM_HRL_WORD word;
bool orWordIsLiteral = true;
for (batchNo = 0; batchNo < numBatches; batchNo++)
{
BMBatchWords *bch = batches[batchNo];
/* skip nextReadNo - nwordsread - 1 words */
_bitmap_findnextword(bch, nextReadNo);
if (bch->nwords == 0)
{
done = true;
break;
}
Assert(bch->nwordsread == nextReadNo - 1);
/* Here, startNo should point to the word to be read. */
word = bch->cwords[bch->startNo];
if (CUR_WORD_IS_FILL(bch) && GET_FILL_BIT(word) == 1)
{
/* Fill word represents matches */
bch->nwordsread += FILL_LENGTH(word);
orWord = BM_MAKE_FILL_WORD(1, bch->nwordsread - nextReadNo + 1);
orWordIsLiteral = false;
nextReadNo = bch->nwordsread + 1;
bch->startNo++;
bch->nwords--;
break;
}
else if (CUR_WORD_IS_FILL(bch) && GET_FILL_BIT(word) == 0)
{
/* Fill word represents no matches */
bch->nwordsread++;
prevstarts[batchNo] = bch->startNo;
if (FILL_LENGTH(word) == 1)
{
bch->startNo++;
bch->nwords--;
}
else
bch->cwords[bch->startNo]--;
orWordIsLiteral = true;
}
else if (!CUR_WORD_IS_FILL(bch))
{
/* word is literal */
prevstarts[batchNo] = bch->startNo;
orWord |= word;
bch->nwordsread++;
bch->startNo++;
bch->nwords--;
orWordIsLiteral = true;
}
}
if (done)
{
uint32 i;
/* reset the attributes before batchNo */
for (i = 0; i < batchNo; i++)
_bitmap_resetWord(batches[i], prevstarts[i]);
break;
}
else
{
if (!orWordIsLiteral)
{
/* Word is not literal, update the result header */
uint32 offs = result->nwords/BM_HRL_WORD_SIZE;
uint32 n = result->nwords;
result->hwords[offs] |= WORDNO_GET_HEADER_BIT(n);
}
result->cwords[result->nwords] = orWord;
result->nwords++;
}
if (orWordIsLiteral)
nextReadNo++;
/* we just processed the last batch and it was empty */
if (batchNo == numBatches - 1 && batches[batchNo]->nwords == 0)
done = true;
}
/* set the next word to read for all input vectors */
for (batchNo = 0; batchNo < numBatches; batchNo++)
batches[batchNo]->nextread = nextReadNo;
pfree(prevstarts);
}
/*
* _bitmap_findnextword() -- Find the next word whose position is
* 'nextReadNo' in an uncompressed format.
*/
static void
_bitmap_findnextword(BMBatchWords *words, uint64 nextReadNo)
{
/*
* 'words->nwordsread' defines how many un-compressed words
* have been read in this bitmap. We read from
* position 'startNo', and increment 'words->nwordsread'
* differently based on the type of words that are read, until
* 'words->nwordsread' is equal to 'nextReadNo'.
*/
while (words->nwords > 0 && words->nwordsread < nextReadNo - 1)
{
/* Get the current word */
BM_HRL_WORD word = words->cwords[words->startNo];
if (CUR_WORD_IS_FILL(words))
{
if(FILL_LENGTH(word) <= (nextReadNo - words->nwordsread - 1))
{
words->nwordsread += FILL_LENGTH(word);
words->startNo++;
words->nwords--;
}
else
{
words->cwords[words->startNo] -= (nextReadNo - words->nwordsread - 1);
words->nwordsread = nextReadNo - 1;
}
}
else
{
words->nwordsread++;
words->startNo++;
words->nwords--;
}
}
}
/*
* _bitmap_resetWord() -- Reset the read position in an BMBatchWords
* to its previous value.
*
* Reset the read position in an BMBatchWords to its previous value,
* which is given in 'prevStartNo'. Based on different type of words read,
* the actual bitmap word may need to be changed.
*/
static void
_bitmap_resetWord(BMBatchWords *words, uint32 prevStartNo)
{
if (words->startNo > prevStartNo)
{
Assert(words->startNo == prevStartNo + 1);
words->startNo = prevStartNo;
words->nwords++;
}
else
{
Assert(words->startNo == prevStartNo);
Assert(CUR_WORD_IS_FILL(words));
words->cwords[words->startNo]++;
}
words->nwordsread--;
}
/*
* _bitmap_find_bitset() -- find the rightmost set bit (bit=1) in the
* given word since 'lastPos', not including 'lastPos'.
*
* The rightmost bit in the given word is considered the position 1, and
* the leftmost bit is considered the position BM_HRL_WORD_SIZE.
*
* If such set bit does not exist in this word, 0 is returned.
*/
static uint8
_bitmap_find_bitset(BM_HRL_WORD word, uint8 lastPos)
{
uint8 pos = lastPos + 1;
BM_HRL_WORD rightmostBitWord;
if (pos > BM_HRL_WORD_SIZE)
return 0;
rightmostBitWord = (((BM_HRL_WORD)1) << (pos-1));
while (pos <= BM_HRL_WORD_SIZE && (word & rightmostBitWord) == 0)
{
rightmostBitWord <<= 1;
pos++;
}
if (pos > BM_HRL_WORD_SIZE)
pos = 0;
return pos;
}
/*
* _bitmap_begin_iterate() -- initialize the given BMIterateResult instance.
*/
void
_bitmap_begin_iterate(BMBatchWords *words, BMIterateResult *result)
{
result->lastScanPos = 0;
result->lastScanWordNo = words->startNo;
result->numOfTids = 0;
result->nextTidLoc = 0;
}
/*
* _bitmap_log_metapage() -- log the changes to the metapage
*/
void
_bitmap_log_metapage(Relation rel, ForkNumber fork, Page page)
{
BMMetaPage metapage = (BMMetaPage) PageGetContents(page);
xl_bm_metapage* xlMeta;
XLogRecPtr recptr;
xlMeta = (xl_bm_metapage *)
palloc(MAXALIGN(sizeof(xl_bm_metapage)));
xlMeta->bm_node = rel->rd_node;
xlMeta->bm_fork = fork;
xlMeta->bm_lov_heapId = metapage->bm_lov_heapId;
xlMeta->bm_lov_indexId = metapage->bm_lov_indexId;
xlMeta->bm_lov_lastpage = metapage->bm_lov_lastpage;
XLogBeginInsert();
XLogRegisterData((char*)xlMeta, MAXALIGN(sizeof(xl_bm_metapage)));
recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_META);
PageSetLSN(page, recptr);
pfree(xlMeta);
}
/*
* _bitmap_log_bitmap_lastwords() -- log the last two words in a bitmap.
*/
void
_bitmap_log_bitmap_lastwords(Relation rel, Buffer lovBuffer,
OffsetNumber lovOffset, BMLOVItem lovItem)
{
xl_bm_bitmap_lastwords xlLastwords;
XLogRecPtr recptr;
xlLastwords.bm_node = rel->rd_node;
xlLastwords.bm_last_compword = lovItem->bm_last_compword;
xlLastwords.bm_last_word = lovItem->bm_last_word;
xlLastwords.lov_words_header = lovItem->lov_words_header;
xlLastwords.bm_last_setbit = lovItem->bm_last_setbit;
xlLastwords.bm_last_tid_location = lovItem->bm_last_tid_location;
xlLastwords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
xlLastwords.bm_lov_offset = lovOffset;
XLogBeginInsert();
XLogRegisterData((char*)&xlLastwords, sizeof(xl_bm_bitmap_lastwords));
XLogRegisterBuffer(0, lovBuffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_BITMAP_LASTWORDS);
PageSetLSN(BufferGetPage(lovBuffer), recptr);
/*
* WAL consistency checking
*/
#ifdef DUMP_BITMAPAM_INSERT_RECORDS
_dump_page("insert", XactLastRecEnd, &rel->rd_node, lovBuffer);
#endif
}
/*
* _bitmap_log_lovitem() -- log adding a new lov item to a lov page.
*/
void
_bitmap_log_lovitem(Relation rel, ForkNumber fork, Buffer lovBuffer, OffsetNumber offset,
BMLOVItem lovItem, Buffer metabuf, bool is_new_lov_blkno)
{
Page lovPage = BufferGetPage(lovBuffer);
xl_bm_lovitem xlLovItem;
XLogRecPtr recptr;
Assert(BufferGetBlockNumber(lovBuffer) > 0);
xlLovItem.bm_node = rel->rd_node;
xlLovItem.bm_fork = fork;
xlLovItem.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
xlLovItem.bm_lov_offset = offset;
memcpy(&(xlLovItem.bm_lovItem), lovItem, sizeof(BMLOVItemData));
xlLovItem.bm_is_new_lov_blkno = is_new_lov_blkno;
XLogBeginInsert();
XLogRegisterData((char*)&xlLovItem, sizeof(xl_bm_lovitem));
XLogRegisterBuffer(0, lovBuffer, REGBUF_STANDARD);
if (is_new_lov_blkno)
XLogRegisterBuffer(1, metabuf, 0);
recptr = XLogInsert(RM_BITMAP_ID,
XLOG_BITMAP_INSERT_LOVITEM);
if (is_new_lov_blkno)
{
Page metapage = BufferGetPage(metabuf);
PageSetLSN(metapage, recptr);
}
PageSetLSN(lovPage, recptr);
elog(DEBUG1, "Insert a new lovItem at (blockno, offset): (%d,%d)",
BufferGetBlockNumber(lovBuffer), offset);
}
/*
* _bitmap_log_bitmapwords() -- log new bitmap words to be inserted.
*/
void
_bitmap_log_bitmapwords(Relation rel,
BMTIDBuffer *buf,
bool init_first_page, List *xl_bm_bitmapword_pages, List *bitmapBuffers,
Buffer lovBuffer, OffsetNumber lovOffset, uint64 tidnum)
{
XLogRecPtr recptr;
int rdata_no = 0;
Page lovPage = BufferGetPage(lovBuffer);
xl_bm_bitmapwords xlBitmapWords;
ListCell *lcp;
ListCell *lcb;
bool init_page;
int num_bm_pages = list_length(xl_bm_bitmapword_pages);
Assert(list_length(bitmapBuffers) == num_bm_pages);
if (num_bm_pages > MAX_BITMAP_PAGES_PER_INSERT)
elog(ERROR, "too many bitmap pages in one insert batch");
MemSet(&xlBitmapWords, 0, sizeof(xlBitmapWords));
xlBitmapWords.bm_node = rel->rd_node;
xlBitmapWords.bm_num_pages = list_length(xl_bm_bitmapword_pages);
xlBitmapWords.bm_init_first_page = init_first_page;
xlBitmapWords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
xlBitmapWords.bm_lov_offset = lovOffset;
xlBitmapWords.bm_last_compword = buf->last_compword;
xlBitmapWords.bm_last_word = buf->last_word;
xlBitmapWords.lov_words_header =
(buf->is_last_compword_fill) ? 2 : 0;
xlBitmapWords.bm_last_setbit = tidnum;
XLogBeginInsert();
XLogRegisterData((char *) &xlBitmapWords, sizeof(xl_bm_bitmapwords));
XLogRegisterBuffer(0, lovBuffer, REGBUF_STANDARD);
rdata_no = 1;
/* Write per-page structs */
init_page = init_first_page;
forboth(lcp, xl_bm_bitmapword_pages, lcb, bitmapBuffers)
{
xl_bm_bitmapwords_perpage *xlBitmapwordsPage = lfirst(lcp);
Buffer bitmapBuffer = lfirst_int(lcb);
Page bitmapPage = BufferGetPage(bitmapBuffer);
BMBitmap bitmap;
bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage);
Assert(BufferIsValid(bitmapBuffer));
XLogRegisterBuffer(rdata_no, bitmapBuffer, 0);
XLogRegisterBufData(rdata_no, (char *) xlBitmapwordsPage, sizeof(xl_bm_bitmapwords_perpage));
XLogRegisterBufData(rdata_no, (char *) &bitmap->hwords[xlBitmapwordsPage->bmp_start_hword_no],
xlBitmapwordsPage->bmp_num_hwords * sizeof(BM_HRL_WORD));
XLogRegisterBufData(rdata_no, (char *) &bitmap->cwords[xlBitmapwordsPage->bmp_start_cword_no],
xlBitmapwordsPage->bmp_num_cwords * sizeof(BM_HRL_WORD));
rdata_no++;
}
recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_INSERT_WORDS);
foreach(lcb, bitmapBuffers)
{
Buffer bitmapBuffer = lfirst_int(lcb);
PageSetLSN(BufferGetPage(bitmapBuffer), recptr);
}
PageSetLSN(lovPage, recptr);
/*
* WAL consistency checking
*/
#ifdef DUMP_BITMAPAM_INSERT_RECORDS
_dump_page("insert", XactLastRecEnd, &rel->rd_node, lovBuffer);
foreach(lcb, bitmapBuffers)
{
_dump_page("insert", XactLastRecEnd, &rel->rd_node, (Buffer) lfirst_int(lcb));
}
#endif
}
/*
* _bitmap_log_updateword() -- log updating a single word in a given
* bitmap page.
*/
void
_bitmap_log_updateword(Relation rel, Buffer bitmapBuffer, int word_no)
{
Page bitmapPage;
BMBitmap bitmap;
xl_bm_updateword xlBitmapWord;
XLogRecPtr recptr;
bitmapPage = BufferGetPage(bitmapBuffer);
bitmap = (BMBitmap) PageGetContentsMaxAligned(bitmapPage);
xlBitmapWord.bm_node = rel->rd_node;
xlBitmapWord.bm_blkno = BufferGetBlockNumber(bitmapBuffer);
xlBitmapWord.bm_word_no = word_no;
xlBitmapWord.bm_cword = bitmap->cwords[word_no];
xlBitmapWord.bm_hword = bitmap->hwords[word_no/BM_HRL_WORD_SIZE];
elog(DEBUG1, "_bitmap_log_updateword: (blkno, word_no, cword, hword)="
"(%d, %d, " INT64_FORMAT ", " INT64_FORMAT ")", xlBitmapWord.bm_blkno,
xlBitmapWord.bm_word_no, xlBitmapWord.bm_cword,
xlBitmapWord.bm_hword);
XLogBeginInsert();
XLogRegisterData((char*)&xlBitmapWord, sizeof(xl_bm_updateword));
XLogRegisterBuffer(0, bitmapBuffer, 0);
recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_UPDATEWORD);
PageSetLSN(bitmapPage, recptr);
}
/*
* _bitmap_log_updatewords() -- log updating bitmap words in one or
* two bitmap pages.
*
* If nextBuffer is Invalid, we only update one page.
*
*/
void
_bitmap_log_updatewords(Relation rel,
Buffer lovBuffer, OffsetNumber lovOffset,
Buffer firstBuffer, Buffer secondBuffer,
bool new_lastpage)
{
Page firstPage = NULL;
Page secondPage = NULL;
BMBitmap firstBitmap;
BMBitmap secondBitmap;
BMBitmapOpaque firstOpaque;
BMBitmapOpaque secondOpaque;
xl_bm_updatewords xlBitmapWords;
XLogRecPtr recptr;
firstPage = BufferGetPage(firstBuffer);
firstBitmap = (BMBitmap) PageGetContentsMaxAligned(firstPage);
firstOpaque = (BMBitmapOpaque)PageGetSpecialPointer(firstPage);
xlBitmapWords.bm_two_pages = false;
xlBitmapWords.bm_first_blkno = BufferGetBlockNumber(firstBuffer);
memcpy(&xlBitmapWords.bm_first_cwords,
firstBitmap->cwords,
BM_NUM_OF_HRL_WORDS_PER_PAGE * sizeof(BM_HRL_WORD));
memcpy(&xlBitmapWords.bm_first_hwords,
firstBitmap->hwords,
BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD));
xlBitmapWords.bm_first_last_tid = firstOpaque->bm_last_tid_location;
xlBitmapWords.bm_first_num_cwords =
firstOpaque->bm_hrl_words_used;
xlBitmapWords.bm_next_blkno = firstOpaque->bm_bitmap_next;
if (BufferIsValid(secondBuffer))
{
secondPage = BufferGetPage(secondBuffer);
secondBitmap = (BMBitmap) PageGetContentsMaxAligned(secondPage);
secondOpaque = (BMBitmapOpaque)PageGetSpecialPointer(secondPage);
xlBitmapWords.bm_two_pages = true;
xlBitmapWords.bm_second_blkno = BufferGetBlockNumber(secondBuffer);
memcpy(&xlBitmapWords.bm_second_cwords,
secondBitmap->cwords,
BM_NUM_OF_HRL_WORDS_PER_PAGE * sizeof(BM_HRL_WORD));
memcpy(&xlBitmapWords.bm_second_hwords,
secondBitmap->hwords,
BM_NUM_OF_HEADER_WORDS * sizeof(BM_HRL_WORD));
xlBitmapWords.bm_second_last_tid = secondOpaque->bm_last_tid_location;
xlBitmapWords.bm_second_num_cwords =
secondOpaque->bm_hrl_words_used;
xlBitmapWords.bm_next_blkno = secondOpaque->bm_bitmap_next;
}
xlBitmapWords.bm_node = rel->rd_node;
xlBitmapWords.bm_lov_blkno = BufferGetBlockNumber(lovBuffer);
xlBitmapWords.bm_lov_offset = lovOffset;
xlBitmapWords.bm_new_lastpage = new_lastpage;
XLogBeginInsert();
XLogRegisterData((char*)&xlBitmapWords, sizeof(xl_bm_updatewords));
XLogRegisterBuffer(0, firstBuffer, 0);
if (BufferIsValid(secondBuffer))
XLogRegisterBuffer(1, secondBuffer, 0);
if (new_lastpage)
{
if (!BufferIsValid(secondBuffer))
XLogRegisterBuffer(1, lovBuffer, REGBUF_STANDARD);
else
XLogRegisterBuffer(2, lovBuffer, REGBUF_STANDARD);
}
recptr = XLogInsert(RM_BITMAP_ID, XLOG_BITMAP_UPDATEWORDS);
PageSetLSN(firstPage, recptr);
if (BufferIsValid(secondBuffer))
{
PageSetLSN(secondPage, recptr);
}
if (new_lastpage)
{
Page lovPage = BufferGetPage(lovBuffer);
PageSetLSN(lovPage, recptr);
}
}
/*
* WAL consistency checking helper.
*
* This can be used to dump an image of a page to a file, after inserting
* or replaying a WAL record. The output is *extremely* voluminous, but
* it's a very useful tool for tracking WAL-related bugs. To use, create
* a cluster with mirroring enabled. Add _dump_page() calls in the routine
* that writes a certain WAL record type, and in the corresponding WAL
* replay routine. Run a test workload. This produces a bmdump_* file
* in the master and the mirror. Run 'diff' to compare them: if the WAL
* replay recreated the same changes that were made on the master, the
* files should be identical.
*/
#ifdef DUMP_BITMAPAM_INSERT_RECORDS
#include "cdb/cdbvars.h"
FILE *dump_file = NULL;
void
_dump_page(char *file, XLogRecPtr recptr, RelFileNode *relfilenode, Buffer buf)
{
int i;
unsigned char *p;
int zerossince = 0;
if (!dump_file)
{
dump_file = fopen(psprintf("bmdump_%d_%s", GpIdentity.segindex, file), "a");
if (!dump_file)
{
elog(WARNING, "could not open dump file %s", file);
return;
}
}
fprintf(dump_file, "LSN %X/%08X relfilenode %u/%u/%u blk %u\n",
(uint32) (recptr >> 32), (uint32) recptr,
relfilenode->spcNode,
relfilenode->dbNode,
relfilenode->relNode,
BufferGetBlockNumber(buf));
p = (unsigned char *) BufferGetPage(buf);
zerossince = 0;
for (i = 0; i < BLCKSZ; i+=32)
{
int j;
bool allzeros;
allzeros = true;
for (j = 0; j < 32; j++)
{
if (p[i+j] != 0)
{
allzeros = false;
break;
}
}
if (allzeros)
continue;
if (zerossince < i)
{
fprintf(dump_file, "LSN %X/%08X %04x-%04x: zeros\n",
(uint32) (recptr >> 32), (uint32) recptr,
zerossince, i);
}
fprintf(dump_file,
"LSN %X/%08X %04x: "
"%02x%02x%02x%02x %02x%02x%02x%02x "
"%02x%02x%02x%02x %02x%02x%02x%02x "
"%02x%02x%02x%02x %02x%02x%02x%02x "
"%02x%02x%02x%02x %02x%02x%02x%02x\n",
(uint32) (recptr >> 32), (uint32) recptr, i,
p[i+ 0], p[i+ 1], p[i+ 2], p[i+ 3], p[i+ 4], p[i+ 5], p[i+ 6], p[i+ 7],
p[i+ 8], p[i+ 9], p[i+10], p[i+11], p[i+12], p[i+13], p[i+14], p[i+15],
p[i+16], p[i+17], p[i+18], p[i+19], p[i+20], p[i+21], p[i+22], p[i+23],
p[i+24], p[i+25], p[i+26], p[i+27], p[i+28], p[i+29], p[i+30], p[i+31]);
zerossince = i+32;
}
if (zerossince != BLCKSZ)
{
fprintf(dump_file, "LSN %X/%08X %02x-%02x: zeros\n",
(uint32) (recptr >> 32), (uint32) recptr,
zerossince, BLCKSZ);
}
fflush(dump_file);
}
#endif
相关信息
相关文章
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦