greenplumn bitmap 源码
greenplumn bitmap 代码
文件路径:/src/include/access/bitmap.h
/*-------------------------------------------------------------------------
*
* bitmap.h
* header file for on-disk bitmap index access method implementation.
*
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/access/bitmap.h
*
*-------------------------------------------------------------------------
*/
#ifndef BITMAP_H
#define BITMAP_H
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#define BM_READ BUFFER_LOCK_SHARE
#define BM_WRITE BUFFER_LOCK_EXCLUSIVE
#define BM_NOLOCK (-1)
/*
* The size in bits of a hybrid run-length(HRL) word.
* If you change this, you should change the type for
* a HRL word as well.
*/
#define BM_HRL_WORD_SIZE 64
/* the type for a HRL word */
typedef uint64 BM_HRL_WORD;
#define BM_HRL_WORD_LEFTMOST (BM_HRL_WORD_SIZE-1)
#define BITMAP_VERSION 2
#define BITMAP_MAGIC 0x4249544D
/*
* Metapage, always the first page (page 0) in the index.
*
* This page stores some meta-data information about this index.
*/
typedef struct BMMetaPageData
{
/*
* The magic and version of the bitmap index. Using the Oid type is to
* overcome the problem that the version is not stored in
* the index before GPDB 3.4.
*/
Oid bm_magic;
Oid bm_version; /* the version of the index */
/*
* The relation ids for a heap and a btree on this heap. They are
* used to speed up finding the bitmap vector for given attribute
* value(s), see the comments for LOV pages below for more
* information. We consider these as the metadata for LOV pages.
*/
Oid bm_lov_heapId; /* the relation id for the heap */
Oid bm_lov_indexId; /* the relation id for the index */
/* the block number for the last LOV pages. */
BlockNumber bm_lov_lastpage;
} BMMetaPageData;
typedef BMMetaPageData *BMMetaPage;
/*
* The meta page is always the first block of the index
*/
#define BM_METAPAGE 0
/*
* The maximum number of heap tuples in one page that is considered
* in the bitmap index. We set this number to be a multiplication
* of BM_HRL_WORD_SIZE because we can then bits for heap
* tuples in different heap pages are stored in different words.
* This makes it easier during the search.
*
* Because the tid value for AO tables can be more than MaxHeapTuplesPerPage,
* we use the maximum possible offset number here.
*/
#define BM_MAX_TUPLES_PER_PAGE \
(((((1 << (8 * sizeof(OffsetNumber))) - 1) / BM_HRL_WORD_SIZE) + 1) * \
BM_HRL_WORD_SIZE)
/*
* LOV (List Of Values) page -- pages to store a list of distinct
* values for attribute(s) to be indexed, some metadata related to
* their corresponding bitmap vectors, and the pointers to their
* bitmap vectors. For each distinct value, there is a BMLOVItemData
* associated with it. A LOV page maintains an array of BMLOVItemData
* instances, called lov items.
*
* To speed up finding the lov item for a given value, we
* create a heap to maintain all distinct values along with the
* block numbers and offset numbers for their lov items in LOV pages.
* That is, there are total "<number_of_attributes> + 2" attributes
* in this new heap. Along with this heap, we also create a new btree
* index on this heap using attribute(s) as btree keys. In this way,
* for any given value, we search this btree to find
* the block number and offset number for its corresponding lov item.
*/
/*
* The first LOV page is reserved for NULL keys
*/
#define BM_LOV_STARTPAGE 1
/*
* Items in a LOV page.
*
* Each item is corresponding to a distinct value for attribute(s)
* to be indexed. For multi-column indexes on (a_1,a_2,...,a_n), we say
* two values (l_1,l_2,...,l_n) and (k_1,k_2,...,k_n) for (a_1,a_2,...,a_n)
* are the same if and only if for all i, l_i=k_i.
*
*/
typedef struct BMLOVItemData
{
/* the first page and last page of the bitmap vector. */
BlockNumber bm_lov_head;
BlockNumber bm_lov_tail;
/*
* Additional information to be used to append new bits into
* existing bitmap vector that this distinct value is associated with.
* The following two words do not store in the regular bitmap page,
* defined below.
*/
/* the last complete word in its bitmap vector. */
BM_HRL_WORD bm_last_compword;
/*
* the last word in its bitmap vector. This word is not
* a complete word. If a new appending bit makes this word
* to be complete, this word will merge with bm_last_compword.
*/
BM_HRL_WORD bm_last_word;
/*
* the tid location for the last bit stored in bm_last_compword.
* A tid location represents the index position for a bit in a
* bitmap vector, which is conceptualized as an array
* of bits. This value -- the index position starts from 1, and
* is calculated through (block#)*BM_MAX_TUPLES_PER_PAGE + (offset#),
* where (block#) and (offset#) are from the heap tuple ctid.
* This value is used while updating a bit in the middle of
* its bitmap vector. When moving the last complete word to
* the bitmap page, this value will also be written to that page.
* Each bitmap page maintains a similar value -- the tid location
* for the last bit stored in that page. This will help us
* know the range of tid locations for bits in a bitmap page
* without decompressing all bits.
*/
uint64 bm_last_tid_location;
/*
* the tid location of the last bit whose value is 1 (a set bit).
* Each bitmap vector will be visited only when there is a new
* set bit to be appended/updated. In the appending case, a new
* tid location is presented. With this value, we can calculate
* how many bits are 0s between this new set bit and the previous
* set bit.
*/
uint64 bm_last_setbit;
/*
* Only two least-significant bits in this byte are used.
*
* If the first least-significant bit is 1, then it represents
* that bm_last_word is a fill word. If the second least-significant
* bit is 1, it represents that bm_last_compword is a fill word.
*/
uint8 lov_words_header;
} BMLOVItemData;
typedef BMLOVItemData *BMLOVItem;
#define BM_MAX_LOVITEMS_PER_PAGE \
((BLCKSZ - sizeof(PageHeaderData)) / sizeof(BMLOVItemData))
#define BM_LAST_WORD_BIT 1
#define BM_LAST_COMPWORD_BIT 2
#define BM_LASTWORD_IS_FILL(lov) \
(bool) (lov->lov_words_header & BM_LAST_WORD_BIT ? true : false)
#define BM_LAST_COMPWORD_IS_FILL(lov) \
(bool) (lov->lov_words_header & BM_LAST_COMPWORD_BIT ? true : false)
#define BM_BOTH_LOV_WORDS_FILL(lov) \
(BM_LASTWORD_IS_FILL(lov) && BM_LAST_COMPWORD_IS_FILL(lov))
/*
* Bitmap page -- pages to store bits in a bitmap vector.
*
* Each bitmap page stores two parts of information: header words and
* content words. Each bit in the header words is corresponding to
* a word in the content words. If a bit in the header words is 1,
* then its corresponding content word is a compressed word. Otherwise,
* it is a literal word.
*
* If a content word is a fill word, it means that there is a sequence
* of 0 bits or 1 bits. The most significant bit in this content word
* represents the bits in this sequence are 0s or 1s. The rest of bits
* stores the value of "the number of bits / BM_HRL_WORD_SIZE".
*/
/*
* Opaque data for a bitmap page.
*/
typedef struct BMBitmapOpaqueData
{
uint32 bm_hrl_words_used; /* the number of words used */
BlockNumber bm_bitmap_next; /* the next page for this bitmap */
/*
* the tid location for the last bit in this page.
*/
uint64 bm_last_tid_location;
} BMBitmapOpaqueData;
typedef BMBitmapOpaqueData *BMBitmapOpaque;
/*
* Approximately 4078 words per 8K page
*/
#define BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE \
((BLCKSZ - \
MAXALIGN(sizeof(PageHeaderData)) - \
MAXALIGN(sizeof(BMBitmapOpaqueData)))/sizeof(BM_HRL_WORD))
/* approx 255 */
#define BM_MAX_NUM_OF_HEADER_WORDS \
(((BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE-1)/BM_HRL_WORD_SIZE) + 1)
/*
* To make the last header word a complete word, we limit this number to
* the multiplication of the word size.
*/
#define BM_NUM_OF_HRL_WORDS_PER_PAGE \
(((BM_MAX_NUM_OF_HRL_WORDS_PER_PAGE - \
BM_MAX_NUM_OF_HEADER_WORDS)/BM_HRL_WORD_SIZE) * BM_HRL_WORD_SIZE)
#define BM_NUM_OF_HEADER_WORDS \
(((BM_NUM_OF_HRL_WORDS_PER_PAGE-1)/BM_HRL_WORD_SIZE) + 1)
/*
* A page of a compressed bitmap
*/
typedef struct BMBitmapData
{
BM_HRL_WORD hwords[BM_NUM_OF_HEADER_WORDS];
BM_HRL_WORD cwords[BM_NUM_OF_HRL_WORDS_PER_PAGE];
} BMBitmapData;
typedef BMBitmapData *BMBitmap;
/*
* The number of tid locations to be found at once during query processing.
*/
#define BM_BATCH_TIDS 16*1024
/*
* the maximum number of words to be retrieved during BitmapIndexScan.
*/
#define BM_MAX_WORDS BM_NUM_OF_HRL_WORDS_PER_PAGE*4
/* Some macros for manipulating a bitmap word. */
#define LITERAL_ALL_ZERO 0
#define LITERAL_ALL_ONE ((BM_HRL_WORD)(~((BM_HRL_WORD)0)))
#define FILL_MASK ~(((BM_HRL_WORD)1) << (BM_HRL_WORD_SIZE - 1))
#define BM_MAKE_FILL_WORD(bit, length) \
((((BM_HRL_WORD)bit) << (BM_HRL_WORD_SIZE-1)) | (length))
#define FILL_LENGTH(w) (((BM_HRL_WORD)(w)) & FILL_MASK)
#define MAX_FILL_LENGTH ((((BM_HRL_WORD)1)<<(BM_HRL_WORD_SIZE-1))-1)
/* get the left most bit of the word */
#define GET_FILL_BIT(w) (((BM_HRL_WORD)(w))>>BM_HRL_WORD_LEFTMOST)
/*
* Given a word number, determine the bit position it that holds in its
* header word.
*/
#define WORDNO_GET_HEADER_BIT(cw_no) \
((BM_HRL_WORD)1 << (BM_HRL_WORD_SIZE - 1 - ((cw_no) % BM_HRL_WORD_SIZE)))
/* A simplified interface to IS_FILL_WORD */
#define CUR_WORD_IS_FILL(b) \
IS_FILL_WORD(b->hwords, b->startNo)
/*
* Calculate the number of header words we need given the number of
* content words
*/
#define BM_CALC_H_WORDS(c_words) \
(c_words == 0 ? c_words : (((c_words - 1)/BM_HRL_WORD_SIZE) + 1))
/*
* Convert an ItemPointer to and from an integer representation
*/
#define BM_IPTR_TO_INT(iptr) \
((uint64)ItemPointerGetBlockNumber(iptr) * BM_MAX_TUPLES_PER_PAGE + \
(uint64)ItemPointerGetOffsetNumber(iptr))
#define BM_INT_GET_BLOCKNO(i) \
((i - 1)/BM_MAX_TUPLES_PER_PAGE)
#define BM_INT_GET_OFFSET(i) \
(((i - 1) % BM_MAX_TUPLES_PER_PAGE) + 1)
/*
* To see if the content word at wordno is a compressed word or not we must look
* in the header words. Each bit in the header words corresponds to a word
* amongst the content words. If the bit is 1, the word is compressed (i.e., it
* is a fill word) otherwise it is uncompressed.
*
* See src/backend/access/bitmap/README for more details
*/
static inline bool
IS_FILL_WORD(const BM_HRL_WORD *words, int16 wordno)
{
return (words[wordno / BM_HRL_WORD_SIZE] & WORDNO_GET_HEADER_BIT(wordno)) > 0;
}
/*
* GET_NUM_BITS() -- return the number of bits included in the given
* bitmap words.
*/
static inline uint64
GET_NUM_BITS(const BM_HRL_WORD *contentWords,
const BM_HRL_WORD *headerWords,
uint32 nwords)
{
uint64 nbits = 0;
uint32 i;
for (i = 0; i < nwords; i++)
{
if (IS_FILL_WORD(headerWords, i))
nbits += FILL_LENGTH(contentWords[i]) * BM_HRL_WORD_SIZE;
else
nbits += BM_HRL_WORD_SIZE;
}
return nbits;
}
/* reloptions.c */
#define BITMAP_MIN_FILLFACTOR 10
#define BITMAP_DEFAULT_FILLFACTOR 100
#endif
相关信息
相关文章
greenplumn appendonly_compaction 源码
greenplumn appendonly_visimap 源码
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦