greenplumn SparseData 源码
greenplumn SparseData 代码
文件路径:/gpcontrib/gp_sparse_vector/SparseData.c
/*-------------------------------------------------------------------------
*
* SparseData.c
*
* Copyright (c) 2010, Greenplum Software
* Portions Copyright (c) 2013-Present VMware, Inc. or its affiliates.
*
*
* IDENTIFICATION
* gpcontrib/gp_sparse_vector/SparseData.c
*
*-------------------------------------------------------------------------
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "SparseData.h"
/*------------------------------------------------------------------------------
* Create a SparseData structure
*
* There are ways to do this:
* First way allocates empty dynamic StringInfo of unknown initial sizes
* Second way creates a copy of an existing SparseData
* Third way creates a SparseData with zero storage in it's StringInfos
* Fourth way creates a SparseData in place using pointers to the vals and
* index data
*/
SparseData makeSparseData(void)
{
/* Allocate the struct */
SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
/* Allocate the included elements */
sdata->vals = makeStringInfo();
sdata->index = makeStringInfo();
sdata->vals->len = 0;
sdata->index->len = 0;
sdata->vals->cursor = 0;
sdata->index->cursor = 0;
sdata->unique_value_count=0;
sdata->total_value_count=0;
sdata->type_of_data = FLOAT8OID;
return sdata;
}
SparseData makeEmptySparseData(void)
{
/* Allocate the struct */
SparseData sdata = makeSparseData();
/*
* Free the data area
*/
pfree(sdata->vals->data);
pfree(sdata->index->data);
sdata->vals->data = palloc(1);
sdata->index->data = palloc(1);
sdata->vals->maxlen=0;
sdata->index->maxlen=0;
return sdata;
}
SparseData makeInplaceSparseData(char *vals, char *index,
int datasize, int indexsize, Oid datatype,
int unique_value_count, int total_value_count)
{
SparseData sdata = makeEmptySparseData();
sdata->unique_value_count = unique_value_count;
sdata->total_value_count = total_value_count;
sdata->vals->data = vals;
sdata->vals->len = datasize;
sdata->vals->maxlen = sdata->vals->len;
sdata->index->data = index;
sdata->index->len = indexsize;
sdata->index->maxlen = sdata->index->len;
sdata->type_of_data = datatype;
return(sdata);
}
/*------------------------------------------------------------------------------
* Copy a SparseData structure including it's data and index
*/
SparseData makeSparseDataCopy(SparseData source_sdata)
{
/* Allocate the struct */
SparseData sdata = (SparseData)palloc(sizeof(SparseDataStruct));
/* Allocate the included elements */
sdata->vals = copyStringInfo(source_sdata->vals);
sdata->index = copyStringInfo(source_sdata->index);
sdata->type_of_data = source_sdata->type_of_data;
sdata->unique_value_count = source_sdata->unique_value_count;
sdata->total_value_count = source_sdata->total_value_count;
return sdata;
}
/*
* Create a SparseData of size dimension from a constant
*/
SparseData makeSparseDataFromDouble(double constant,int64 dimension)
{
char *bytestore=(char *)palloc(sizeof(char)*9);
SparseData sdata = float8arr_to_sdata(&constant,1);
int8_to_compword(dimension,bytestore); /* create compressed version of
int8 value */
pfree(sdata->index->data);
sdata->index->data = bytestore;
sdata->index->len = int8compstoragesize(bytestore);
sdata->total_value_count=dimension;
if (sdata->index->maxlen < int8compstoragesize(bytestore))
elog(ERROR, "internal error: double value exceeds sparse data max length");
return(sdata);
}
void
freeSparseData(SparseData sdata)
{
pfree(sdata->vals);
pfree(sdata->index);
pfree(sdata);
}
void
freeSparseDataAndData(SparseData sdata)
{
pfree(sdata->vals->data);
pfree(sdata->index->data);
freeSparseData(sdata);
}
/*------------------------------------------------------------------------------
* Copy a StringInfo structure
*/
StringInfo copyStringInfo(StringInfo sinfo) {
StringInfo result;
char *data;
if (sinfo->data == NULL)
{
data = NULL;
} else
{
data = (char *)palloc(sizeof(char)*(sinfo->len+1));
memcpy(data,sinfo->data,sinfo->len);
data[sinfo->len] = '\0';
}
result = makeStringInfoFromData(data,sinfo->len);
return (result);
}
/*
* make a StringInfo from a data pointer and length
*/
StringInfo makeStringInfoFromData(char *data,int len) {
StringInfo sinfo;
sinfo = (StringInfo)palloc(sizeof(StringInfoData));
sinfo->data = data;
sinfo->len = len;
sinfo->maxlen = len;
sinfo->cursor = 0;
return sinfo;
}
/*------------------------------------------------------------------------------
* Convert a double[] to a SparseData compressed format, representing duplicate
* values in the double[] by a count and a single value.
* Note that special double values like denormalized numbers and exceptions
* like NaN are treated like any other value - if there are duplicates, the
* value of the special number is preserved and they are counted.
*------------------------------------------------------------------------------
*/
SparseData float8arr_to_sdata(double *array, int count) {
char *run_val=(char *)array;
int64 run_len=1;
SparseData sdata = makeSparseData();
sdata->type_of_data=FLOAT8OID;
for (int i=1; i<count; i++) {
char *curr_val=(char *)(array+i);
if (memcmp(curr_val,run_val,sizeof(float8)))
{ /*run is interrupted, initiate new run */
/* package up the finished run */
add_run_to_sdata(run_val, run_len, sizeof(float8), sdata);
/* mark beginning of new run */
run_val=curr_val;
run_len=1;
} else
{ /* we're still in the same run */
run_len++;
}
}
add_run_to_sdata(run_val, run_len, sizeof(float8), sdata); /* package up the last run */
/* Add the final tallies */
sdata->unique_value_count = sdata->vals->len/sizeof(double);
sdata->total_value_count = count;
return sdata;
}
SparseData arr_to_sdata(char *array, size_t width, Oid type_of_data, int count) {
char *run_val=array;
int64 run_len=1;
SparseData sdata = makeSparseData();
sdata->type_of_data=type_of_data;
for (int i=1; i<count; i++) {
char *curr_val=array+i;
if (memcmp(curr_val,run_val,width))
{ /*run is interrupted, initiate new run */
/* package up the finished run */
add_run_to_sdata(run_val, run_len, width, sdata);
/* mark beginning of new run */
run_val=curr_val;
run_len=1;
} else
{ /* we're still in the same run */
run_len++;
}
}
add_run_to_sdata(run_val, run_len, width, sdata); /* package up the last run */
/* Add the final tallies */
sdata->unique_value_count = sdata->vals->len/width;
sdata->total_value_count = count;
return sdata;
}
/*------------------------------------------------------------------------------
* Convert a SparseData compressed structure to a float8[]
*------------------------------------------------------------------------------
*/
double *sdata_to_float8arr(SparseData sdata) {
double *array;
int j, aptr;
char *iptr;
if (sdata->type_of_data != FLOAT8OID)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("data type of SparseData is not FLOAT64")));
}
array = (double *) palloc(sizeof(double) * sdata->total_value_count);
iptr = sdata->index->data;
aptr = 0;
for (int i=0; i<sdata->unique_value_count; i++) {
for (j=0;j<compword_to_int8(iptr);j++,aptr++) {
array[aptr] = ((double *)(sdata->vals->data))[i];
}
iptr+=int8compstoragesize(iptr);
}
if ((aptr) != sdata->total_value_count)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("array size is incorrect, is: %d and should be %d",
aptr,sdata->total_value_count)));
}
return array;
}
int64 *sdata_index_to_int64arr(SparseData sdata)
{
char *iptr;
int64 *array_ix = (int64 *)palloc(
sizeof(int64)*(sdata->unique_value_count));
iptr = sdata->index->data;
for (int i=0; i<sdata->unique_value_count; i++,
iptr+=int8compstoragesize(iptr)) {
array_ix[i] = compword_to_int8(iptr);
}
return(array_ix);
}
void serializeSparseData(char *target, SparseData source)
{
/* SparseDataStruct header */
memcpy(target,source,SIZEOF_SPARSEDATAHDR);
/* Two StringInfo structures describing the data and index */
memcpy(SDATA_DATA_SINFO(target), source->vals,sizeof(StringInfoData));
memcpy(SDATA_INDEX_SINFO(target),source->index,sizeof(StringInfoData));
/* The unique data values */
memcpy(SDATA_VALS_PTR(target),source->vals->data,source->vals->maxlen);
/* The index values */
memcpy(SDATA_INDEX_PTR(target),source->index->data,source->index->maxlen);
/*
* Set pointers to the data areas of the serialized structure
* First the two StringInfo structures contained in the SparseData,
* then the data areas inside each of the two StringInfos.
*/
((SparseData)target)->vals = (StringInfo)SDATA_DATA_SINFO(target);
((SparseData)target)->index = (StringInfo)SDATA_INDEX_SINFO(target);
((StringInfo)(SDATA_DATA_SINFO(target)))->data = SDATA_VALS_PTR(target);
if (source->index->data != NULL)
{
((StringInfo)(SDATA_INDEX_SINFO(target)))->data = SDATA_INDEX_PTR(target);
} else
{
((StringInfo)(SDATA_INDEX_SINFO(target)))->data = NULL;
}
}
相关信息
相关文章
0
赞
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
8、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦