greenplumn trgm_gin 源码

  • 2022-08-18
  • 浏览 (518)

greenplumn trgm_gin 代码

文件路径:/contrib/pg_trgm/trgm_gin.c

/*
 * contrib/pg_trgm/trgm_gin.c
 */
#include "postgres.h"

#include "trgm.h"

#include "access/gin.h"
#include "access/stratnum.h"
#include "fmgr.h"


PG_FUNCTION_INFO_V1(gin_extract_trgm);
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);

/*
 * This function can only be called if a pre-9.1 version of the GIN operator
 * class definition is present in the catalogs (probably as a consequence
 * of upgrade-in-place).  Cope.
 */
Datum
gin_extract_trgm(PG_FUNCTION_ARGS)
{
	if (PG_NARGS() == 3)
		return gin_extract_value_trgm(fcinfo);
	if (PG_NARGS() == 7)
		return gin_extract_query_trgm(fcinfo);
	elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
	PG_RETURN_NULL();
}

Datum
gin_extract_value_trgm(PG_FUNCTION_ARGS)
{
	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
	Datum	   *entries = NULL;
	TRGM	   *trg;
	int32		trglen;

	*nentries = 0;

	trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
	trglen = ARRNELEM(trg);

	if (trglen > 0)
	{
		trgm	   *ptr;
		int32		i;

		*nentries = trglen;
		entries = (Datum *) palloc(sizeof(Datum) * trglen);

		ptr = GETARR(trg);
		for (i = 0; i < trglen; i++)
		{
			int32		item = trgm2int(ptr);

			entries[i] = Int32GetDatum(item);
			ptr++;
		}
	}

	PG_RETURN_POINTER(entries);
}

Datum
gin_extract_query_trgm(PG_FUNCTION_ARGS)
{
	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
	StrategyNumber strategy = PG_GETARG_UINT16(2);

	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);

	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
	Datum	   *entries = NULL;
	TRGM	   *trg;
	int32		trglen;
	trgm	   *ptr;
	TrgmPackedGraph *graph;
	int32		i;

	switch (strategy)
	{
		case SimilarityStrategyNumber:
		case WordSimilarityStrategyNumber:
		case StrictWordSimilarityStrategyNumber:
			trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
			break;
		case ILikeStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case LikeStrategyNumber:

			/*
			 * For wildcard search we extract all the trigrams that every
			 * potentially-matching string must include.
			 */
			trg = generate_wildcard_trgm(VARDATA_ANY(val),
										 VARSIZE_ANY_EXHDR(val));
			break;
		case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case RegExpStrategyNumber:
			trg = createTrgmNFA(val, PG_GET_COLLATION(),
								&graph, CurrentMemoryContext);
			if (trg && ARRNELEM(trg) > 0)
			{
				/*
				 * Successful regex processing: store NFA-like graph as
				 * extra_data.  GIN API requires an array of nentries
				 * Pointers, but we just put the same value in each element.
				 */
				trglen = ARRNELEM(trg);
				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
				for (i = 0; i < trglen; i++)
					(*extra_data)[i] = (Pointer) graph;
			}
			else
			{
				/* No result: have to do full index scan. */
				*nentries = 0;
				*searchMode = GIN_SEARCH_MODE_ALL;
				PG_RETURN_POINTER(entries);
			}
			break;
		default:
			elog(ERROR, "unrecognized strategy number: %d", strategy);
			trg = NULL;			/* keep compiler quiet */
			break;
	}

	trglen = ARRNELEM(trg);
	*nentries = trglen;

	if (trglen > 0)
	{
		entries = (Datum *) palloc(sizeof(Datum) * trglen);
		ptr = GETARR(trg);
		for (i = 0; i < trglen; i++)
		{
			int32		item = trgm2int(ptr);

			entries[i] = Int32GetDatum(item);
			ptr++;
		}
	}

	/*
	 * If no trigram was extracted then we have to scan all the index.
	 */
	if (trglen == 0)
		*searchMode = GIN_SEARCH_MODE_ALL;

	PG_RETURN_POINTER(entries);
}

Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)
{
	bool	   *check = (bool *) PG_GETARG_POINTER(0);
	StrategyNumber strategy = PG_GETARG_UINT16(1);

	/* text    *query = PG_GETARG_TEXT_PP(2); */
	int32		nkeys = PG_GETARG_INT32(3);
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
	bool		res;
	int32		i,
				ntrue;
	double		nlimit;

	/* All cases served by this function are inexact */
	*recheck = true;

	switch (strategy)
	{
		case SimilarityStrategyNumber:
		case WordSimilarityStrategyNumber:
		case StrictWordSimilarityStrategyNumber:
			nlimit = index_strategy_get_limit(strategy);

			/* Count the matches */
			ntrue = 0;
			for (i = 0; i < nkeys; i++)
			{
				if (check[i])
					ntrue++;
			}

			/*--------------------
			 * If DIVUNION is defined then similarity formula is:
			 * c / (len1 + len2 - c)
			 * where c is number of common trigrams and it stands as ntrue in
			 * this code.  Here we don't know value of len2 but we can assume
			 * that c (ntrue) is a lower bound of len2, so upper bound of
			 * similarity is:
			 * c / (len1 + c - c)  => c / len1
			 * If DIVUNION is not defined then similarity formula is:
			 * c / max(len1, len2)
			 * And again, c (ntrue) is a lower bound of len2, but c <= len1
			 * just by definition and, consequently, upper bound of
			 * similarity is just c / len1.
			 * So, independently on DIVUNION the upper bound formula is the same.
			 */
			res = (nkeys == 0) ? false :
				(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
			break;
		case ILikeStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case LikeStrategyNumber:
			/* Check if all extracted trigrams are presented. */
			res = true;
			for (i = 0; i < nkeys; i++)
			{
				if (!check[i])
				{
					res = false;
					break;
				}
			}
			break;
		case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case RegExpStrategyNumber:
			if (nkeys < 1)
			{
				/* Regex processing gave no result: do full index scan */
				res = true;
			}
			else
				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
										 check);
			break;
		default:
			elog(ERROR, "unrecognized strategy number: %d", strategy);
			res = false;		/* keep compiler quiet */
			break;
	}

	PG_RETURN_BOOL(res);
}

/*
 * In all cases, GIN_TRUE is at least as favorable to inclusion as
 * GIN_MAYBE. If no better option is available, simply treat
 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
 * consistent function.
 */
Datum
gin_trgm_triconsistent(PG_FUNCTION_ARGS)
{
	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
	StrategyNumber strategy = PG_GETARG_UINT16(1);

	/* text    *query = PG_GETARG_TEXT_PP(2); */
	int32		nkeys = PG_GETARG_INT32(3);
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	GinTernaryValue res = GIN_MAYBE;
	int32		i,
				ntrue;
	bool	   *boolcheck;
	double		nlimit;

	switch (strategy)
	{
		case SimilarityStrategyNumber:
		case WordSimilarityStrategyNumber:
		case StrictWordSimilarityStrategyNumber:
			nlimit = index_strategy_get_limit(strategy);

			/* Count the matches */
			ntrue = 0;
			for (i = 0; i < nkeys; i++)
			{
				if (check[i] != GIN_FALSE)
					ntrue++;
			}

			/*
			 * See comment in gin_trgm_consistent() about * upper bound
			 * formula
			 */
			res = (nkeys == 0)
				? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
							   ? GIN_MAYBE : GIN_FALSE);
			break;
		case ILikeStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case LikeStrategyNumber:
			/* Check if all extracted trigrams are presented. */
			res = GIN_MAYBE;
			for (i = 0; i < nkeys; i++)
			{
				if (check[i] == GIN_FALSE)
				{
					res = GIN_FALSE;
					break;
				}
			}
			break;
		case RegExpICaseStrategyNumber:
#ifndef IGNORECASE
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
			/* FALL THRU */
		case RegExpStrategyNumber:
			if (nkeys < 1)
			{
				/* Regex processing gave no result: do full index scan */
				res = GIN_MAYBE;
			}
			else
			{
				/*
				 * As trigramsMatchGraph implements a monotonic boolean
				 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
				 * give a conservative result.
				 */
				boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
				for (i = 0; i < nkeys; i++)
					boolcheck[i] = (check[i] != GIN_FALSE);
				if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
										boolcheck))
					res = GIN_FALSE;
				pfree(boolcheck);
			}
			break;
		default:
			elog(ERROR, "unrecognized strategy number: %d", strategy);
			res = GIN_FALSE;	/* keep compiler quiet */
			break;
	}

	/* All cases served by this function are inexact */
	Assert(res != GIN_TRUE);
	PG_RETURN_GIN_TERNARY_VALUE(res);
}

相关信息

greenplumn 源码目录

相关文章

greenplumn trgm 源码

greenplumn trgm_gist 源码

greenplumn trgm_op 源码

greenplumn trgm_regexp 源码

0  赞