/*
 * Numeric token-type codes.  VERSIONNUMBER is compared against a token type
 * by NOENDTOKEN(); ASCIIPARTHWORD is presumably a code of the same series
 * (TODO confirm against the full list, which is not shown here).
 */
43#define VERSIONNUMBER 8
46#define ASCIIPARTHWORD 11
93 "Word, letters and digits",
97 "Scientific notation",
99 "Hyphenated word part, letters and digits",
100 "Hyphenated word part, all letters",
101 "Hyphenated word part, all ASCII",
105 "Hyphenated word, letters and digits",
106 "Hyphenated word, all ASCII",
107 "Hyphenated word, all letters",
/*
 * Parser action flags.  Every value below has exactly one bit set, so they
 * are presumably meant to be OR-ed together into a flag word (TODO confirm).
 * Bits 0x0002 and 0x0004 are not defined in the lines shown here.
 */
221#define A_BINGO 0x0001
224#define A_RERUN 0x0008
225#define A_CLEAR 0x0010
226#define A_MERGE 0x0020
227#define A_CLRALL 0x0040
356 fprintf(stderr,
"closing parser\n");
376 fprintf(stderr,
"closing parser copy\n");
388#define p_iswhat(type, nonascii) \
391p_is##type(TParser *prs) \
393 pg_locale_t locale = pg_database_locale(); \
395 Assert(prs->state); \
396 wc = prs->pgwstr[prs->state->poschar]; \
397 if (prs->charmaxlen > 1 && locale->ctype_is_c && wc > 0x7f) \
399 return pg_isw##type(wc, pg_database_locale()); \
403p_isnot##type(TParser *prs) \
405 return !p_is##type(prs); \
428 return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) ==
c)) ? 1 : 0;
441 return p_iseq(prs, prs->
c);
447 return !p_iseq(prs, prs->
c);
459 return (
p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
472 if (ch <= 0x20 || ch >= 0x7F)
583 return (prs->
ignore) ? 1 : 0;
666 static const pg_wchar strange_letter[] = {
900 const pg_wchar *StopLow = strange_letter,
901 *StopHigh = strange_letter +
lengthof(strange_letter),
907 while (StopLow < StopHigh)
909 StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
910 if (*StopMiddle ==
c)
912 else if (*StopMiddle <
c)
913 StopLow = StopMiddle + 1;
915 StopHigh = StopMiddle;
1567 const char *state_name;
1572#define TPARSERSTATEACTION(state) \
1573 { CppConcat(action,state), state, CppAsString(state) }
1575#define TPARSERSTATEACTION(state) \
1576 { CppConcat(action,state), state }
1726 fprintf(stderr,
" matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
1737 (item->
type > 0) ?
" type " :
"",
/*
 * Token-type classification predicates.  Each takes a token type code and
 * yields nonzero when the code belongs to the listed set.
 *
 * All of these expand their argument more than once, so pass only
 * expressions without side effects.
 */
/* TAG_T, PROTOCOL, SPACE or XMLENTITY. */
1887#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
/* TAG_T only. */
1888#define HLIDREPLACE(x) ( (x)==TAG_T )
/* URL_T, NUMHWORD, ASCIIHWORD or HWORD. */
1889#define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
/* Currently the same set as HLIDSKIP(). */
1890#define XMLHLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
/* SPACE, or anything matched by HLIDREPLACE() or HLIDSKIP(). */
1891#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
/*
 * Anything matched by NONWORDTOKEN() or TS_IDIGNORE(), plus the numeric
 * token types SCIENTIFIC, VERSIONNUMBER, DECIMAL_T, SIGNEDINT and
 * UNSIGNEDINT.  Used by BADENDPOINT().
 */
1892#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
/*
 * True when word j has a non-null "item" and is not flagged "repeated".
 * Expands to a reference to a variable named "prs", which must be in scope
 * wherever the macro is used.
 */
1901#define INTERESTINGWORD(j) \
1902 (prs->words[j].item && !prs->words[j].repeated)
/*
 * True when word j either has a type matched by NOENDTOKEN() or a length of
 * at most "shortword", and additionally is not an INTERESTINGWORD().
 * Relies on variables named "prs" and "shortword" being in scope at the
 * point of use.
 */
1905#define BADENDPOINT(j) \
1906 ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
1907 !INTERESTINGWORD(j))
1942 for (
i = 0;
i < checkval->
len;
i++)
1953 data->allocated =
true;
1988 int *nextpos,
int *p,
int *q)
2008 foreach(lc, locations)
2013 for (
int i = 0;
i < pdata->
npos;
i++)
2016 int endp = pdata->
pos[
i];
2041 foreach(lc, locations)
2046 for (
int i = pdata->
npos - 1;
i >= 0;
i--)
2049 int startp = pdata->
pos[
i] - pdata->
width;
2069 posb =
Max(posb, pos);
2089 if (idxb < 0 && prs->words[
i].pos >= posb)
2098 if (idxb >= 0 && idxe >= idxb)
2110 ch.
len = idxe - idxb + 1;
2115 *nextpos = posb + 1;
2176 int *curlen,
int *poslen,
int max_words)
2228 int shortword,
int min_words,
2229 int max_words,
int max_fragments)
2246 int32 numcovers = 0,
2257 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2272 if (numcovers >= maxcovers)
2279 covers[numcovers].
curlen = curlen;
2280 covers[numcovers].
poslen = poslen;
2281 covers[numcovers].
chosen =
false;
2282 covers[numcovers].
excluded =
false;
2290 for (f = 0; f < max_fragments; f++)
2300 for (
i = 0;
i < numcovers;
i++)
2302 if (!covers[
i].chosen && !covers[
i].excluded &&
2303 (maxitems < covers[
i].poslen ||
2304 (maxitems == covers[
i].poslen &&
2305 minwords > covers[
i].curlen)))
2315 covers[minI].
chosen =
true;
2319 curlen = covers[minI].
curlen;
2321 if (curlen < max_words)
2324 maxstretch = (max_words - curlen) / 2;
2367 covers[minI].
curlen = curlen;
2372 for (
i = 0;
i < numcovers;
i++)
2393 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2411 int shortword,
int min_words,
int max_words)
2419 bool bestcover =
false;
2430 while (
hlCover(prs, query, locations, &nextpos, &p, &q))
2441 for (
i = p;
i <= q && curlen < max_words;
i++)
2450 if (curlen < max_words)
2457 for (
i =
i - 1;
i < prs->
curwords && curlen < max_words;
i++)
2469 if (curlen >= min_words)
2472 if (curlen < min_words)
2478 for (
i = p - 1;
i >= 0;
i--)
2484 if (curlen >= max_words)
2488 if (curlen >= min_words)
2491 posb = (
i >= 0) ?
i : 0;
2502 for (; curlen > min_words;
i--)
2518 poscover = (posb <= p && pose >= q);
2527 if (poscover > bestcover ||
2528 (poscover == bestcover && poslen > bestlen) ||
2529 (poscover == bestcover && poslen == bestlen &&
2535 bestcover = poscover;
2547 for (
i = 0;
i < prs->
curwords && curlen < min_words;
i++)
2582 int max_fragments = 0;
2583 bool highlightall =
false;
2590 foreach(l, prsoptions)
2618 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2619 errmsg(
"unrecognized headline parameter: \"%s\"",
2626 if (min_words >= max_words)
2628 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2629 errmsg(
"%s must be less than %s",
"MinWords",
"MaxWords")));
2632 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2633 errmsg(
"%s must be positive",
"MinWords")));
2636 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2637 errmsg(
"%s must be >= 0",
"ShortWord")));
2638 if (max_fragments < 0)
2640 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2641 errmsg(
"%s must be >= 0",
"MaxFragments")));
2645 if (query->
size > 0)
2658 if (max_fragments == 0)
2659 mark_hl_words(prs, query, locations, highlightall, shortword,
2660 min_words, max_words);
2663 min_words, max_words, max_fragments);
void print(const void *obj)
#define fprintf(file, fmt, msg)
char * defGetString(DefElem *def)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_INT32(x)
#define PG_GETARG_INT32(n)
#define PG_RETURN_POINTER(x)
Assert(PointerIsAligned(start, uint64))
int GetDatabaseEncoding(void)
int pg_dsplen(const char *mbstr)
int pg_database_encoding_max_length(void)
int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
int pg_mblen(const char *mbstr)
char * pstrdup(const char *in)
void * repalloc(void *pointer, Size size)
void pfree(void *pointer)
void * palloc0(Size size)
#define CHECK_FOR_INTERRUPTS()
int32 pg_strtoint32(const char *s)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
static XLogRecPtr startpos
int pg_strcasecmp(const char *s1, const char *s2)
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
void check_stack_depth(void)
HeadlineWordEntry * words
const TParserStateActionItem * pushedAtAction
struct TParserPosition * prev
const TParserStateActionItem * action
HeadlineWordEntry * words
#define PG_GETARG_TSQUERY(n)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static const TParserStateActionItem actionTPS_InParseHyphen[]
static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[]
static const TParserStateActionItem actionTPS_InHyphenWordFirst[]
static const TParserStateActionItem actionTPS_InXMLEntityFirst[]
static const TParserStateActionItem actionTPS_InHostFirstAN[]
static const TParserStateActionItem actionTPS_InHyphenNumWordPart[]
static const TParserStateActionItem actionTPS_InPathSecond[]
static const TParserStateActionItem actionTPS_InPathFirst[]
static const TParserStateActionItem actionTPS_InHostDomainSecond[]
static const TParserStateActionItem actionTPS_InCloseCommentFirst[]
static void SpecialFURL(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentEnd[]
static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
void _make_compiler_happy(void)
static const TParserStateActionItem actionTPS_InURLPathStart[]
static const TParserStateActionItem actionTPS_InHostFirstDomain[]
static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[]
static const TParserStateActionItem actionTPS_InHostDomain[]
static const TParserStateActionItem actionTPS_InVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[]
Datum prsd_nexttoken(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTagName[]
static const TParserStateActionItem actionTPS_InFileNext[]
static const TParserStateActionItem actionTPS_InXMLEntity[]
static const TParserStateActionItem actionTPS_InFURL[]
#define p_iswhat(type, nonascii)
static const TParserStateActionItem actionTPS_InMantissaSign[]
static void mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words, int max_fragments)
@ TPS_InXMLEntityHexNumFirst
@ TPS_InHyphenAsciiWordFirst
@ TPS_InHyphenNumWordPart
@ TPS_InHyphenNumWordFirst
@ TPS_InHyphenUnsignedInt
@ TPS_InXMLEntityNumFirst
@ TPS_InCloseCommentFirst
@ TPS_InParseHyphenHyphen
@ TPS_InHyphenAsciiWordPart
@ TPS_InHyphenDigitLookahead
static void mark_fragment(HeadlineParsedText *prs, bool highlightall, int startpos, int endpos)
static const TParserStateActionItem actionTPS_InXMLEntityEnd[]
static const TParserStateActionItem actionTPS_InHyphenNumWord[]
static const TParserStateActionItem actionTPS_InDecimal[]
static const TParserStateActionItem actionTPS_InSignedIntFirst[]
static const TParserStateActionItem actionTPS_InTagEscapeK[]
static const TParserStateActionItem actionTPS_InSpace[]
static const TParserStateActionItem actionTPS_InFile[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[]
static int p_iseqC(TParser *prs)
Datum prsd_headline(PG_FUNCTION_ARGS)
static bool hlCover(HeadlineParsedText *prs, TSQuery query, List *locations, int *nextpos, int *p, int *q)
static const TParserStateActionItem actionTPS_InUDecimal[]
int(* TParserCharTest)(struct TParser *)
static const TParserStateActionItem actionTPS_InSignedInt[]
static int p_isurlchar(TParser *prs)
static const TParserStateActionItem actionTPS_InTagBeginEnd[]
static const TParserStateActionItem actionTPS_InTagFirst[]
struct TParserPosition TParserPosition
static const TParserStateActionItem actionTPS_InTagEscapeKK[]
static int p_isneC(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentLast[]
static TParserPosition * newTParserPosition(TParserPosition *prev)
static const TParserStateActionItem actionTPS_InHyphenWordPart[]
static const TParserStateActionItem actionTPS_InMantissaFirst[]
static const TParserStateActionItem actionTPS_Base[]
static void SpecialHyphen(TParser *prs)
static void mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations, bool highlightall, int shortword, int min_words, int max_words)
static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[]
void(* TParserSpecial)(struct TParser *)
static const TParserStateActionItem actionTPS_InEmail[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[]
static const TParserStateActionItem actionTPS_InURLPath[]
static const TParserStateActionItem actionTPS_InSVerVersion[]
static const TParserStateActionItem actionTPS_InAsciiWord[]
static const char *const tok_alias[]
static int p_isstophost(TParser *prs)
static void get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos, int *curlen, int *poslen, int max_words)
static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[]
static int p_isasclet(TParser *prs)
static const TParserStateAction Actions[]
static const TParserStateActionItem actionTPS_InXMLBegin[]
static TParser * TParserInit(char *str, int len)
static const TParserStateActionItem actionTPS_InMantissa[]
static const TParserStateActionItem actionTPS_InVersionFirst[]
static int p_isascii(TParser *prs)
static const TParserStateActionItem actionTPS_InCommentFirst[]
static const TParserStateActionItem actionTPS_InHyphenWord[]
static int p_isignore(TParser *prs)
static const TParserStateActionItem actionTPS_InParseHyphenHyphen[]
static const TParserStateActionItem actionTPS_InPort[]
static const TParserStateActionItem actionTPS_InDecimalFirst[]
Datum prsd_lextype(PG_FUNCTION_ARGS)
static const TParserStateActionItem actionTPS_InTag[]
Datum prsd_start(PG_FUNCTION_ARGS)
#define TPARSERSTATEACTION(state)
static bool TParserGet(TParser *prs)
static int p_ishost(TParser *prs)
Datum prsd_end(PG_FUNCTION_ARGS)
static int p_isURLPath(TParser *prs)
static void SpecialVerVersion(TParser *prs)
static const TParserStateActionItem actionTPS_InProtocolFirst[]
static const TParserStateActionItem actionTPS_InUnsignedInt[]
static const TParserStateActionItem actionTPS_InUDecimalFirst[]
static const TParserStateActionItem actionTPS_InTagCloseFirst[]
static int p_isEOF(TParser *prs)
static const TParserStateActionItem actionTPS_InCloseCommentLast[]
static void TParserCopyClose(TParser *prs)
static const TParserStateActionItem actionTPS_InFileFirst[]
static const TParserStateActionItem actionTPS_InNumWord[]
static const TParserStateActionItem actionTPS_InFileTwiddle[]
static TParser * TParserCopyInit(const TParser *orig)
static const TParserStateActionItem actionTPS_InHost[]
static const TParserStateActionItem actionTPS_InTagBackSleshed[]
static const TParserStateActionItem actionTPS_InProtocolSecond[]
static const TParserStateActionItem actionTPS_InWord[]
static int p_isspecial(TParser *prs)
static void TParserClose(TParser *prs)
static const TParserStateActionItem actionTPS_InXMLEntityNum[]
static const TParserStateActionItem actionTPS_InVerVersion[]
static const TParserStateActionItem actionTPS_InHyphenAsciiWord[]
static const TParserStateActionItem actionTPS_InXMLEntityHexNum[]
static const TParserStateActionItem actionTPS_InPortFirst[]
static const char *const lex_descr[]
#define INTERESTINGWORD(j)
static void SpecialTags(TParser *prs)
static const TParserStateActionItem actionTPS_InTagEnd[]
static const TParserStateActionItem actionTPS_InComment[]
static const TParserStateActionItem actionTPS_InProtocolEnd[]
static const TParserStateActionItem actionTPS_InURLPathFirst[]
static const TParserStateActionItem actionTPS_InPathFirstFirst[]