about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  contrib/tsearch2/Makefile  5
-rw-r--r--  contrib/tsearch2/expected/tsearch2.out  119
-rw-r--r--  contrib/tsearch2/query.c  82
-rw-r--r--  contrib/tsearch2/query_support.c  111
-rw-r--r--  contrib/tsearch2/sql/tsearch2.sql  7
-rw-r--r--  contrib/tsearch2/ts_locale.c  61
-rw-r--r--  contrib/tsearch2/ts_locale.h  38
-rw-r--r--  contrib/tsearch2/tsearch.sql.in  56
-rw-r--r--  contrib/tsearch2/wordparser/Makefile  11
-rw-r--r--  contrib/tsearch2/wordparser/deflex.c  4
-rw-r--r--  contrib/tsearch2/wordparser/deflex.h  2
-rw-r--r--  contrib/tsearch2/wordparser/parser.c  1028
-rw-r--r--  contrib/tsearch2/wordparser/parser.h  147
-rw-r--r--  contrib/tsearch2/wordparser/parser.l  346
-rw-r--r--  contrib/tsearch2/wparser_def.c  20
15 files changed, 1613 insertions, 424 deletions
diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile
index 4901b611ee1..2ef904ddb4e 100644
--- a/contrib/tsearch2/Makefile
+++ b/contrib/tsearch2/Makefile
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.11 2005/11/08 17:08:46 teodor Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.12 2005/11/21 12:27:57 teodor Exp $
MODULE_big = tsearch2
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
@@ -6,7 +6,8 @@ OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
wparser.o wparser_def.o \
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
tsvector_op.o rank.o ts_stat.o \
- query_util.o query_support.o query_rewrite.o query_gist.o
+ query_util.o query_support.o query_rewrite.o query_gist.o \
+ ts_locale.o
SUBDIRS := snowball ispell wordparser
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)
diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out
index 296c0ac676f..a98c2216a8d 100644
--- a/contrib/tsearch2/expected/tsearch2.out
+++ b/contrib/tsearch2/expected/tsearch2.out
@@ -13,12 +13,12 @@ psql:tsearch2.sql:342: NOTICE: argument type tsvector is only a shell
psql:tsearch2.sql:396: NOTICE: type "tsquery" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell
-psql:tsearch2.sql:544: NOTICE: type "gtsvector" is not yet defined
+psql:tsearch2.sql:559: NOTICE: type "gtsvector" is not yet defined
DETAIL: Creating a shell type definition.
-psql:tsearch2.sql:549: NOTICE: argument type gtsvector is only a shell
-psql:tsearch2.sql:998: NOTICE: type "gtsq" is not yet defined
+psql:tsearch2.sql:564: NOTICE: argument type gtsvector is only a shell
+psql:tsearch2.sql:1054: NOTICE: type "gtsq" is not yet defined
DETAIL: Creating a shell type definition.
-psql:tsearch2.sql:1003: NOTICE: argument type gtsq is only a shell
+psql:tsearch2.sql:1059: NOTICE: argument type gtsq is only a shell
--tsvector
SELECT '1'::tsvector;
tsvector
@@ -653,7 +653,7 @@ select * from token_type('default');
11 | lpart_hword | Latin part of hyphenated word
12 | blank | Space symbols
13 | tag | HTML Tag
- 14 | http | HTTP head
+ 14 | protocol | Protocol head
15 | hword | Hyphenated word
16 | lhword | Latin hyphenated word
17 | nlhword | Non-latin hyphenated word
@@ -672,14 +672,13 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
-------+--------------------------------------
22 | 345
12 |
- 4 | qwe@efd.r
- 12 |
- 12 | '
- 12 |
+ 1 | qwe
+ 12 | @
+ 19 | efd.r
+ 12 | '
14 | http://
6 | www.com
- 12 | /
- 12 |
+ 12 | /
14 | http://
5 | aew.werc.ewr/?ad=qwe&dw
6 | aew.werc.ewr
@@ -700,10 +699,8 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
6 | 4aew.werc.ewr
12 |
14 | http://
- 5 | 5aew.werc.ewr:8100/?
- 6 | 5aew.werc.ewr
- 18 | :8100/?
- 12 |
+ 6 | 5aew.werc.ewr:8100
+ 12 | /?
1 | ad
12 | =
1 | qwe
@@ -711,12 +708,12 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
1 | dw
12 |
5 | 6aew.werc.ewr:8100/?ad=qwe&dw
- 6 | 6aew.werc.ewr
- 18 | :8100/?ad=qwe&dw
+ 6 | 6aew.werc.ewr:8100
+ 18 | /?ad=qwe&dw
12 |
5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
- 6 | 7aew.werc.ewr
- 18 | :8100/?ad=qwe&dw=%20%32
+ 6 | 7aew.werc.ewr:8100
+ 18 | /?ad=qwe&dw=%20%32
12 |
7 | +4.0e-10
12 |
@@ -747,11 +744,15 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
1 | jf
12 |
1 | sdjk
- 13 | <we hjwer <werrwe>
+ 12 | <
+ 1 | we
12 |
- 3 | ewr1
- 12 | >
+ 1 | hjwer
+ 12 |
+ 13 | <werrwe>
12 |
+ 3 | ewr1
+ 12 | >
3 | ewri2
12 |
13 | <a href="qwe<qwe>">
@@ -767,57 +768,53 @@ select * from parse('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc
12 |
19 | /wqe-324/ewr
12 |
- 6 | gist.h
- 12 |
- 6 | gist.h.c
+ 19 | gist.h
12 |
- 6 | gist.c
- 12 | .
+ 19 | gist.h.c
12 |
+ 19 | gist.c
+ 12 | .
1 | readline
12 |
20 | 4.2
12 |
20 | 4.2
- 12 | .
- 12 |
+ 12 | .
20 | 4.2
- 12 | ,
- 12 |
- 15 | readline-4
+ 12 | ,
+ 15 | readline-4.2
11 | readline
12 | -
20 | 4.2
12 |
- 15 | readline-4
+ 15 | readline-4.2
11 | readline
12 | -
20 | 4.2
- 12 | .
- 12 |
+ 12 | .
22 | 234
12 |
- 13 | <i <b>
+ 12 | <
+ 1 | i
+ 12 |
+ 13 | <b>
12 |
1 | wow
12 |
- 12 | <
- 12 |
+ 12 | <
1 | jqw
12 |
- 12 | <
- 12 | >
- 12 |
+ 12 | <>
1 | qwerty
-(138 rows)
+(135 rows)
SELECT to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
- to_tsvector
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
+ to_tsvector
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 'ad':17 'dw':19 'jf':39 '234':63 '345':1 '4.2':54,55,56,59,62 '455':31 'jqw':66 'qwe':2,18,27,28,35 'wer':36 'wow':65 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':67 '234.435':30 'qwe-wer':34 'readlin':53,58,61 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 'readline-4.2':57,60 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
(1 row)
SELECT length(to_tsvector('default', '345 qw'));
@@ -831,7 +828,7 @@ SELECT length(to_tsvector('default', '345 qwe@efd.r '' http://www.com/ http://ae
<i <b> wow < jqw <> qwerty'));
length
--------
- 53
+ 51
(1 row)
select to_tsquery('default', 'qwe & sKies ');
@@ -876,6 +873,36 @@ select to_tsquery('default', '(the|and&(i&1))&fghj');
'1' & 'fghj'
(1 row)
+select plainto_tsquery('default', 'the and z 1))& fghj');
+ plainto_tsquery
+--------------------
+ 'z' & '1' & 'fghj'
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
+ ?column?
+-----------------------
+ 'foo' & 'bar' & 'asd'
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
+ ?column?
+------------------------------
+ 'foo' & 'bar' | 'asd' & 'fg'
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
+ ?column?
+-----------------------------------
+ 'foo' & 'bar' | !( 'asd' & 'fg' )
+(1 row)
+
+select plainto_tsquery('default', 'foo bar') && 'asd | fg';
+ ?column?
+----------------------------------
+ 'foo' & 'bar' & ( 'asd' | 'fg' )
+(1 row)
+
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
?column?
----------
diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c
index e6f1ae3a898..e312cf6af71 100644
--- a/contrib/tsearch2/query.c
+++ b/contrib/tsearch2/query.c
@@ -51,10 +51,20 @@ Datum to_tsquery_name(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsquery_current);
Datum to_tsquery_current(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(plainto_tsquery);
+Datum plainto_tsquery(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(plainto_tsquery_name);
+Datum plainto_tsquery_name(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(plainto_tsquery_current);
+Datum plainto_tsquery_current(PG_FUNCTION_ARGS);
+
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
#define WAITFIRSTOPERAND 3
+#define WAITSINGLEOPERAND 4
/*
* node of query tree, also used
@@ -195,6 +205,14 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
else if (*(state->buf) != ' ')
return ERR;
break;
+ case WAITSINGLEOPERAND:
+ if ( *(state->buf) == '\0' )
+ return END;
+ *strval = state->buf;
+ *lenval = strlen( state->buf );
+ state->buf += strlen( state->buf );
+ state->count++;
+ return VAL;
default:
return ERR;
break;
@@ -582,7 +600,7 @@ findoprnd(ITEM * ptr, int4 *pos)
* input
*/
static QUERYTYPE *
- queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
+queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id, bool isplain)
{
QPRS_STATE state;
int4 i;
@@ -599,7 +617,7 @@ static QUERYTYPE *
/* init state */
state.buf = buf;
- state.state = WAITFIRSTOPERAND;
+ state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0;
state.num = 0;
state.str = NULL;
@@ -679,7 +697,7 @@ Datum
tsquery_in(PG_FUNCTION_ARGS)
{
SET_FUNCOID();
- PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
+ PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0, false));
}
/*
@@ -910,7 +928,7 @@ to_tsquery(PG_FUNCTION_ARGS)
str = text2char(in);
PG_FREE_IF_COPY(in, 1);
- query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
+ query = queryin(str, pushval_morph, PG_GETARG_INT32(0),false);
if ( query->size == 0 )
PG_RETURN_POINTER(query);
@@ -950,3 +968,59 @@ to_tsquery_current(PG_FUNCTION_ARGS)
Int32GetDatum(get_currcfg()),
PG_GETARG_DATUM(0)));
}
+
+/*
+ * plainto_tsquery(cfg_id, text)
+ *
+ * Builds a tsquery from plain text.  The text is handed to queryin() with
+ * isplain = true and pushval_morph as the value callback, then the item
+ * array is post-processed by clean_fakeval_v2().  If that returns NULL the
+ * query is reset to the empty form (len = HDRSIZEQT, size = 0).
+ */
+Datum
+plainto_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *txt = PG_GETARG_TEXT_P(1);
+	char	   *plain;
+	QUERYTYPE  *result;
+	ITEM	   *cleaned;
+	int4		nitems;
+
+	SET_FUNCOID();
+
+	plain = text2char(txt);
+	PG_FREE_IF_COPY(txt, 1);
+
+	result = queryin(plain, pushval_morph, PG_GETARG_INT32(0), true);
+
+	/* nothing was parsed: return the empty query as is */
+	if ( result->size == 0 )
+		PG_RETURN_POINTER(result);
+
+	cleaned = clean_fakeval_v2(GETQUERY(result), &nitems);
+	if ( cleaned != NULL )
+	{
+		/* overwrite the item array in place with the cleaned one */
+		memcpy((void *) GETQUERY(result), (void *) cleaned, nitems * sizeof(ITEM));
+		pfree(cleaned);
+	}
+	else
+	{
+		/* nothing survived: shrink to an empty query */
+		result->len = HDRSIZEQT;
+		result->size = 0;
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * plainto_tsquery(cfg_name, text)
+ *
+ * Resolves the configuration name to its id with name2id_cfg() and
+ * delegates to plainto_tsquery().
+ */
+Datum
+plainto_tsquery_name(PG_FUNCTION_ARGS)
+{
+	text	   *cfgname = PG_GETARG_TEXT_P(0);
+	Datum		cfgid;
+	Datum		result;
+
+	SET_FUNCOID();
+
+	cfgid = Int32GetDatum(name2id_cfg(cfgname));
+	result = DirectFunctionCall2(plainto_tsquery, cfgid, PG_GETARG_DATUM(1));
+
+	PG_FREE_IF_COPY(cfgname, 0);
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * plainto_tsquery(text)
+ *
+ * Same as plainto_tsquery(cfg_id, text), with the configuration id taken
+ * from get_currcfg().
+ */
+Datum
+plainto_tsquery_current(PG_FUNCTION_ARGS)
+{
+	Datum		cfgid;
+
+	SET_FUNCOID();
+
+	cfgid = Int32GetDatum(get_currcfg());
+	PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery, cfgid, PG_GETARG_DATUM(0)));
+}
+
diff --git a/contrib/tsearch2/query_support.c b/contrib/tsearch2/query_support.c
index c973def7d4d..edc2d48fcfb 100644
--- a/contrib/tsearch2/query_support.c
+++ b/contrib/tsearch2/query_support.c
@@ -14,6 +14,117 @@ tsquery_numnode(PG_FUNCTION_ARGS) {
PG_RETURN_INT32(nnode);
}
+/*
+ * Build a two-child operator node over the trees of queries a and b.
+ * Only the node type (OPR) is set here; the caller fills in the operator
+ * character in valnode->val.  Note that b becomes child[0] and a child[1].
+ */
+static QTNode*
+join_tsqueries(QUERYTYPE *a, QUERYTYPE *b) {
+	QTNode	*root = (QTNode*)palloc0( sizeof(QTNode) );
+	ITEM	*op = (ITEM*)palloc0( sizeof(ITEM) );
+	QTNode	**kids = (QTNode**)palloc0( sizeof(QTNode*)*2 );
+
+	op->type = OPR;
+
+	kids[0] = QT2QTN( GETQUERY(b), GETOPERAND(b) );
+	kids[1] = QT2QTN( GETQUERY(a), GETOPERAND(a) );
+
+	root->flags |= QTN_NEEDFREE;
+	root->valnode = op;
+	root->child = kids;
+	root->nchild = 2;
+
+	return root;
+}
+
+PG_FUNCTION_INFO_V1(tsquery_and);
+Datum tsquery_and(PG_FUNCTION_ARGS);
+
+/*
+ * tsquery_and(a, b): combine two tsqueries under the '&' operator.
+ *
+ * Both arguments are fetched with PG_DETOAST_DATUM_COPY.  If either side is
+ * empty (size == 0) the other one is returned unchanged; otherwise the two
+ * trees are joined by join_tsqueries() and flattened back with QTN2QT().
+ */
+Datum
+tsquery_and(PG_FUNCTION_ARGS) {
+	QUERYTYPE  *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+	QUERYTYPE  *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
+	QTNode	   *res;
+	QUERYTYPE  *query;
+
+	if ( a->size == 0 ) {
+		/* a is argument 0; the original passed index 1 here by mistake */
+		PG_FREE_IF_COPY(a,0);
+		PG_RETURN_POINTER(b);
+	} else if ( b->size == 0 ) {
+		PG_FREE_IF_COPY(b,1);
+		PG_RETURN_POINTER(a);
+	}
+
+	res = join_tsqueries(a, b);
+
+	res->valnode->val = '&';
+
+	query = QTN2QT( res, PlainMemory );
+
+	QTNFree(res);
+	PG_FREE_IF_COPY(a,0);
+	PG_FREE_IF_COPY(b,1);
+
+	PG_RETURN_POINTER(query);
+}
+
+PG_FUNCTION_INFO_V1(tsquery_or);
+Datum tsquery_or(PG_FUNCTION_ARGS);
+
+/*
+ * tsquery_or(a, b): combine two tsqueries under the '|' operator.
+ *
+ * Both arguments are fetched with PG_DETOAST_DATUM_COPY.  If either side is
+ * empty (size == 0) the other one is returned unchanged; otherwise the two
+ * trees are joined by join_tsqueries() and flattened back with QTN2QT().
+ */
+Datum
+tsquery_or(PG_FUNCTION_ARGS) {
+	QUERYTYPE  *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+	QUERYTYPE  *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
+	QTNode	   *res;
+	QUERYTYPE  *query;
+
+	if ( a->size == 0 ) {
+		/* a is argument 0; the original passed index 1 here by mistake */
+		PG_FREE_IF_COPY(a,0);
+		PG_RETURN_POINTER(b);
+	} else if ( b->size == 0 ) {
+		PG_FREE_IF_COPY(b,1);
+		PG_RETURN_POINTER(a);
+	}
+
+	res = join_tsqueries(a, b);
+
+	res->valnode->val = '|';
+
+	query = QTN2QT( res, PlainMemory );
+
+	QTNFree(res);
+	PG_FREE_IF_COPY(a,0);
+	PG_FREE_IF_COPY(b,1);
+
+	PG_RETURN_POINTER(query);
+}
+
+PG_FUNCTION_INFO_V1(tsquery_not);
+Datum tsquery_not(PG_FUNCTION_ARGS);
+
+/*
+ * tsquery_not(a): wrap a tsquery in a '!' operator node.
+ *
+ * An empty query (size == 0) is returned unchanged.
+ */
+Datum
+tsquery_not(PG_FUNCTION_ARGS) {
+	QUERYTYPE  *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+	QTNode	   *neg;
+	ITEM	   *op;
+	QUERYTYPE  *out;
+
+	if ( a->size == 0 )
+		PG_RETURN_POINTER(a);
+
+	/* operator item: '!' */
+	op = (ITEM*)palloc0( sizeof(ITEM) );
+	op->type = OPR;
+	op->val = '!';
+
+	/* single-child node holding the original query tree */
+	neg = (QTNode*)palloc0( sizeof(QTNode) );
+	neg->flags |= QTN_NEEDFREE;
+	neg->valnode = op;
+	neg->child = (QTNode**)palloc0( sizeof(QTNode*) );
+	neg->child[0] = QT2QTN( GETQUERY(a), GETOPERAND(a) );
+	neg->nchild = 1;
+
+	out = QTN2QT( neg, PlainMemory );
+
+	QTNFree(neg);
+	PG_FREE_IF_COPY(a,0);
+
+	PG_RETURN_POINTER(out);
+}
+
static int
CompareTSQ( QUERYTYPE *a, QUERYTYPE *b ) {
if ( a->size != b->size ) {
diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql
index 0923ce7a197..bd0baa3b41d 100644
--- a/contrib/tsearch2/sql/tsearch2.sql
+++ b/contrib/tsearch2/sql/tsearch2.sql
@@ -173,6 +173,13 @@ select to_tsquery('default', 'asd&(and|fghj)');
select to_tsquery('default', '(asd&and)|fghj');
select to_tsquery('default', '(asd&!and)|fghj');
select to_tsquery('default', '(the|and&(i&1))&fghj');
+
+select plainto_tsquery('default', 'the and z 1))& fghj');
+select plainto_tsquery('default', 'foo bar') && plainto_tsquery('default', 'asd');
+select plainto_tsquery('default', 'foo bar') || plainto_tsquery('default', 'asd fg');
+select plainto_tsquery('default', 'foo bar') || !!plainto_tsquery('default', 'asd fg');
+select plainto_tsquery('default', 'foo bar') && 'asd | fg';
+
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
diff --git a/contrib/tsearch2/ts_locale.c b/contrib/tsearch2/ts_locale.c
new file mode 100644
index 00000000000..b84681f1b07
--- /dev/null
+++ b/contrib/tsearch2/ts_locale.c
@@ -0,0 +1,61 @@
+#include "ts_locale.h"
+
+#include "utils/builtins.h"
+#include "utils/pg_locale.h"
+#include "mb/pg_wchar.h"
+
+
+#if defined(TS_USE_WIDE) && defined(WIN32)
+
+/*
+ * wchar2char --- convert a wide-character string to the server encoding.
+ *
+ * For a UTF-8 database the Win32 converter is called directly; otherwise
+ * the call falls through to wcstombs().  Returns the value reported by the
+ * converter, or 0 when len is 0 on the UTF-8 path.
+ *
+ * The original body passed an undeclared "nbytes" as the output size; it is
+ * now obtained from a sizing call (cbMultiByte = 0), so the function
+ * compiles.  The caller must still supply a large enough "to" buffer.
+ *
+ * NOTE(review): "to" is the output buffer although the prototype declares
+ * it const.  The qualifier is cast away locally to keep the signature in
+ * sync with ts_locale.h; drop it in both places together.
+ */
+size_t
+wchar2char( const char *to, const wchar_t *from, size_t len ) {
+	char	*out = (char *) to;
+
+	if (GetDatabaseEncoding() == PG_UTF8) {
+		int	r, nbytes;
+
+		if (len==0)
+			return 0;
+
+		/* first pass: ask how many bytes the conversion needs */
+		nbytes = WideCharToMultiByte(CP_UTF8, 0, from, len, NULL, 0,
+				NULL, NULL);
+
+		if ( nbytes==0 )
+			ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				errmsg("UTF-16 to UTF-8 translation failed: %lu",
+					GetLastError())));
+
+		/* second pass: do the conversion */
+		r = WideCharToMultiByte(CP_UTF8, 0, from, len, out, nbytes,
+				NULL, NULL);
+
+		if ( r==0 )
+			ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				errmsg("UTF-16 to UTF-8 translation failed: %lu",
+					GetLastError())));
+
+		return r;
+	}
+
+	return wcstombs(out, from, len);
+}
+
+/*
+ * char2wchar --- convert a server-encoded string to wide characters.
+ *
+ * For a UTF-8 database the Win32 converter is called directly with len as
+ * both the input byte count and the output capacity; otherwise the call
+ * falls through to mbstowcs().  On conversion failure the input is first
+ * run through pg_verifymbstr() and then an error is raised.
+ *
+ * The original Assert referenced an undeclared "nbytes"; the bound that is
+ * actually passed to the converter is len, so that is what is checked now.
+ *
+ * NOTE(review): "to" is the output buffer although the prototype declares
+ * it const.  The qualifier is cast away locally to keep the signature in
+ * sync with ts_locale.h; drop it in both places together.
+ */
+size_t
+char2wchar( const wchar_t *to, const char *from, size_t len ) {
+	wchar_t	*out = (wchar_t *) to;
+
+	if (GetDatabaseEncoding() == PG_UTF8) {
+		int	r;
+
+		if (len==0)
+			return 0;
+
+		r = MultiByteToWideChar(CP_UTF8, 0, from, len,
+				out, len);
+
+		if (!r) {
+			pg_verifymbstr(from, len, false);
+			ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				errmsg("invalid multibyte character for locale"),
+				errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+		}
+
+		/* r is non-zero here, so the cast cannot wrap a negative value */
+		Assert((size_t) r <= len);
+
+		return r;
+	}
+
+	return mbstowcs(out, from, len);
+}
+
+#endif
diff --git a/contrib/tsearch2/ts_locale.h b/contrib/tsearch2/ts_locale.h
new file mode 100644
index 00000000000..a7ce6f1bbc5
--- /dev/null
+++ b/contrib/tsearch2/ts_locale.h
@@ -0,0 +1,38 @@
+#ifndef __TSLOCALE_H__
+#define __TSLOCALE_H__
+/* Wide-character conversion helpers (wchar2char / char2wchar) and the TS_USE_WIDE switch. */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+/*
+ * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
+ * declare them in <wchar.h>.
+ */
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif
+
+#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
+#define TS_USE_WIDE
+/* On WIN32 the converters are real functions (ts_locale.c); elsewhere they alias the libc routines. */
+#ifdef WIN32
+/* NOTE(review): "to" is the destination buffer in ts_locale.c yet is declared const here -- confirm and drop the qualifier in both files. */
+size_t wchar2char( const char *to, const wchar_t *from, size_t len );
+size_t char2wchar( const wchar_t *to, const char *from, size_t len );
+
+#else /* WIN32 */
+
+/* correct mbstowcs */
+#define char2wchar mbstowcs
+#define wchar2char wcstombs
+
+#endif /* WIN32 */
+
+#endif /* defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) */
+
+#endif /* __TSLOCALE_H__ */
diff --git a/contrib/tsearch2/tsearch.sql.in b/contrib/tsearch2/tsearch.sql.in
index 9bdf641e121..4fdf974d0d1 100644
--- a/contrib/tsearch2/tsearch.sql.in
+++ b/contrib/tsearch2/tsearch.sql.in
@@ -427,6 +427,21 @@ RETURNS tsquery
AS 'MODULE_PATHNAME','to_tsquery_current'
LANGUAGE 'c' with (isstrict,iscachable);
+CREATE FUNCTION plainto_tsquery(oid, text) -- configuration given by id; no link symbol, so the C symbol is the function name
+RETURNS tsquery
+AS 'MODULE_PATHNAME'
+LANGUAGE 'c' with (isstrict,iscachable);
+-- configuration given by name (C entry point plainto_tsquery_name)
+CREATE FUNCTION plainto_tsquery(text, text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','plainto_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+-- current configuration (C entry point plainto_tsquery_current)
+CREATE FUNCTION plainto_tsquery(text)
+RETURNS tsquery
+AS 'MODULE_PATHNAME','plainto_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
--operations
CREATE FUNCTION exectsq(tsvector, tsquery)
RETURNS bool
@@ -929,6 +944,47 @@ CREATE OR REPLACE FUNCTION numnode(tsquery)
language 'C'
with (isstrict,iscachable);
+CREATE OR REPLACE FUNCTION tsquery_and(tsquery,tsquery) -- backs the && operator below
+        returns tsquery
+        as 'MODULE_PATHNAME', 'tsquery_and'
+        language 'C'
+        with (isstrict,iscachable);
+-- tsquery && tsquery: AND-combine two queries
+CREATE OPERATOR && (
+        LEFTARG = tsquery,
+        RIGHTARG = tsquery,
+        PROCEDURE = tsquery_and,
+        COMMUTATOR = '&&'
+        -- no RESTRICT/JOIN here: selectivity estimators only apply to
+        -- operators returning boolean, and this one returns tsquery
+);
+
+CREATE OR REPLACE FUNCTION tsquery_or(tsquery,tsquery) -- backs the || operator below
+        returns tsquery
+        as 'MODULE_PATHNAME', 'tsquery_or'
+        language 'C'
+        with (isstrict,iscachable);
+-- tsquery || tsquery: OR-combine two queries
+CREATE OPERATOR || (
+        LEFTARG = tsquery,
+        RIGHTARG = tsquery,
+        PROCEDURE = tsquery_or,
+        COMMUTATOR = '||'
+        -- no RESTRICT/JOIN here: selectivity estimators only apply to
+        -- operators returning boolean, and this one returns tsquery
+);
+
+CREATE OR REPLACE FUNCTION tsquery_not(tsquery) -- backs the !! operator below
+        returns tsquery
+        as 'MODULE_PATHNAME', 'tsquery_not'
+        language 'C'
+        with (isstrict,iscachable);
+-- prefix operator (only RIGHTARG is given): !!query negates a tsquery
+CREATE OPERATOR !! (
+        RIGHTARG = tsquery,
+        PROCEDURE = tsquery_not
+);
+
--------------rewrite subsystem
CREATE OR REPLACE FUNCTION rewrite(tsquery, text)
diff --git a/contrib/tsearch2/wordparser/Makefile b/contrib/tsearch2/wordparser/Makefile
index 0070970e216..c4eceba60bb 100644
--- a/contrib/tsearch2/wordparser/Makefile
+++ b/contrib/tsearch2/wordparser/Makefile
@@ -1,8 +1,8 @@
-# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.8 2005/10/18 01:30:49 tgl Exp $
+# $PostgreSQL: pgsql/contrib/tsearch2/wordparser/Makefile,v 1.9 2005/11/21 12:27:57 teodor Exp $
SUBOBJS = parser.o deflex.o
-EXTRA_CLEAN = SUBSYS.o $(SUBOBJS) parser.c
+EXTRA_CLEAN = SUBSYS.o $(SUBOBJS)
PG_CPPFLAGS = -I$(srcdir)/..
@@ -20,13 +20,6 @@ override CFLAGS += $(CFLAGS_SL)
all: SUBSYS.o
-parser.c: parser.l
-ifdef FLEX
- $(FLEX) $(FLEXFLAGS) -8 -Ptsearch2_yy -o'$@' $<
-else
- @$(missing) flex $< $@
-endif
-
SUBSYS.o: $(SUBOBJS)
$(LD) $(LDREL) $(LDOUT) $@ $^
diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c
index bbf3271b666..8f93d277a1e 100644
--- a/contrib/tsearch2/wordparser/deflex.c
+++ b/contrib/tsearch2/wordparser/deflex.c
@@ -15,7 +15,7 @@ const char *lex_descr[] = {
"Latin part of hyphenated word",
"Space symbols",
"HTML Tag",
- "HTTP head",
+ "Protocol head",
"Hyphenated word",
"Latin hyphenated word",
"Non-latin hyphenated word",
@@ -42,7 +42,7 @@ const char *tok_alias[] = {
"lpart_hword",
"blank",
"tag",
- "http",
+ "protocol",
"hword",
"lhword",
"nlhword",
diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h
index 651d1f9e773..893f8430515 100644
--- a/contrib/tsearch2/wordparser/deflex.h
+++ b/contrib/tsearch2/wordparser/deflex.h
@@ -17,7 +17,7 @@
#define LATPARTHYPHENWORD 11
#define SPACE 12
#define TAG 13
-#define HTTP 14
+#define PROTOCOL 14
#define HYPHENWORD 15
#define LATHYPHENWORD 16
#define CYRHYPHENWORD 17
diff --git a/contrib/tsearch2/wordparser/parser.c b/contrib/tsearch2/wordparser/parser.c
new file mode 100644
index 00000000000..e414a865ffd
--- /dev/null
+++ b/contrib/tsearch2/wordparser/parser.c
@@ -0,0 +1,1028 @@
+#include "postgres.h"
+
+#include "utils/builtins.h"
+#include "utils/pg_locale.h"
+#include "mb/pg_wchar.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "ts_locale.h"
+
+
+/*
+ * Allocate a parser position.  With a non-NULL prev the new position starts
+ * as a byte copy of it, otherwise it is zero-filled.  In both cases the new
+ * position is linked back to prev and its pushedAtAction is cleared.
+ */
+static TParserPosition*
+newTParserPosition(TParserPosition *prev) {
+	TParserPosition	*pos = (TParserPosition*)palloc(sizeof(TParserPosition));
+
+	if ( prev == NULL )
+		memset(pos, 0, sizeof(TParserPosition));
+	else
+		memcpy(pos, prev, sizeof(TParserPosition));
+
+	pos->pushedAtAction = NULL;
+	pos->prev = prev;
+
+	return pos;
+}
+
+/*
+ * Create a parser over the buffer str of len bytes.  The buffer is
+ * referenced, not copied.  The parser starts in state TPS_Base.
+ */
+TParser*
+TParserInit( char *str, int len ) {
+	TParser	*prs = (TParser*)palloc0( sizeof(TParser) );
+
+	prs->str = str;
+	prs->lenstr = len;
+	prs->charmaxlen = pg_database_encoding_max_length();
+	prs->usewide = false;
+
+#ifdef TS_USE_WIDE
+	/*
+	 * Use wide char code only when max encoding length > 1 and ctype != C.
+	 * Some operating systems fail with multi-byte encodings and a C locale.
+	 * Also, for a C locale there is no need to process as multibyte.
+	 * From backend/utils/adt/oracle_compat.c Teodor
+	 */
+	if ( prs->charmaxlen > 1 && !lc_ctype_is_c() ) {
+		prs->usewide = true;
+		prs->wstr = (wchar_t*)palloc( sizeof(wchar_t) * prs->lenstr );
+		prs->lenwstr = char2wchar( prs->wstr, prs->str, prs->lenstr );
+	}
+#endif
+
+	prs->state = newTParserPosition(NULL);
+	prs->state->state = TPS_Base;
+
+	return prs;
+}
+
+/*
+ * Release a parser: every position on the state chain, the wide-character
+ * copy of the input (if one was made) and the parser struct itself.
+ */
+void
+TParserClose( TParser* prs ) {
+	TParserPosition	*cur = prs->state;
+
+	while ( cur != NULL ) {
+		TParserPosition	*older = cur->prev;
+
+		pfree( cur );
+		cur = older;
+	}
+	prs->state = NULL;
+
+	if ( prs->wstr != NULL )
+		pfree( prs->wstr );
+
+	pfree( prs );
+}
+
+/*
+ * Define support functions equivalent to the is* macros, but
+ * working with any possible encoding and locale
+ */
+
+#ifdef TS_USE_WIDE
+
+/*
+ * p_iswhat(type) generates p_is<type>() and p_isnot<type>().  p_is<type>()
+ * classifies the character at the current position: with usewide set it
+ * applies isw<type>() to the wide string at poschar, otherwise is<type>()
+ * to the byte string at posbyte.
+ */
+#define p_iswhat(type)														\
+static int																	\
+p_is##type(TParser *prs) {													\
+	Assert( prs->state );													\
+	if ( prs->usewide )														\
+		return isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) );	\
+	return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );	\
+}																			\
+																			\
+static int																	\
+p_isnot##type(TParser *prs) {												\
+	return !p_is##type(prs);												\
+}
+
+
+
+/* p_iseq should be used only for ascii symbols */
+
+/* Returns 1 when the current character is one byte long and equals c, else 0. */
+static int
+p_iseq(TParser *prs, char c) {
+	Assert( prs->state );
+
+	if ( prs->state->charlen != 1 )
+		return 0;
+
+	return ( *( prs->str + prs->state->posbyte ) == c ) ? 1 : 0;
+}
+
+#else /* TS_USE_WIDE */
+
+/*
+ * Single-byte variant of p_iswhat(type): generates p_is<type>(), which
+ * applies is<type>() to the byte at posbyte, and p_isnot<type>().
+ * (The original return statement had one closing parenthesis too many and
+ * did not compile when TS_USE_WIDE was undefined.)
+ */
+#define p_iswhat(type)														\
+static int																	\
+p_is##type(TParser *prs) {													\
+	Assert( prs->state );													\
+	return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );	\
+}																			\
+																			\
+static int																	\
+p_isnot##type(TParser *prs) {												\
+	return !p_is##type(prs);												\
+}
+
+
+/*
+ * Single-byte variant: returns 1 when the byte at the current position
+ * equals c, else 0.  (The original expression had an unbalanced closing
+ * parenthesis and did not compile when TS_USE_WIDE was undefined.)
+ */
+static int
+p_iseq(TParser *prs, char c) {
+	Assert( prs->state );
+	return ( *( prs->str + prs->state->posbyte ) == c ) ? 1 : 0;
+}
+
+#endif /* TS_USE_WIDE */
+
+p_iswhat(alnum)	/* each line instantiates p_is<type>() and p_isnot<type>() */
+p_iswhat(alpha)
+p_iswhat(digit)
+p_iswhat(lower)
+p_iswhat(print)
+p_iswhat(punct)
+p_iswhat(space)
+p_iswhat(upper)
+p_iswhat(xdigit)
+
+/* Returns 1 when posbyte has reached lenstr or the current charlen is 0, else 0. */
+static int
+p_isEOF(TParser *prs) {
+	Assert( prs->state );
+
+	if ( prs->state->posbyte == prs->lenstr )
+		return 1;
+	if ( prs->state->charlen == 0 )
+		return 1;
+	return 0;
+}
+
+/* p_iseq() against the character currently stored in prs->c. */
+static int
+p_iseqC(TParser *prs) {
+	char	wanted = prs->c;
+
+	return p_iseq(prs, wanted);
+}
+
+/* Logical negation of p_iseq() against the character stored in prs->c. */
+static int
+p_isneC(TParser *prs) {
+	char	wanted = prs->c;
+
+	return !p_iseq(prs, wanted);
+}
+
+/* Returns 1 when the current character is one byte long and isascii(), else 0. */
+static int
+p_isascii(TParser *prs) {
+	if ( prs->state->charlen != 1 )
+		return 0;
+
+	return isascii( (unsigned char) *( prs->str + prs->state->posbyte ) ) ? 1 : 0;
+}
+
+/* Alphabetic and ASCII: returns 1, otherwise 0. */
+static int
+p_islatin(TParser *prs) {
+	if ( !p_isalpha(prs) )
+		return 0;
+
+	return p_isascii(prs) ? 1 : 0;
+}
+
+/* Alphabetic but not ASCII: returns 1, otherwise 0. */
+static int
+p_isnonlatin(TParser *prs) {
+	if ( !p_isalpha(prs) )
+		return 0;
+
+	return p_isascii(prs) ? 0 : 1;
+}
+
+/*
+ * References every generated predicate once.  Judging by its name this
+ * exists only to keep the compiler from complaining about unused static
+ * functions; every call passes NULL.
+ */
+void _make_compiler_happy(void);
+void
+_make_compiler_happy(void) {
+	p_isalnum(NULL);
+	p_isnotalnum(NULL);
+	p_isalpha(NULL);
+	p_isnotalpha(NULL);
+	p_isdigit(NULL);
+	p_isnotdigit(NULL);
+	p_islower(NULL);
+	p_isnotlower(NULL);
+	p_isprint(NULL);
+	p_isnotprint(NULL);
+	p_ispunct(NULL);
+	p_isnotpunct(NULL);
+	p_isspace(NULL);
+	p_isnotspace(NULL);
+	p_isupper(NULL);
+	p_isnotupper(NULL);
+	p_isxdigit(NULL);
+	p_isnotxdigit(NULL);
+	p_isEOF(NULL);
+	p_iseqC(NULL);
+	p_isneC(NULL);
+}
+
+
+/*
+ * Toggle prs->ignore on script/style tags: "<script" and "<style" switch
+ * it on, "</script" and "</style" switch it off.  The lexeme is compared
+ * case-insensitively and only when its character length matches the tag.
+ */
+static void
+SpecialTags(TParser *prs) {
+	if ( prs->state->lencharlexeme == 8 ) {
+		/* </script */
+		if ( pg_strncasecmp( prs->lexeme, "</script", 8 ) == 0 )
+			prs->ignore = false;
+	} else if ( prs->state->lencharlexeme == 7 ) {
+		/* </style || <script */
+		if ( pg_strncasecmp( prs->lexeme, "</style", 7 ) == 0 )
+			prs->ignore = false;
+		else if ( pg_strncasecmp( prs->lexeme, "<script", 7 ) == 0 )
+			prs->ignore = true;
+	} else if ( prs->state->lencharlexeme == 6 ) {
+		/* <style */
+		if ( pg_strncasecmp( prs->lexeme, "<style", 6 ) == 0 )
+			prs->ignore = true;
+	}
+}
+
+/*
+ * Set wanthost and move the current position back by the length of the
+ * current lexeme, in both bytes and characters.
+ */
+static void
+SpecialFURL(TParser *prs) {
+	TParserPosition	*pos = prs->state;
+
+	prs->wanthost = true;
+	pos->posbyte -= pos->lenbytelexeme;
+	pos->poschar -= pos->lencharlexeme;
+}
+
+/*
+ * Move the current position back by the length of the current lexeme, in
+ * both bytes and characters.
+ */
+static void
+SpecialHyphen(TParser *prs) {
+	TParserPosition	*pos = prs->state;
+
+	pos->posbyte -= pos->lenbytelexeme;
+	pos->poschar -= pos->lencharlexeme;
+}
+
+/* Returns 1 once after wanthost was set, clearing the flag; otherwise 0. */
+static int
+p_isstophost(TParser *prs) {
+	if ( !prs->wanthost )
+		return 0;
+
+	prs->wanthost = false;
+	return 1;
+}
+
+/* Returns 1 while prs->ignore is set, else 0. */
+static int
+p_isignore(TParser *prs) {
+	if ( prs->ignore )
+		return 1;
+	return 0;
+}
+
+/*
+ * Run a temporary parser over the rest of the input.  If its first token is
+ * a HOST, advance the current position and lexeme lengths past it, copy the
+ * sub-parser's charlen, and return 1.  Otherwise return 0 and leave the
+ * position untouched.
+ */
+static int
+p_ishost(TParser *prs) {
+	TParserPosition	*pos = prs->state;
+	TParser	*sub = TParserInit( prs->str + pos->posbyte, prs->lenstr - pos->posbyte );
+	int		found = 0;
+
+	if ( TParserGet(sub) && sub->type == HOST ) {
+		pos->posbyte += sub->lenbytelexeme;
+		pos->poschar += sub->lencharlexeme;
+		pos->lenbytelexeme += sub->lenbytelexeme;
+		pos->lencharlexeme += sub->lencharlexeme;
+		/* must be read before the sub-parser is closed */
+		pos->charlen = sub->state->charlen;
+		found = 1;
+	}
+
+	TParserClose(sub);
+
+	return found;
+}
+
+/*
+ * Run a temporary parser over the rest of the input, started in state
+ * TPS_InFileFirst on a pushed position.  If its first token is a URI or a
+ * FILEPATH, advance the current position and lexeme lengths past it, copy
+ * the sub-parser's charlen, and return 1.  Otherwise return 0.
+ */
+static int
+p_isURI(TParser *prs) {
+	TParserPosition	*pos = prs->state;
+	TParser	*sub = TParserInit( prs->str + pos->posbyte, prs->lenstr - pos->posbyte );
+	int		found = 0;
+
+	sub->state = newTParserPosition( sub->state );
+	sub->state->state = TPS_InFileFirst;
+
+	if ( TParserGet(sub) && (sub->type == URI || sub->type == FILEPATH) ) {
+		pos->posbyte += sub->lenbytelexeme;
+		pos->poschar += sub->lencharlexeme;
+		pos->lenbytelexeme += sub->lenbytelexeme;
+		pos->lencharlexeme += sub->lencharlexeme;
+		/* must be read before the sub-parser is closed */
+		pos->charlen = sub->state->charlen;
+		found = 1;
+	}
+
+	TParserClose(sub);
+
+	return found;
+}
+
+/*
+ * Table of state/action of parser
+ */
+
+#define A_NEXT 0x0000
+#define A_BINGO 0x0001
+#define A_POP 0x0002
+#define A_PUSH 0x0004
+#define A_RERUN 0x0008
+#define A_CLEAR 0x0010
+#define A_MERGE 0x0020
+#define A_CLRALL 0x0040
+
+static TParserStateActionItem actionTPS_Base[] = { /* start of a lexeme: dispatch on its first character */
+ {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
+ {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
+ {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
+ {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
+};
+
+
+static TParserStateActionItem actionTPS_InUWord[] = { /* word of p_isalnum characters; emits UWORD */
+ {p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, UWORD, NULL}
+};
+
+static TParserStateActionItem actionTPS_InLatWord[] = { /* run of p_islatin characters; emits LATWORD unless a pushed alternative succeeds */
+ {p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL},
+ {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InFileFirst, 0, NULL}, /* tried after the '.' alternative above was popped */
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL}, /* tried after the '-' alternative above was popped */
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL}
+};
+
+static TParserStateActionItem actionTPS_InCyrWord[] = { /* run of p_isnonlatin characters; emits CYRWORD */
+ {p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL}
+};
+
+static TParserStateActionItem actionTPS_InUnsignedInt[] = { /* run of digits; emits UNSIGNEDINT unless a pushed alternative succeeds */
+ {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL}, /* tried after the '.' alternative above was popped */
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
+};
+
+static TParserStateActionItem actionTPS_InSignedIntFirst[] = { /* after a leading '-' or '+': needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT|A_CLEAR, TPS_InSignedInt, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InSignedInt[] = { /* digits after the sign; emits SIGNEDINT */
+ {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
+};
+
+static TParserStateActionItem actionTPS_InSpace[] = { /* run of ignorable / non-alphanumeric characters; emits SPACE */
+ {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL}, /* the characters that TPS_Base pushes on end the space run */
+ {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
+};
+
+static TParserStateActionItem actionTPS_InUDecimalFirst[] = { /* after '.' following an unsigned int: needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InUDecimal[] = { /* fraction digits of an unsigned number; emits DECIMAL */
+ {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
+};
+
+static TParserStateActionItem actionTPS_InDecimalFirst[] = { /* after '.' following a signed int: needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InDecimal[] = { /* fraction digits of a signed number; emits DECIMAL */
+ {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
+};
+
+static TParserStateActionItem actionTPS_InVersionFirst[] = { /* after a further '.': needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InVersion[] = { /* number with more than one '.'; emits VERSIONNUMBER */
+ {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
+};
+
+static TParserStateActionItem actionTPS_InMantissaFirst[] = { /* after 'e' or 'E': needs a digit or a sign, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
+ {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InMantissaSign[] = { /* after the sign that follows 'e' or 'E': needs a digit */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InMantissa[] = { /* digits following 'e' or 'E'; emits SCIENTIFIC */
+ {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = { /* after '&': expects '#' or a latin letter, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst,0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHTMLEntity[] = { /* named entity: latin letters up to ';' */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { /* after '&' '#': needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHTMLEntityNum[] = { /* numeric entity: digits up to ';' */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
+ {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = { /* after ';': unconditionally emits HTMLENTITY */
+ {NULL, 0, A_BINGO|A_CLEAR,TPS_Base, HTMLENTITY, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTagFirst[] = { /* after '<': expects '/', '!' or a latin letter, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
+ {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
+ {p_islatin, 0, A_PUSH, TPS_InTag, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTagCloseFirst[] = { /* after '<' '/': needs a latin letter, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InTag, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTag[] = { /* inside a tag, up to '>'; any character not listed here pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
+ {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},
+ {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
+ {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTagEscapeK[] = { /* inside a single-quoted string within a tag */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
+ {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTagEscapeKK[] = { /* inside a double-quoted string within a tag */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
+ {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTagBackSleshed[] = { /* character after a backslash in a quoted string: merged back into the pushed state */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {NULL, 0, A_MERGE, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InTagEnd[] = { /* after '>': unconditionally emits TAG */
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL}
+};
+
+static TParserStateActionItem actionTPS_InCommentFirst[] = { /* after '<' '!': needs '-', otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InCommentLast[] = { /* needs the second '-' of the comment opener, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InComment[] = { /* comment body: skips everything until a '-' is seen */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst,0, NULL},
+ {NULL, 0, A_NEXT, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InCloseCommentFirst[] = { /* one '-' seen inside the comment body */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InCloseCommentLast[] = { /* two '-' seen inside the comment body: '>' closes the comment */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InCommentEnd[] = { /* after the closing '>': unconditionally emits the comment as TAG */
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHostFirstDomen[] = { /* after '.' in a host-like name: needs a letter or digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHostDomenSecond, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+ /* disabled: {p_iseqC, '-', A_POP, TPS_InHostFirstAN, 0, NULL}, */
+ /* disabled: {p_iseqC, '.', A_POP, TPS_InHostFirstDomen, 0, NULL}, */
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHostDomenSecond[] = { /* one latin letter seen after '.'; no lexeme is emitted from this state */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL},
+ {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHostDomen[] = { /* two or more latin letters seen after '.'; emits HOST */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL},
+ {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_isdigit, 0, A_POP, TPS_Null, 0, NULL}, /* reached only after the p_isdigit push above was popped */
+ {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}
+};
+
+static TParserStateActionItem actionTPS_InPortFirst[] = { /* after ':' following a host: needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InPort[] = { /* port digits; emits HOST */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
+ {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHostFirstAN[] = { /* after '-' in a host-like name: needs a letter or digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHost[] = { /* letters and digits of a host-like name; no lexeme is emitted from this state */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InEmail[] = { /* after '@': emits EMAIL when p_ishost() succeeds, otherwise pops */
+ {p_ishost, 0, A_BINGO|A_CLRALL, TPS_Base, EMAIL, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InFileFirst[] = { /* first character of a file path component; anything not listed pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '.', A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InFile[] = { /* inside a file path; emits FILEPATH */
+ {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
+ {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
+};
+
+static TParserStateActionItem actionTPS_InFileNext[] = { /* after '.' inside a file path: needs a letter, digit or '_', otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InURIFirst[] = { /* after '?': needs a p_isnotspace character other than a quote, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
+ {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL},
+};
+
+static TParserStateActionItem actionTPS_InURIStart[] = { /* entered after HOST was emitted on p_isstophost; always continues in TPS_InURI */
+ {NULL, 0, A_NEXT, TPS_InURI, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InURI[] = { /* runs while p_isnotspace holds and no quote is seen; emits URI */
+ {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL},
+ {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL},
+ {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL},
+ {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, URI, NULL}
+};
+
+static TParserStateActionItem actionTPS_InFURL[] = { /* after '/' following a host or port: emits FURL when p_isURI() succeeds, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isURI, 0, A_BINGO|A_CLRALL,TPS_Base, FURL, SpecialFURL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InProtocolFirst[] = { /* after ':' following a latin word: needs '/', otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InProtocolSecond[] = { /* needs the second '/', otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InProtocolEnd[] = { /* after the second '/': unconditionally emits PROTOCOL */
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, PROTOCOL, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = { /* after '-' in a latin word: needs a letter or digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, /* NOTE(review): looks unreachable, the p_isdigit item above is A_NEXT, not A_PUSH - confirm */
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenLatWord[] = { /* hyphenated word of latin letters; emits LATHYPHENWORD and continues in TPS_InParseHyphen */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}
+};
+
+static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = { /* after '-' in a non-latin word: needs a letter or digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, /* NOTE(review): looks unreachable, the p_isdigit item above is A_NEXT, not A_PUSH - confirm */
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenCyrWord[] = { /* hyphenated word of non-latin letters; emits CYRHYPHENWORD and continues in TPS_InParseHyphen */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}
+};
+
+static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = { /* after '-' in a mixed word: needs a p_isalnum character, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenUWord[] = { /* hyphenated word of mixed characters; emits HYPHENWORD and continues in TPS_InParseHyphen */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
+ {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
+};
+
+static TParserStateActionItem actionTPS_InHyphenValueFirst[] = { /* after '.' in a numeric part: needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenValue[] = { /* digits right after a '-'; emits HYPHENWORD and continues in TPS_InParseHyphen */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL},
+ {p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
+};
+
+static TParserStateActionItem actionTPS_InHyphenValueExact[] = { /* digits after '.' in a numeric part; emits HYPHENWORD and continues in TPS_InParseHyphen */
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
+};
+
+static TParserStateActionItem actionTPS_InParseHyphen[] = { /* entered after a hyphenated word was emitted; dispatches on the first character of a part */
+ {p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen,0, NULL},
+ {NULL, 0, A_RERUN, TPS_Base, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = { /* after '-' between parts: emits it as SPACE when a p_isalnum character follows, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isalnum, 0, A_BINGO|A_CLEAR,TPS_InParseHyphen, SPACE, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = { /* non-latin part of a hyphenated word; emits CYRPARTHYPHENWORD */
+ {p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD,NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD,NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = { /* latin part of a hyphenated word; emits LATPARTHYPHENWORD */
+ {p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD,NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD,NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenUWordPart[] = { /* mixed part of a hyphenated word; emits PARTHYPHENWORD */
+ {p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = { /* digit part of a hyphenated word; emits UNSIGNEDINT */
+ {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL},
+ {p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = { /* after '.' in a digit part: needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHDecimalPart[] = { /* fraction digits of a part; emits DECIMAL */
+ {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHVersionPartFirst[] = { /* after a further '.' in a digit part: needs a digit, otherwise pops */
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static TParserStateActionItem actionTPS_InHVersionPart[] = { /* part with more than one '.'; emits VERSIONNUMBER */
+ {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL}
+};
+
+/*
+ * Map from parser state to its action table.
+ *
+ * The order must be the same as in typedef enum {} TParserState:
+ * TParserGet() indexes this array directly with the state value and asserts
+ * that Actions[state].state == state.  The last entry (TPS_Null) has no
+ * action table.
+ */
+
+static const TParserStateAction Actions[] = {
+ { TPS_Base, actionTPS_Base },
+ { TPS_InUWord, actionTPS_InUWord },
+ { TPS_InLatWord, actionTPS_InLatWord },
+ { TPS_InCyrWord, actionTPS_InCyrWord },
+ { TPS_InUnsignedInt, actionTPS_InUnsignedInt },
+ { TPS_InSignedIntFirst, actionTPS_InSignedIntFirst },
+ { TPS_InSignedInt, actionTPS_InSignedInt },
+ { TPS_InSpace, actionTPS_InSpace },
+ { TPS_InUDecimalFirst, actionTPS_InUDecimalFirst },
+ { TPS_InUDecimal, actionTPS_InUDecimal },
+ { TPS_InDecimalFirst, actionTPS_InDecimalFirst },
+ { TPS_InDecimal, actionTPS_InDecimal },
+ { TPS_InVersionFirst, actionTPS_InVersionFirst },
+ { TPS_InVersion, actionTPS_InVersion },
+ { TPS_InMantissaFirst, actionTPS_InMantissaFirst },
+ { TPS_InMantissaSign, actionTPS_InMantissaSign },
+ { TPS_InMantissa, actionTPS_InMantissa },
+ { TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst },
+ { TPS_InHTMLEntity, actionTPS_InHTMLEntity },
+ { TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst },
+ { TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum },
+ { TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd },
+ { TPS_InTagFirst, actionTPS_InTagFirst },
+ { TPS_InTagCloseFirst, actionTPS_InTagCloseFirst },
+ { TPS_InTag, actionTPS_InTag },
+ { TPS_InTagEscapeK, actionTPS_InTagEscapeK },
+ { TPS_InTagEscapeKK, actionTPS_InTagEscapeKK },
+ { TPS_InTagBackSleshed, actionTPS_InTagBackSleshed },
+ { TPS_InTagEnd, actionTPS_InTagEnd },
+ { TPS_InCommentFirst, actionTPS_InCommentFirst },
+ { TPS_InCommentLast, actionTPS_InCommentLast },
+ { TPS_InComment, actionTPS_InComment },
+ { TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst },
+ { TPS_InCloseCommentLast, actionTPS_InCloseCommentLast },
+ { TPS_InCommentEnd, actionTPS_InCommentEnd },
+ { TPS_InHostFirstDomen, actionTPS_InHostFirstDomen },
+ { TPS_InHostDomenSecond, actionTPS_InHostDomenSecond },
+ { TPS_InHostDomen, actionTPS_InHostDomen },
+ { TPS_InPortFirst, actionTPS_InPortFirst },
+ { TPS_InPort, actionTPS_InPort },
+ { TPS_InHostFirstAN, actionTPS_InHostFirstAN },
+ { TPS_InHost, actionTPS_InHost },
+ { TPS_InEmail, actionTPS_InEmail },
+ { TPS_InFileFirst, actionTPS_InFileFirst },
+ { TPS_InFile, actionTPS_InFile },
+ { TPS_InFileNext, actionTPS_InFileNext },
+ { TPS_InURIFirst, actionTPS_InURIFirst },
+ { TPS_InURIStart, actionTPS_InURIStart },
+ { TPS_InURI, actionTPS_InURI },
+ { TPS_InFURL, actionTPS_InFURL },
+ { TPS_InProtocolFirst, actionTPS_InProtocolFirst },
+ { TPS_InProtocolSecond, actionTPS_InProtocolSecond },
+ { TPS_InProtocolEnd, actionTPS_InProtocolEnd },
+ { TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst },
+ { TPS_InHyphenLatWord, actionTPS_InHyphenLatWord },
+ { TPS_InHyphenCyrWordFirst, actionTPS_InHyphenCyrWordFirst },
+ { TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord },
+ { TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst },
+ { TPS_InHyphenUWord, actionTPS_InHyphenUWord },
+ { TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst },
+ { TPS_InHyphenValue, actionTPS_InHyphenValue },
+ { TPS_InHyphenValueExact, actionTPS_InHyphenValueExact },
+ { TPS_InParseHyphen, actionTPS_InParseHyphen },
+ { TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen },
+ { TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart },
+ { TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart },
+ { TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart },
+ { TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt },
+ { TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst },
+ { TPS_InHDecimalPart, actionTPS_InHDecimalPart },
+ { TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst },
+ { TPS_InHVersionPart, actionTPS_InHVersionPart },
+ { TPS_Null, NULL }
+};
+
+
+/*
+ * TParserGet - find the next lexeme in the parser's input string.
+ *
+ * Runs the state machine described by Actions[], starting at the current
+ * position (prs->state).  For every character the first matching item of the
+ * current state's action table is taken; its flags (A_*) and tostate drive
+ * the position stack and the next state.
+ *
+ * Returns true when a lexeme was found; prs->lexeme, prs->lenbytelexeme,
+ * prs->lencharlexeme and prs->type then describe it.  Returns false when the
+ * input is exhausted without producing another lexeme.
+ */
+bool
+TParserGet( TParser *prs ) {
+	TParserStateActionItem *item=NULL;
+
+	/* must be checked before prs->state is dereferenced for the first time */
+	Assert( prs->state );
+
+	if ( prs->state->posbyte >= prs->lenstr )
+		return false;
+
+	prs->lexeme = prs->str + prs->state->posbyte;
+	prs->state->pushedAtAction = NULL;
+
+	/* look at string */
+	while (prs->state->posbyte <= prs->lenstr) {
+		/* at end of string the "current character" has zero length */
+		if ( prs->state->posbyte == prs->lenstr )
+			prs->state->charlen = 0;
+		else
+			prs->state->charlen = ( prs->charmaxlen == 1 ) ? prs->charmaxlen :
+				pg_mblen( prs->str + prs->state->posbyte );
+
+		Assert( prs->state->posbyte + prs->state->charlen <= prs->lenstr );
+		Assert( prs->state->state >=TPS_Base && prs->state->state < TPS_Null );
+		Assert( Actions[ prs->state->state ].state == prs->state->state );
+
+		item = Actions[ prs->state->state ].action;
+		Assert(item!=NULL);
+
+		/*
+		 * If we came back to this position by A_POP, resume the search at
+		 * the item where we pushed instead of at the start of the table.
+		 */
+		if ( item < prs->state->pushedAtAction )
+			item = prs->state->pushedAtAction;
+
+		/* find action by character class */
+		while( item->isclass ) {
+			prs->c = item->c;
+			if ( item->isclass(prs)!=0 ) {
+				/*
+				 * Only accept items located after the one we pushed at:
+				 * that one was already tried and led to a dead end.
+				 */
+				if ( item > prs->state->pushedAtAction )
+					break;
+			}
+			item++;
+		}
+
+		prs->state->pushedAtAction = NULL;
+
+		/* call special handler if exists */
+		if ( item->special )
+			item->special(prs);
+
+		/* BINGO, lexeme is found */
+		if ( item->flags & A_BINGO ) {
+			Assert( item->type>0 );
+			prs->lenbytelexeme = prs->state->lenbytelexeme;
+			prs->lencharlexeme = prs->state->lencharlexeme;
+			prs->state->lenbytelexeme = prs->state->lencharlexeme = 0;
+			prs->type = item->type;
+		}
+
+		/* do various actions by flags (only the first flag set below is applied) */
+		if ( item->flags & A_POP ) { /* pop stored state in stack */
+			TParserPosition *ptr = prs->state->prev;
+			pfree( prs->state );
+			prs->state = ptr;
+			Assert( prs->state );
+		} else if ( item->flags & A_PUSH ) { /* push (store) state in stack */
+			prs->state->pushedAtAction = item; /* remember where we push */
+			prs->state = newTParserPosition( prs->state );
+		} else if ( item->flags & A_CLEAR ) { /* clear previous pushed state */
+			TParserPosition *ptr;
+			Assert( prs->state->prev );
+			ptr = prs->state->prev->prev;
+			pfree( prs->state->prev );
+			prs->state->prev = ptr;
+		} else if ( item->flags & A_CLRALL ) { /* clear all previous pushed state */
+			TParserPosition *ptr;
+			while( prs->state->prev ) {
+				ptr = prs->state->prev->prev;
+				pfree( prs->state->prev );
+				prs->state->prev = ptr;
+			}
+		} else if ( item->flags & A_MERGE ) { /* merge posinfo with current and pushed state */
+			TParserPosition *ptr = prs->state;
+			Assert( prs->state->prev );
+			prs->state = prs->state->prev;
+
+			prs->state->posbyte = ptr->posbyte;
+			prs->state->poschar = ptr->poschar;
+			prs->state->charlen = ptr->charlen;
+			prs->state->lenbytelexeme = ptr->lenbytelexeme;
+			prs->state->lencharlexeme = ptr->lencharlexeme;
+			pfree(ptr);
+		}
+
+		/* set new state if pointed */
+		if ( item->tostate != TPS_Null )
+			prs->state->state = item->tostate;
+
+		/* stop on a found lexeme, or at end of string unless a rerun is requested */
+		if ( (item->flags & A_BINGO) || (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN)==0 ) )
+			break;
+
+		/* go to beginning of loop if we should rerun or we just restored state */
+		if ( item->flags & ( A_RERUN | A_POP ) )
+			continue;
+
+		/* move forward */
+		if ( prs->state->charlen ) {
+			prs->state->posbyte += prs->state->charlen;
+			prs->state->lenbytelexeme += prs->state->charlen;
+			prs->state->poschar ++;
+			prs->state->lencharlexeme ++;
+		}
+	}
+
+	return (item && (item->flags & A_BINGO)) ? true : false;
+}
+
+
diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h
index 3f0e0cd6359..ee5b3b7ab54 100644
--- a/contrib/tsearch2/wordparser/parser.h
+++ b/contrib/tsearch2/wordparser/parser.h
@@ -1,10 +1,147 @@
#ifndef __PARSER_H__
#define __PARSER_H__
-extern char *token;
-extern int tokenlen;
-int tsearch2_yylex(void);
-void tsearch2_start_parse_str(char *, int);
-void tsearch2_end_parse(void);
+#include <ctype.h>
+#include <limits.h>
+#include "ts_locale.h"
+
+typedef enum { /* parser states; the order must match the Actions[] array in parser.c */
+ TPS_Base = 0,
+ TPS_InUWord,
+ TPS_InLatWord,
+ TPS_InCyrWord,
+ TPS_InUnsignedInt,
+ TPS_InSignedIntFirst,
+ TPS_InSignedInt,
+ TPS_InSpace,
+ TPS_InUDecimalFirst,
+ TPS_InUDecimal,
+ TPS_InDecimalFirst,
+ TPS_InDecimal,
+ TPS_InVersionFirst,
+ TPS_InVersion,
+ TPS_InMantissaFirst,
+ TPS_InMantissaSign,
+ TPS_InMantissa,
+ TPS_InHTMLEntityFirst,
+ TPS_InHTMLEntity,
+ TPS_InHTMLEntityNumFirst,
+ TPS_InHTMLEntityNum,
+ TPS_InHTMLEntityEnd,
+ TPS_InTagFirst,
+ TPS_InTagCloseFirst,
+ TPS_InTag,
+ TPS_InTagEscapeK,
+ TPS_InTagEscapeKK,
+ TPS_InTagBackSleshed,
+ TPS_InTagEnd,
+ TPS_InCommentFirst,
+ TPS_InCommentLast,
+ TPS_InComment,
+ TPS_InCloseCommentFirst,
+ TPS_InCloseCommentLast,
+ TPS_InCommentEnd,
+ TPS_InHostFirstDomen,
+ TPS_InHostDomenSecond,
+ TPS_InHostDomen,
+ TPS_InPortFirst,
+ TPS_InPort,
+ TPS_InHostFirstAN,
+ TPS_InHost,
+ TPS_InEmail,
+ TPS_InFileFirst,
+ TPS_InFile,
+ TPS_InFileNext,
+ TPS_InURIFirst,
+ TPS_InURIStart,
+ TPS_InURI,
+ TPS_InFURL,
+ TPS_InProtocolFirst,
+ TPS_InProtocolSecond,
+ TPS_InProtocolEnd,
+ TPS_InHyphenLatWordFirst,
+ TPS_InHyphenLatWord,
+ TPS_InHyphenCyrWordFirst,
+ TPS_InHyphenCyrWord,
+ TPS_InHyphenUWordFirst,
+ TPS_InHyphenUWord,
+ TPS_InHyphenValueFirst,
+ TPS_InHyphenValue,
+ TPS_InHyphenValueExact,
+ TPS_InParseHyphen,
+ TPS_InParseHyphenHyphen,
+ TPS_InHyphenCyrWordPart,
+ TPS_InHyphenLatWordPart,
+ TPS_InHyphenUWordPart,
+ TPS_InHyphenUnsignedInt,
+ TPS_InHDecimalPartFirst,
+ TPS_InHDecimalPart,
+ TPS_InHVersionPartFirst,
+ TPS_InHVersionPart,
+ TPS_Null /* last state (fake value); as a tostate it means "keep the current state" */
+} TParserState;
+
+/* forward declaration */
+struct TParser;
+
+
+typedef int (*TParserCharTest)(struct TParser*); /* any p_is* functions except p_iseq; a non-zero result means the test succeeded */
+typedef void (*TParserSpecial)(struct TParser*); /* special handler for special cases; called when its action item is selected */
+
+typedef struct { /* one row of a state's action table */
+ TParserCharTest isclass; /* character test; NULL marks the last (default) row */
+ char c; /* copied to TParser.c before isclass is called */
+ uint16 flags; /* A_* action flags */
+ TParserState tostate; /* next state, or TPS_Null to keep the current one */
+ int type; /* lexeme type reported when flags contain A_BINGO */
+ TParserSpecial special; /* optional handler called when the row is selected, or NULL */
+} TParserStateActionItem;
+
+typedef struct { /* binds a state to its action table */
+ TParserState state; /* the state this entry belongs to */
+ TParserStateActionItem *action; /* rows to try in order for that state */
+} TParserStateAction;
+
+typedef struct TParserPosition { /* one entry of the parser's position stack */
+ int posbyte; /* position of parser in bytes */
+ int poschar; /* position of parser in characters */
+ int charlen; /* length of current char */
+ int lenbytelexeme; /* length in bytes of the lexeme collected so far */
+ int lencharlexeme; /* length in characters of the lexeme collected so far */
+ TParserState state; /* current state of the state machine */
+ struct TParserPosition *prev; /* previously pushed position, or NULL */
+ int flags;
+ TParserStateActionItem *pushedAtAction; /* action row at which the next position was pushed from here */
+} TParserPosition;
+
+typedef struct TParser {
+ /* string and position information */
+ char *str; /* multibyte string */
+ int lenstr; /* length of mbstring */
+ wchar_t *wstr; /* wide character string */
+ int lenwstr; /* length of wstr */
+
+ /* State of parse */
+ int charmaxlen; /* when 1, every character is taken as one byte and pg_mblen() is not called */
+ bool usewide;
+ TParserPosition *state; /* top of the position stack */
+ bool ignore;
+ bool wanthost;
+
+ /* silly char */
+ char c; /* character argument of the action row being tested */
+
+ /* out */
+ char *lexeme; /* start of the lexeme inside str */
+ int lenbytelexeme; /* lexeme length in bytes */
+ int lencharlexeme; /* lexeme length in characters */
+ int type; /* lexeme type taken from the matching action row */
+
+} TParser;
+
+
+TParser* TParserInit( char *, int ); /* presumably takes the string to parse and its length - confirm in parser.c */
+bool TParserGet( TParser* ); /* returns true when a lexeme was found; its description is stored in the TParser "out" fields */
+void TParserClose( TParser* ); /* releases a parser obtained from TParserInit() */
#endif
diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l
deleted file mode 100644
index a7cb4684c32..00000000000
--- a/contrib/tsearch2/wordparser/parser.l
+++ /dev/null
@@ -1,346 +0,0 @@
-%{
-#include "postgres.h"
-
-#include "deflex.h"
-#include "parser.h"
-#include "common.h"
-
-/* Avoid exit() on fatal scanner errors */
-#undef fprintf
-#define fprintf(file, fmt, msg) ts_error(ERROR, fmt, msg)
-
-char *token = NULL; /* pointer to token */
-int tokenlen;
-static char *s = NULL; /* to return WHOLE hyphenated-word */
-
-YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
-
-typedef struct {
- int tlen;
- int clen;
- char *str;
-} TagStorage;
-
-static TagStorage ts={0,0,NULL};
-
-static void
-addTag(void)
-{
- while( ts.clen+tsearch2_yyleng+1 > ts.tlen ) {
- ts.tlen*=2;
- ts.str=realloc(ts.str,ts.tlen);
- if (!ts.str)
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- }
- memcpy(ts.str+ts.clen,tsearch2_yytext,tsearch2_yyleng);
- ts.clen+=tsearch2_yyleng;
- ts.str[ts.clen]='\0';
-}
-
-static void
-startTag(void)
-{
- if ( ts.str==NULL ) {
- ts.tlen=tsearch2_yyleng+1;
- ts.str=malloc(ts.tlen);
- if (!ts.str)
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- }
- ts.clen=0;
- ts.str[0]='\0';
- addTag();
-}
-
-%}
-
-%option 8bit
-%option never-interactive
-%option nodefault
-%option nounput
-%option noyywrap
-
-/* parser's state for parsing hyphenated-word */
-%x DELIM
-/* parser's state for parsing URL*/
-%x URL
-%x SERVER
-
-/* parser's state for parsing TAGS */
-%x INTAG
-%x QINTAG
-%x INCOMMENT
-%x INSCRIPT
-
-/* cyrillic koi8 char */
-CYRALNUM [0-9\200-\377]
-CYRALPHA [\200-\377]
-ALPHA [a-zA-Z\200-\377]
-ALNUM [0-9a-zA-Z\200-\377]
-
-
-HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+
-URI [-_[:alnum:]/%,\.;=&?#]+
-
-%%
-
-"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; startTag(); }
-
-<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
- BEGIN INITIAL;
- addTag();
- token = ts.str;
- tokenlen = ts.clen;
- return TAG;
-}
-
-"<!--" { BEGIN INCOMMENT; startTag(); }
-
-<INCOMMENT>"-->" {
- BEGIN INITIAL;
- addTag();
- token = ts.str;
- tokenlen = ts.clen;
- return TAG;
-}
-
-
-"<"[\![:alpha:]] { BEGIN INTAG; startTag(); }
-
-"</"[[:alpha:]] { BEGIN INTAG; startTag(); }
-
-<INTAG>"\"" { BEGIN QINTAG; addTag(); }
-
-<QINTAG>"\\\"" { addTag(); }
-
-<QINTAG>"\"" { BEGIN INTAG; addTag(); }
-
-<INTAG>">" {
- BEGIN INITIAL;
- addTag();
- token = ts.str;
- tokenlen = ts.clen;
- return TAG;
-}
-
-<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n { addTag(); }
-
-\&(quot|amp|nbsp|lt|gt)\; {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTMLENTITY;
-}
-
-\&\#[0-9][0-9]?[0-9]?\; {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTMLENTITY;
-}
-
-[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return EMAIL;
-}
-
-[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SCIENTIFIC;
-}
-
-[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return VERSIONNUMBER;
-}
-
-[+-]?[0-9]+\.[0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return DECIMAL;
-}
-
-[+-][0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SIGNEDINT;
-}
-
-<DELIM,INITIAL>[0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return UNSIGNEDINT;
-}
-
-http"://" {
- BEGIN URL;
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTTP;
-}
-
-ftp"://" {
- BEGIN URL;
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTTP;
-}
-
-<URL,INITIAL>{HOSTNAME}[/:]{URI} {
- BEGIN SERVER;
- if (s) { free(s); s=NULL; }
- s = strdup( tsearch2_yytext );
- tokenlen = tsearch2_yyleng;
- yyless( 0 );
- token = s;
- return FURL;
-}
-
-<SERVER,URL,INITIAL>{HOSTNAME} {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HOST;
-}
-
-<SERVER>[/:]{URI} {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return URI;
-}
-
-[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return FILEPATH;
-}
-
-({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ {
- BEGIN DELIM;
- if (s) { free(s); s=NULL; }
- s = strdup( tsearch2_yytext );
- tokenlen = tsearch2_yyleng;
- yyless( 0 );
- token = s;
- return CYRHYPHENWORD;
-}
-
-([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ {
- BEGIN DELIM;
- if (s) { free(s); s=NULL; }
- s = strdup( tsearch2_yytext );
- tokenlen = tsearch2_yyleng;
- yyless( 0 );
- token = s;
- return LATHYPHENWORD;
-}
-
-({ALNUM}+-)+{ALNUM}+ /* composite-word */ {
- BEGIN DELIM;
- if (s) { free(s); s=NULL; }
- s = strdup( tsearch2_yytext );
- tokenlen = tsearch2_yyleng;
- yyless( 0 );
- token = s;
- return HYPHENWORD;
-}
-
-<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return VERSIONNUMBER;
-}
-
-<DELIM>\+?[0-9]+\.[0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return DECIMAL;
-}
-
-<DELIM>{CYRALPHA}+ /* one word in composite-word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return CYRPARTHYPHENWORD;
-}
-
-<DELIM>[[:alpha:]]+ /* one word in composite-word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return LATPARTHYPHENWORD;
-}
-
-<DELIM>{ALNUM}+ /* one word in composite-word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return PARTHYPHENWORD;
-}
-
-<DELIM>- {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SPACE;
-}
-
-<DELIM,SERVER,URL>.|\n /* return in basic state */ {
- BEGIN INITIAL;
- yyless( 0 );
-}
-
-{CYRALPHA}+ /* normal word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return CYRWORD;
-}
-
-[[:alpha:]]+ /* normal word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return LATWORD;
-}
-
-{ALNUM}+ /* normal word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return UWORD;
-}
-
-[ \r\n\t]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SPACE;
-}
-
-. {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SPACE;
-}
-
-%%
-
-/* clearing after parsing from string */
-void
-tsearch2_end_parse(void)
-{
- if (s)
- {
- free(s);
- s = NULL;
- }
- tsearch2_yy_delete_buffer( buf );
- buf = NULL;
-}
-
-/* start parse from string */
-void
-tsearch2_start_parse_str(char* str, int limit)
-{
- if (buf)
- tsearch2_end_parse();
- buf = tsearch2_yy_scan_bytes( str, limit );
- tsearch2_yy_switch_to_buffer( buf );
- BEGIN INITIAL;
-}
diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c
index 66862578872..897ff2795e2 100644
--- a/contrib/tsearch2/wparser_def.c
+++ b/contrib/tsearch2/wparser_def.c
@@ -39,8 +39,7 @@ Datum prsd_start(PG_FUNCTION_ARGS);
Datum
prsd_start(PG_FUNCTION_ARGS)
{
- tsearch2_start_parse_str((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1));
- PG_RETURN_POINTER(NULL);
+ PG_RETURN_POINTER(TParserInit( (char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1))); /* return the TParser* from TParserInit() instead of NULL */
}
PG_FUNCTION_INFO_V1(prsd_getlexeme);
@@ -48,14 +47,17 @@ Datum prsd_getlexeme(PG_FUNCTION_ARGS);
Datum
prsd_getlexeme(PG_FUNCTION_ARGS)
{
- /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
+ TParser *p=(TParser*)PG_GETARG_POINTER(0); /* parser handle, argument 0 */
char **t = (char **) PG_GETARG_POINTER(1);
int *tlen = (int *) PG_GETARG_POINTER(2);
- int type = tsearch2_yylex();
- *t = token;
- *tlen = tokenlen;
- PG_RETURN_INT32(type);
+ if ( !TParserGet(p) )
+ PG_RETURN_INT32(0); /* TParserGet() returned false: *t and *tlen are not written */
+
+ *t = p->lexeme;
+ *tlen = p->lenbytelexeme; /* token length comes from the byte-length field */
+
+ PG_RETURN_INT32(p->type);
}
PG_FUNCTION_INFO_V1(prsd_end);
@@ -63,8 +65,8 @@ Datum prsd_end(PG_FUNCTION_ARGS);
Datum
prsd_end(PG_FUNCTION_ARGS)
{
- /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */
- tsearch2_end_parse();
+ TParser *p=(TParser*)PG_GETARG_POINTER(0); /* parser handle, argument 0 */
+ TParserClose(p); /* replaces the argument-less tsearch2_end_parse() */
PG_RETURN_VOID();
}