aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/tsrank.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/tsrank.c')
-rw-r--r--src/backend/utils/adt/tsrank.c286
1 files changed, 159 insertions, 127 deletions
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 065c94d2097..d23e05e9939 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -71,45 +71,60 @@ cnt_length(TSVector t)
return len;
}
-static int
-WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
-{
- if (ptr->len == item->length)
- return strncmp(
- eval + ptr->pos,
- qval + item->distance,
- item->length);
- return (ptr->len > item->length) ? 1 : -1;
-}
+#define WordECompareQueryItem(e,q,p,i,m) \
+ tsCompareString((q) + (i)->distance, (i)->length, \
+ (e) + (p)->pos, (p)->len, (m))
+
/*
- * Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q'
- * is the TSQuery containing 'item'. Returns NULL if not found.
+ * Returns a pointer to a WordEntry's array corresponding to 'item' from
+ * tsvector 't'. 'q' is the TSQuery containing 'item'.
+ * Returns NULL if not found.
*/
static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
{
WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
- WordEntry *StopMiddle;
+ WordEntry *StopMiddle = StopHigh;
int difference;
- /* Loop invariant: StopLow <= item < StopHigh */
+ *nitem=0;
+ /* Loop invariant: StopLow <= item < StopHigh */
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
+ difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
if (difference == 0)
- return StopMiddle;
- else if (difference < 0)
+ {
+ StopHigh = StopMiddle;
+ *nitem=1;
+ break;
+ }
+ else if (difference > 0)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
- return NULL;
+ if ( item->prefix == true )
+ {
+ if ( StopLow >= StopHigh )
+ StopMiddle = StopHigh;
+
+ *nitem=0;
+
+ while( StopMiddle < (WordEntry *) STRPTR(t) &&
+ WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 )
+ {
+ (*nitem)++;
+ StopMiddle++;
+ }
+ }
+
+ return ( *nitem > 0 ) ? StopHigh : NULL;
}
@@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg)
QueryOperand *qa = (*(QueryOperand **) a);
QueryOperand *qb = (*(QueryOperand **) b);
- if (qa->length == qb->length)
- return strncmp(operand + qa->distance,
- operand + qb->distance,
- qb->length);
-
- return (qa->length > qb->length) ? 1 : -1;
+ return tsCompareString(operand + qa->distance, qa->length,
+ operand + qb->distance, qb->length,
+ false);
}
/*
@@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
k,
l,
p;
- WordEntry *entry;
+ WordEntry *entry,
+ *firstentry;
WordEntryPos *post,
*ct;
int4 dimt,
lenct,
- dist;
+ dist,
+ nitem;
float res = -1.0;
QueryOperand **item;
int size = q->size;
@@ -219,40 +233,44 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
for (i = 0; i < size; i++)
{
- entry = find_wordentry(t, q, item[i]);
+ firstentry = entry = find_wordentry(t, q, item[i], &nitem);
if (!entry)
continue;
- if (entry->haspos)
- pos[i] = _POSVECPTR(t, entry);
- else
- pos[i] = &POSNULL;
-
-
- dimt = pos[i]->npos;
- post = pos[i]->pos;
- for (k = 0; k < i; k++)
+ while( entry - firstentry < nitem )
{
- if (!pos[k])
- continue;
- lenct = pos[k]->npos;
- ct = pos[k]->pos;
- for (l = 0; l < dimt; l++)
+ if (entry->haspos)
+ pos[i] = _POSVECPTR(t, entry);
+ else
+ pos[i] = &POSNULL;
+
+ dimt = pos[i]->npos;
+ post = pos[i]->pos;
+ for (k = 0; k < i; k++)
{
- for (p = 0; p < lenct; p++)
+ if (!pos[k])
+ continue;
+ lenct = pos[k]->npos;
+ ct = pos[k]->pos;
+ for (l = 0; l < dimt; l++)
{
- dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
- if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
+ for (p = 0; p < lenct; p++)
{
- float curw;
-
- if (!dist)
- dist = MAXENTRYPOS;
- curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
- res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+ dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
+ if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
+ {
+ float curw;
+
+ if (!dist)
+ dist = MAXENTRYPOS;
+ curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
+ res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+ }
}
}
}
+
+ entry++;
}
}
pfree(pos);
@@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
static float
calc_rank_or(float *w, TSVector t, TSQuery q)
{
- WordEntry *entry;
+ WordEntry *entry,
+ *firstentry;
WordEntryPos *post;
int4 dimt,
j,
- i;
+ i,
+ nitem;
float res = 0.0;
QueryOperand **item;
int size = q->size;
@@ -280,41 +300,46 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
wjm;
int4 jm;
- entry = find_wordentry(t, q, item[i]);
+ firstentry = entry = find_wordentry(t, q, item[i], &nitem);
if (!entry)
continue;
- if (entry->haspos)
- {
- dimt = POSDATALEN(t, entry);
- post = POSDATAPTR(t, entry);
- }
- else
+ while( entry - firstentry < nitem )
{
- dimt = POSNULL.npos;
- post = POSNULL.pos;
- }
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(t, entry);
+ post = POSDATAPTR(t, entry);
+ }
+ else
+ {
+ dimt = POSNULL.npos;
+ post = POSNULL.pos;
+ }
- resj = 0.0;
- wjm = -1.0;
- jm = 0;
- for (j = 0; j < dimt; j++)
- {
- resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
- if (wpos(post[j]) > wjm)
+ resj = 0.0;
+ wjm = -1.0;
+ jm = 0;
+ for (j = 0; j < dimt; j++)
{
- wjm = wpos(post[j]);
- jm = j;
+ resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
+ if (wpos(post[j]) > wjm)
+ {
+ wjm = wpos(post[j]);
+ jm = j;
+ }
}
- }
/*
- limit (sum(i/i^2),i->inf) = pi^2/6
- resj = sum(wi/i^2),i=1,noccurence,
- wi - should be sorted desc,
- don't sort for now, just choose maximum weight. This should be corrected
- Oleg Bartunov
+ limit (sum(i/i^2),i->inf) = pi^2/6
+ resj = sum(wi/i^2),i=1,noccurence,
+ wi - should be sorted desc,
+ don't sort for now, just choose maximum weight. This should be corrected
+ Oleg Bartunov
*/
- res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+ res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+
+ entry++;
+ }
}
if (size > 0)
res = res / size;
@@ -594,11 +619,13 @@ static DocRepresentation *
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
{
QueryItem *item = GETQUERY(qr->query);
- WordEntry *entry;
+ WordEntry *entry,
+ *firstentry;
WordEntryPos *post;
int4 dimt,
j,
- i;
+ i,
+ nitem;
int len = qr->query->size * 4,
cur = 0;
DocRepresentation *doc;
@@ -619,63 +646,68 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
continue;
- entry = find_wordentry(txt, qr->query, curoperand);
+ firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
if (!entry)
continue;
- if (entry->haspos)
- {
- dimt = POSDATALEN(txt, entry);
- post = POSDATAPTR(txt, entry);
- }
- else
- {
- dimt = POSNULL.npos;
- post = POSNULL.pos;
- }
-
- while (cur + dimt >= len)
+ while( entry - firstentry < nitem )
{
- len *= 2;
- doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
- }
-
- for (j = 0; j < dimt; j++)
- {
- if (j == 0)
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(txt, entry);
+ post = POSDATAPTR(txt, entry);
+ }
+ else
{
- int k;
+ dimt = POSNULL.npos;
+ post = POSNULL.pos;
+ }
- doc[cur].nitem = 0;
- doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
+ while (cur + dimt >= len)
+ {
+ len *= 2;
+ doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
+ }
- for (k = 0; k < qr->query->size; k++)
+ for (j = 0; j < dimt; j++)
+ {
+ if (j == 0)
{
- QueryOperand *kptr = &item[k].operand;
- QueryOperand *iptr = &item[i].operand;
-
- if (k == i ||
- (item[k].type == QI_VAL &&
- compareQueryOperand(&kptr, &iptr, operand) == 0))
+ int k;
+
+ doc[cur].nitem = 0;
+ doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
+
+ for (k = 0; k < qr->query->size; k++)
{
- /*
- * if k == i, we've already checked above that it's
- * type == Q_VAL
- */
- doc[cur].item[doc[cur].nitem] = item + k;
- doc[cur].nitem++;
- QR_SET_OPERAND_EXISTS(qr, item + k);
+ QueryOperand *kptr = &item[k].operand;
+ QueryOperand *iptr = &item[i].operand;
+
+ if (k == i ||
+ (item[k].type == QI_VAL &&
+ compareQueryOperand(&kptr, &iptr, operand) == 0))
+ {
+ /*
+ * if k == i, we've already checked above that it's
+ * type == Q_VAL
+ */
+ doc[cur].item[doc[cur].nitem] = item + k;
+ doc[cur].nitem++;
+ QR_SET_OPERAND_EXISTS(qr, item + k);
+ }
}
}
+ else
+ {
+ doc[cur].nitem = doc[cur - 1].nitem;
+ doc[cur].item = doc[cur - 1].item;
+ }
+ doc[cur].pos = WEP_GETPOS(post[j]);
+ doc[cur].wclass = WEP_GETWEIGHT(post[j]);
+ cur++;
}
- else
- {
- doc[cur].nitem = doc[cur - 1].nitem;
- doc[cur].item = doc[cur - 1].item;
- }
- doc[cur].pos = WEP_GETPOS(post[j]);
- doc[cur].wclass = WEP_GETWEIGHT(post[j]);
- cur++;
+
+ entry++;
}
}