diff options
Diffstat (limited to 'src/backend/utils/adt/tsrank.c')
-rw-r--r-- | src/backend/utils/adt/tsrank.c | 286 |
1 files changed, 159 insertions, 127 deletions
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 065c94d2097..d23e05e9939 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,45 +71,60 @@ cnt_length(TSVector t) return len; } -static int -WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item) -{ - if (ptr->len == item->length) - return strncmp( - eval + ptr->pos, - qval + item->distance, - item->length); - return (ptr->len > item->length) ? 1 : -1; -} +#define WordECompareQueryItem(e,q,p,i,m) \ + tsCompareString((q) + (i)->distance, (i)->length, \ + (e) + (p)->pos, (p)->len, (m)) + /* - * Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q' - * is the TSQuery containing 'item'. Returns NULL if not found. + * Returns a pointer to a WordEntry's array corresponding to 'item' from + * tsvector 't'. 'q' is the TSQuery containing 'item'. + * Returns NULL if not found. */ static WordEntry * -find_wordentry(TSVector t, TSQuery q, QueryOperand *item) +find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem) { WordEntry *StopLow = ARRPTR(t); WordEntry *StopHigh = (WordEntry *) STRPTR(t); - WordEntry *StopMiddle; + WordEntry *StopMiddle = StopHigh; int difference; - /* Loop invariant: StopLow <= item < StopHigh */ + *nitem=0; + /* Loop invariant: StopLow <= item < StopHigh */ while (StopLow < StopHigh) { StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item); + difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false); if (difference == 0) - return StopMiddle; - else if (difference < 0) + { + StopHigh = StopMiddle; + *nitem=1; + break; + } + else if (difference > 0) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } - return NULL; + if ( item->prefix == true ) + { + if ( StopLow >= StopHigh ) + StopMiddle = StopHigh; + + *nitem=0; + + while( StopMiddle < (WordEntry *) STRPTR(t) && + WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 ) + { + (*nitem)++; + StopMiddle++; + } + } + + return ( *nitem > 0 ) ? StopHigh : NULL; } @@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg) QueryOperand *qa = (*(QueryOperand **) a); QueryOperand *qb = (*(QueryOperand **) b); - if (qa->length == qb->length) - return strncmp(operand + qa->distance, - operand + qb->distance, - qb->length); - - return (qa->length > qb->length) ? 1 : -1; + return tsCompareString(operand + qa->distance, qa->length, + operand + qb->distance, qb->length, + false); } /* @@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q) k, l, p; - WordEntry *entry; + WordEntry *entry, + *firstentry; WordEntryPos *post, *ct; int4 dimt, lenct, - dist; + dist, + nitem; float res = -1.0; QueryOperand **item; int size = q->size; @@ -219,40 +233,44 @@ calc_rank_and(float *w, TSVector t, TSQuery q) for (i = 0; i < size; i++) { - entry = find_wordentry(t, q, item[i]); + firstentry = entry = find_wordentry(t, q, item[i], &nitem); if (!entry) continue; - if (entry->haspos) - pos[i] = _POSVECPTR(t, entry); - else - pos[i] = &POSNULL; - - - dimt = pos[i]->npos; - post = pos[i]->pos; - for (k = 0; k < i; k++) + while( entry - firstentry < nitem ) { - if (!pos[k]) - continue; - lenct = pos[k]->npos; - ct = pos[k]->pos; - for (l = 0; l < dimt; l++) + if (entry->haspos) + pos[i] = _POSVECPTR(t, entry); + else + pos[i] = &POSNULL; + + dimt = pos[i]->npos; + post = pos[i]->pos; + for (k = 0; k < i; k++) { - for (p = 0; p < lenct; p++) + if (!pos[k]) + continue; + lenct = pos[k]->npos; + ct = pos[k]->pos; + for (l = 0; l < dimt; l++) { - dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); - if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) + for (p = 0; p < lenct; p++) { - float curw; - - if (!dist) - dist = MAXENTRYPOS; - curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); - res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); + dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); + if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) + { + float curw; + + if (!dist) + dist = MAXENTRYPOS; + curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); + res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); + } } } } + + entry++; } } pfree(pos); @@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q) static float calc_rank_or(float *w, TSVector t, TSQuery q) { - WordEntry *entry; + WordEntry *entry, + *firstentry; WordEntryPos *post; int4 dimt, j, - i; + i, + nitem; float res = 0.0; QueryOperand **item; int size = q->size; @@ -280,41 +300,46 @@ calc_rank_or(float *w, TSVector t, TSQuery q) wjm; int4 jm; - entry = find_wordentry(t, q, item[i]); + firstentry = entry = find_wordentry(t, q, item[i], &nitem); if (!entry) continue; - if (entry->haspos) - { - dimt = POSDATALEN(t, entry); - post = POSDATAPTR(t, entry); - } - else + while( entry - firstentry < nitem ) { - dimt = POSNULL.npos; - post = POSNULL.pos; - } + if (entry->haspos) + { + dimt = POSDATALEN(t, entry); + post = POSDATAPTR(t, entry); + } + else + { + dimt = POSNULL.npos; + post = POSNULL.pos; + } - resj = 0.0; - wjm = -1.0; - jm = 0; - for (j = 0; j < dimt; j++) - { - resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); - if (wpos(post[j]) > wjm) + resj = 0.0; + wjm = -1.0; + jm = 0; + for (j = 0; j < dimt; j++) { - wjm = wpos(post[j]); - jm = j; + resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); + if (wpos(post[j]) > wjm) + { + wjm = wpos(post[j]); + jm = j; + } } - } /* - limit (sum(i/i^2),i->inf) = pi^2/6 - resj = sum(wi/i^2),i=1,noccurence, - wi - should be sorted desc, - don't sort for now, just choose maximum weight. This should be corrected - Oleg Bartunov + limit (sum(i/i^2),i->inf) = pi^2/6 + resj = sum(wi/i^2),i=1,noccurence, + wi - should be sorted desc, + don't sort for now, just choose maximum weight. This should be corrected + Oleg Bartunov */ - res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + + entry++; + } } if (size > 0) res = res / size; @@ -594,11 +619,13 @@ static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) { QueryItem *item = GETQUERY(qr->query); - WordEntry *entry; + WordEntry *entry, + *firstentry; WordEntryPos *post; int4 dimt, j, - i; + i, + nitem; int len = qr->query->size * 4, cur = 0; DocRepresentation *doc; @@ -619,63 +646,68 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) if (QR_GET_OPERAND_EXISTS(qr, &item[i])) continue; - entry = find_wordentry(txt, qr->query, curoperand); + firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem); if (!entry) continue; - if (entry->haspos) - { - dimt = POSDATALEN(txt, entry); - post = POSDATAPTR(txt, entry); - } - else - { - dimt = POSNULL.npos; - post = POSNULL.pos; - } - - while (cur + dimt >= len) + while( entry - firstentry < nitem ) { - len *= 2; - doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); - } - - for (j = 0; j < dimt; j++) - { - if (j == 0) + if (entry->haspos) + { + dimt = POSDATALEN(txt, entry); + post = POSDATAPTR(txt, entry); + } + else { - int k; + dimt = POSNULL.npos; + post = POSNULL.pos; + } - doc[cur].nitem = 0; - doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); + while (cur + dimt >= len) + { + len *= 2; + doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); + } - for (k = 0; k < qr->query->size; k++) + for (j = 0; j < dimt; j++) + { + if (j == 0) { - QueryOperand *kptr = &item[k].operand; - QueryOperand *iptr = &item[i].operand; - - if (k == i || - (item[k].type == QI_VAL && - compareQueryOperand(&kptr, &iptr, operand) == 0)) + int k; + + doc[cur].nitem = 0; + doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); + + for (k = 0; k < qr->query->size; k++) { - /* - * if k == i, we've already checked above that it's - * type == Q_VAL - */ - doc[cur].item[doc[cur].nitem] = item + k; - doc[cur].nitem++; - QR_SET_OPERAND_EXISTS(qr, item + k); + QueryOperand *kptr = &item[k].operand; + QueryOperand *iptr = &item[i].operand; + + if (k == i || + (item[k].type == QI_VAL && + compareQueryOperand(&kptr, &iptr, operand) == 0)) + { + /* + * if k == i, we've already checked above that it's + * type == Q_VAL + */ + doc[cur].item[doc[cur].nitem] = item + k; + doc[cur].nitem++; + QR_SET_OPERAND_EXISTS(qr, item + k); + } } } + else + { + doc[cur].nitem = doc[cur - 1].nitem; + doc[cur].item = doc[cur - 1].item; + } + doc[cur].pos = WEP_GETPOS(post[j]); + doc[cur].wclass = WEP_GETWEIGHT(post[j]); + cur++; } - else - { - doc[cur].nitem = doc[cur - 1].nitem; - doc[cur].item = doc[cur - 1].item; - } - doc[cur].pos = WEP_GETPOS(post[j]); - doc[cur].wclass = WEP_GETWEIGHT(post[j]); - cur++; + + entry++; } } |