aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2006-10-01 22:25:48 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2006-10-01 22:25:48 +0000
commit: db2dcf58be1c15a5ce91396b3205fb982ee7a656 (patch)
tree: 3ee16bae0902641b07d0ccb830e6718f8a66554f /src
parent: 8e4fe3ba1ed8aa6bfa59b99555d67a864a2bee84 (diff)
download: postgresql-db2dcf58be1c15a5ce91396b3205fb982ee7a656.tar.gz
download: postgresql-db2dcf58be1c15a5ce91396b3205fb982ee7a656.zip
Make some marginal performance improvements in reportErrorPosition(),
which turns out to be a dominant part of the runtime in scenarios involving lots of parse-time warnings (such as Stephen Frost's example of an INSERT with a lot of backslash-containing strings). There's not a whole lot we can do about the character-at-a-time scanning, but we can at least avoid traversing the query twice.
Diffstat (limited to 'src')
-rw-r--r-- src/interfaces/libpq/fe-protocol3.c 152
1 files changed, 86 insertions, 66 deletions
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
index a6abb062858..1feae23a437 100644
--- a/src/interfaces/libpq/fe-protocol3.c
+++ b/src/interfaces/libpq/fe-protocol3.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.27 2006/08/18 19:52:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.28 2006/10/01 22:25:48 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -883,10 +883,9 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
#define MIN_RIGHT_CUT 10 /* try to keep this far away from EOL */
char *wquery;
- int clen,
- slen,
+ int slen,
+ cno,
i,
- w,
*qidx,
*scridx,
qoffset,
@@ -894,9 +893,15 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
ibeg,
iend,
loc_line;
- bool beg_trunc,
+ bool mb_encoding,
+ beg_trunc,
end_trunc;
+ /* Convert loc from 1-based to 0-based; no-op if out of range */
+ loc--;
+ if (loc < 0)
+ return;
+
/* Need a writable copy of the query */
wquery = strdup(query);
if (wquery == NULL)
@@ -905,13 +910,13 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
/*
* Each character might occupy multiple physical bytes in the string, and
* in some Far Eastern character sets it might take more than one screen
- * column as well. We compute the starting byte offset and starting
+ * column as well. We compute the starting byte offset and starting
* screen column of each logical character, and store these in qidx[] and
* scridx[] respectively.
*/
/* we need a safe allocation size... */
- slen = strlen(query) + 1;
+ slen = strlen(wquery) + 1;
qidx = (int *) malloc(slen * sizeof(int));
if (qidx == NULL)
@@ -927,79 +932,93 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
return;
}
+ /* We can optimize a bit if it's a single-byte encoding */
+ mb_encoding = (pg_encoding_max_length(encoding) != 1);
+
+ /*
+ * Within the scanning loop, cno is the current character's logical number,
+ * qoffset is its offset in wquery, and scroffset is its starting logical
+ * screen column (all indexed from 0). "loc" is the logical character
+ * number of the error location. We scan to determine loc_line (the
+ * 1-based line number containing loc) and ibeg/iend (first character
+ * number and last+1 character number of the line containing loc).
+ * Note that qidx[] and scridx[] are filled only as far as iend.
+ */
qoffset = 0;
scroffset = 0;
- for (i = 0; query[qoffset] != '\0'; i++)
- {
- qidx[i] = qoffset;
- scridx[i] = scroffset;
- w = pg_encoding_dsplen(encoding, &query[qoffset]);
- /* treat control chars as width 1; see tab hack below */
- if (w <= 0)
- w = 1;
- scroffset += w;
- qoffset += pg_encoding_mblen(encoding, &query[qoffset]);
- }
- qidx[i] = qoffset;
- scridx[i] = scroffset;
- clen = i;
+ loc_line = 1;
+ ibeg = 0;
+ iend = -1; /* -1 means not set yet */
- /* convert loc to zero-based offset in qidx/scridx arrays */
- loc--;
-
- /* do we have something to show? */
- if (loc >= 0 && loc <= clen)
+ for (cno = 0; wquery[qoffset] != '\0'; cno++)
{
- /* input line number of our syntax error. */
- loc_line = 1;
- /* first included char of extract. */
- ibeg = 0;
- /* last-plus-1 included char of extract. */
- iend = clen;
+ char ch = wquery[qoffset];
+
+ qidx[cno] = qoffset;
+ scridx[cno] = scroffset;
/*
* Replace tabs with spaces in the writable copy. (Later we might
* want to think about coping with their variable screen width, but
* not today.)
- *
- * Extract line number and begin and end indexes of line containing
- * error location. There will not be any newlines or carriage returns
- * in the selected extract.
*/
- for (i = 0; i < clen; i++)
+ if (ch == '\t')
+ wquery[qoffset] = ' ';
+
+ /*
+ * If end-of-line, count lines and mark positions. Each \r or \n counts
+ * as a line except when \r \n appear together.
+ */
+ else if (ch == '\r' || ch == '\n')
{
- /* character length must be 1 or it's not ASCII */
- if ((qidx[i + 1] - qidx[i]) == 1)
+ if (cno < loc)
{
- if (wquery[qidx[i]] == '\t')
- wquery[qidx[i]] = ' ';
- else if (wquery[qidx[i]] == '\r' || wquery[qidx[i]] == '\n')
- {
- if (i < loc)
- {
- /*
- * count lines before loc. Each \r or \n counts
- * as a line except when \r \n appear together.
- */
- if (wquery[qidx[i]] == '\r' ||
- i == 0 ||
- (qidx[i] - qidx[i - 1]) != 1 ||
- wquery[qidx[i - 1]] != '\r')
- loc_line++;
- /* extract beginning = last line start before loc. */
- ibeg = i + 1;
- }
- else
- {
- /* set extract end. */
- iend = i;
- /* done scanning. */
- break;
- }
- }
+ if (ch == '\r' ||
+ cno == 0 ||
+ wquery[qidx[cno - 1]] != '\r')
+ loc_line++;
+ /* extract beginning = last line start before loc. */
+ ibeg = cno + 1;
+ }
+ else
+ {
+ /* set extract end. */
+ iend = cno;
+ /* done scanning. */
+ break;
}
}
+ /* Advance */
+ if (mb_encoding)
+ {
+ int w;
+
+ w = pg_encoding_dsplen(encoding, &wquery[qoffset]);
+ /* treat any non-tab control chars as width 1 */
+ if (w <= 0)
+ w = 1;
+ scroffset += w;
+ qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]);
+ }
+ else
+ {
+ /* We assume wide chars only exist in multibyte encodings */
+ scroffset++;
+ qoffset++;
+ }
+ }
+ /* Fix up if we didn't find an end-of-line after loc */
+ if (iend < 0)
+ {
+ iend = cno; /* query length in chars, +1 */
+ qidx[iend] = qoffset;
+ scridx[iend] = scroffset;
+ }
+
+ /* Print only if loc is within computed query length */
+ if (loc <= cno)
+ {
/* If the line extracted is too long, we truncate it. */
beg_trunc = false;
end_trunc = false;
@@ -1050,7 +1069,8 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
scroffset = 0;
for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i]))
{
- w = pg_encoding_dsplen(encoding, &msg->data[i]);
+ int w = pg_encoding_dsplen(encoding, &msg->data[i]);
+
if (w <= 0)
w = 1;
scroffset += w;