aboutsummaryrefslogtreecommitdiff
path: root/src/bin/psql/stringutils.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2002-10-19 00:22:14 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2002-10-19 00:22:14 +0000
commit44dc9c1faa3bc6dfea71dc9dd7dc83bb642a818c (patch)
tree1cbfd2d139a488cd88546f05d4b4bf629d37e0cd /src/bin/psql/stringutils.c
parent4cff161703beb10aab08b614feb9dfffc3860352 (diff)
downloadpostgresql-44dc9c1faa3bc6dfea71dc9dd7dc83bb642a818c.tar.gz
postgresql-44dc9c1faa3bc6dfea71dc9dd7dc83bb642a818c.zip
Fix psql's \copy to accept table names containing schemas, as well as
a column list. Bring its parsing of quoted names and quoted strings somewhat up to speed --- I believe it now handles all non-error cases the same way the backend would, but weird boundary conditions are not necessarily done the same way.
Diffstat (limited to 'src/bin/psql/stringutils.c')
-rw-r--r--src/bin/psql/stringutils.c260
1 files changed, 166 insertions, 94 deletions
diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c
index 8ff58c46430..0401c92718c 100644
--- a/src/bin/psql/stringutils.c
+++ b/src/bin/psql/stringutils.c
@@ -1,45 +1,61 @@
/*
* psql - the PostgreSQL interactive terminal
*
- * Copyright 2000 by PostgreSQL Global Development Group
+ * Copyright 2000-2002 by PostgreSQL Global Development Group
*
- * $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.30 2002/08/27 20:16:49 petere Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.31 2002/10/19 00:22:14 tgl Exp $
*/
#include "postgres_fe.h"
-#include "stringutils.h"
-#include "settings.h"
-#include <ctype.h>
#include <assert.h>
+#include <ctype.h>
#include "libpq-fe.h"
+#include "settings.h"
+#include "stringutils.h"
-
-static void unescape_quotes(char *source, int quote, int escape);
+static void strip_quotes(char *source, char quote, char escape, int encoding);
/*
* Replacement for strtok() (a.k.a. poor man's flex)
*
- * The calling convention is similar to that of strtok.
+ * Splits a string into tokens, returning one token per call, then NULL
+ * when no more tokens exist in the given string.
+ *
+ * The calling convention is similar to that of strtok, but with more
+ * frammishes.
+ *
* s - string to parse, if NULL continue parsing the last string
- * delim - set of characters that delimit tokens (usually whitespace)
- * quote - set of characters that quote stuff, they're not part of the token
- * escape - character than can quote quotes
- * was_quoted - if not NULL, stores the quoting character if any was encountered
- * token_pos - if not NULL, receives a count to the start of the token in the
- * parsed string
+ * whitespace - set of whitespace characters that separate tokens
+ * delim - set of non-whitespace separator characters (or NULL)
+ * quote - set of characters that can quote a token (NULL if none)
+ * escape - character that can quote quotes (0 if none)
+ * del_quotes - if TRUE, strip quotes from the returned token, else return
+ * it exactly as found in the string
+ * encoding - the active character-set encoding
+ *
+ * Characters in 'delim', if any, will be returned as single-character
+ * tokens unless part of a quoted token.
+ *
+ * Double occurences of the quoting character are always taken to represent
+ * a single quote character in the data. If escape isn't 0, then escape
+ * followed by anything (except \0) is a data character too.
*
* Note that the string s is _not_ overwritten in this implementation.
+ *
+ * NB: it's okay to vary delim, quote, and escape from one call to the
+ * next on a single source string, but changing whitespace is a bad idea
+ * since you might lose data.
*/
char *
strtokx(const char *s,
+ const char *whitespace,
const char *delim,
const char *quote,
- int escape,
- char *was_quoted,
- unsigned int *token_pos,
+ char escape,
+ bool del_quotes,
int encoding)
{
static char *storage = NULL;/* store the local copy of the users
@@ -50,23 +66,32 @@ strtokx(const char *s,
/* variously abused variables: */
unsigned int offset;
char *start;
- char *cp = NULL;
+ char *p;
if (s)
{
free(storage);
- storage = strdup(s);
+ /*
+ * We may need extra space to insert delimiter nulls for adjacent
+ * tokens. 2X the space is a gross overestimate, but it's
+ * unlikely that this code will be used on huge strings anyway.
+ */
+ storage = (char *) malloc(2 * strlen(s) + 1);
+ if (!storage)
+ return NULL; /* really "out of memory" */
+ strcpy(storage, s);
string = storage;
}
if (!storage)
return NULL;
- /* skip leading "whitespace" */
- offset = strspn(string, delim);
+ /* skip leading whitespace */
+ offset = strspn(string, whitespace);
+ start = &string[offset];
- /* end of string reached */
- if (string[offset] == '\0')
+ /* end of string reached? */
+ if (*start == '\0')
{
/* technically we don't need to free here, but we're nice */
free(storage);
@@ -75,118 +100,165 @@ strtokx(const char *s,
return NULL;
}
- /* test if quoting character */
- if (quote)
- cp = strchr(quote, string[offset]);
-
- if (cp)
+ /* test if delimiter character */
+ if (delim && strchr(delim, *start))
{
- /* okay, we have a quoting character, now scan for the closer */
- char *p;
+ /*
+ * If not at end of string, we need to insert a null to terminate
+ * the returned token. We can just overwrite the next character
+ * if it happens to be in the whitespace set ... otherwise move over
+ * the rest of the string to make room. (This is why we allocated
+ * extra space above).
+ */
+ p = start + 1;
+ if (*p != '\0')
+ {
+ if (!strchr(whitespace, *p))
+ memmove(p + 1, p, strlen(p) + 1);
+ *p = '\0';
+ string = p + 1;
+ }
+ else
+ {
+ /* at end of string, so no extra work */
+ string = p;
+ }
- start = &string[offset + 1];
+ return start;
+ }
- if (token_pos)
- *token_pos = start - storage;
+ /* test if quoting character */
+ if (quote && strchr(quote, *start))
+ {
+ /* okay, we have a quoted token, now scan for the closer */
+ char thisquote = *start;
- for (p = start;
- *p && (*p != *cp || *(p - 1) == escape);
- p += PQmblen(p, encoding)
- );
+ for (p = start + 1; *p; p += PQmblen(p, encoding))
+ {
+ if (*p == escape && p[1] != '\0')
+ p++; /* process escaped anything */
+ else if (*p == thisquote && p[1] == thisquote)
+ p++; /* process doubled quote */
+ else if (*p == thisquote)
+ {
+ p++; /* skip trailing quote */
+ break;
+ }
+ }
- /* not yet end of string? */
+ /*
+ * If not at end of string, we need to insert a null to terminate
+ * the returned token. See notes above.
+ */
if (*p != '\0')
{
+ if (!strchr(whitespace, *p))
+ memmove(p + 1, p, strlen(p) + 1);
*p = '\0';
string = p + 1;
- if (was_quoted)
- *was_quoted = *cp;
- unescape_quotes(start, *cp, escape);
- return start;
}
else
{
- if (was_quoted)
- *was_quoted = *cp;
+ /* at end of string, so no extra work */
string = p;
-
- unescape_quotes(start, *cp, escape);
- return start;
}
+
+ /* Clean up the token if caller wants that */
+ if (del_quotes)
+ strip_quotes(start, thisquote, escape, encoding);
+
+ return start;
}
- /* otherwise no quoting character. scan till next delimiter */
- start = &string[offset];
+ /*
+ * Otherwise no quoting character. Scan till next whitespace,
+ * delimiter or quote. NB: at this point, *start is known not to be
+ * '\0', whitespace, delim, or quote, so we will consume at least
+ * one character.
+ */
+ offset = strcspn(start, whitespace);
- if (token_pos)
- *token_pos = start - storage;
+ if (delim)
+ {
+ unsigned int offset2 = strcspn(start, delim);
- offset = strcspn(start, delim);
- if (was_quoted)
- *was_quoted = 0;
+ if (offset > offset2)
+ offset = offset2;
+ }
- if (start[offset] != '\0')
+ if (quote)
{
- start[offset] = '\0';
- string = &start[offset] + 1;
+ unsigned int offset2 = strcspn(start, quote);
- return start;
+ if (offset > offset2)
+ offset = offset2;
+ }
+
+ p = start + offset;
+
+ /*
+ * If not at end of string, we need to insert a null to terminate
+ * the returned token. See notes above.
+ */
+ if (*p != '\0')
+ {
+ if (!strchr(whitespace, *p))
+ memmove(p + 1, p, strlen(p) + 1);
+ *p = '\0';
+ string = p + 1;
}
else
{
- string = &start[offset];
- return start;
+ /* at end of string, so no extra work */
+ string = p;
}
-}
-
+ return start;
+}
/*
- * unescape_quotes
+ * strip_quotes
*
- * Resolves escaped quotes. Used by strtokx above.
+ * Remove quotes from the string at *source. Leading and trailing occurrences
+ * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
+ * to single occurrences; if 'escape' is not 0 then 'escape' removes special
+ * significance of next character.
+ *
+ * Note that the source string is overwritten in-place.
*/
static void
-unescape_quotes(char *source, int quote, int escape)
+strip_quotes(char *source, char quote, char escape, int encoding)
{
- char *p;
- char *destination,
- *tmp;
+ char *src;
+ char *dst;
#ifdef USE_ASSERT_CHECKING
assert(source);
+ assert(quote);
#endif
- destination = calloc(1, strlen(source) + 1);
- if (!destination)
- {
- perror("calloc");
- exit(EXIT_FAILURE);
- }
+ src = dst = source;
- tmp = destination;
+ if (*src && *src == quote)
+ src++; /* skip leading quote */
- for (p = source; *p; p++)
+ while (*src)
{
- char c;
-
- if (*p == escape && *(p + 1) && quote == *(p + 1))
- {
- c = *(p + 1);
- p++;
- }
- else
- c = *p;
-
- *tmp = c;
- tmp++;
+ char c = *src;
+ int i;
+
+ if (c == quote && src[1] == '\0')
+ break; /* skip trailing quote */
+ else if (c == quote && src[1] == quote)
+ src++; /* process doubled quote */
+ else if (c == escape && src[1] != '\0')
+ src++; /* process escaped character */
+
+ i = PQmblen(src, encoding);
+ while (i--)
+ *dst++ = *src++;
}
- /* Terminating null character */
- *tmp = '\0';
-
- strcpy(source, destination);
-
- free(destination);
+ *dst = '\0';
}