diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-10-19 00:22:14 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-10-19 00:22:14 +0000 |
commit | 44dc9c1faa3bc6dfea71dc9dd7dc83bb642a818c (patch) | |
tree | 1cbfd2d139a488cd88546f05d4b4bf629d37e0cd /src/bin/psql/stringutils.c | |
parent | 4cff161703beb10aab08b614feb9dfffc3860352 (diff) | |
download | postgresql-44dc9c1faa3bc6dfea71dc9dd7dc83bb642a818c.tar.gz postgresql-44dc9c1faa3bc6dfea71dc9dd7dc83bb642a818c.zip |
Fix psql's \copy to accept table names containing schemas, as well as
a column list. Bring its parsing of quoted names and quoted strings
somewhat up to speed --- I believe it now handles all non-error cases
the same way the backend would, but weird boundary conditions are not
necessarily done the same way.
Diffstat (limited to 'src/bin/psql/stringutils.c')
-rw-r--r-- | src/bin/psql/stringutils.c | 260 |
1 files changed, 166 insertions, 94 deletions
diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c index 8ff58c46430..0401c92718c 100644 --- a/src/bin/psql/stringutils.c +++ b/src/bin/psql/stringutils.c @@ -1,45 +1,61 @@ /* * psql - the PostgreSQL interactive terminal * - * Copyright 2000 by PostgreSQL Global Development Group + * Copyright 2000-2002 by PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.30 2002/08/27 20:16:49 petere Exp $ + * $Header: /cvsroot/pgsql/src/bin/psql/stringutils.c,v 1.31 2002/10/19 00:22:14 tgl Exp $ */ #include "postgres_fe.h" -#include "stringutils.h" -#include "settings.h" -#include <ctype.h> #include <assert.h> +#include <ctype.h> #include "libpq-fe.h" +#include "settings.h" +#include "stringutils.h" - -static void unescape_quotes(char *source, int quote, int escape); +static void strip_quotes(char *source, char quote, char escape, int encoding); /* * Replacement for strtok() (a.k.a. poor man's flex) * - * The calling convention is similar to that of strtok. + * Splits a string into tokens, returning one token per call, then NULL + * when no more tokens exist in the given string. + * + * The calling convention is similar to that of strtok, but with more + * frammishes. 
+ * * s - string to parse, if NULL continue parsing the last string - * delim - set of characters that delimit tokens (usually whitespace) - * quote - set of characters that quote stuff, they're not part of the token - * escape - character than can quote quotes - * was_quoted - if not NULL, stores the quoting character if any was encountered - * token_pos - if not NULL, receives a count to the start of the token in the - * parsed string + * whitespace - set of whitespace characters that separate tokens + * delim - set of non-whitespace separator characters (or NULL) + * quote - set of characters that can quote a token (NULL if none) + * escape - character that can quote quotes (0 if none) + * del_quotes - if TRUE, strip quotes from the returned token, else return + * it exactly as found in the string + * encoding - the active character-set encoding + * + * Characters in 'delim', if any, will be returned as single-character + * tokens unless part of a quoted token. + * + * Double occurrences of the quoting character are always taken to represent + * a single quote character in the data. If escape isn't 0, then escape + * followed by anything (except \0) is a data character too. * * Note that the string s is _not_ overwritten in this implementation. + * + * NB: it's okay to vary delim, quote, and escape from one call to the + * next on a single source string, but changing whitespace is a bad idea + * since you might lose data. */ char * strtokx(const char *s, + const char *whitespace, const char *delim, const char *quote, - int escape, - char *was_quoted, - unsigned int *token_pos, + char escape, + bool del_quotes, int encoding) { static char *storage = NULL;/* store the local copy of the users @@ -50,23 +66,32 @@ strtokx(const char *s, /* variously abused variables: */ unsigned int offset; char *start; - char *cp = NULL; + char *p; if (s) { free(storage); - storage = strdup(s); + /* + * We may need extra space to insert delimiter nulls for adjacent + * tokens. 
2X the space is a gross overestimate, but it's + * unlikely that this code will be used on huge strings anyway. + */ + storage = (char *) malloc(2 * strlen(s) + 1); + if (!storage) + return NULL; /* really "out of memory" */ + strcpy(storage, s); string = storage; } if (!storage) return NULL; - /* skip leading "whitespace" */ - offset = strspn(string, delim); + /* skip leading whitespace */ + offset = strspn(string, whitespace); + start = &string[offset]; - /* end of string reached */ - if (string[offset] == '\0') + /* end of string reached? */ + if (*start == '\0') { /* technically we don't need to free here, but we're nice */ free(storage); @@ -75,118 +100,165 @@ strtokx(const char *s, return NULL; } - /* test if quoting character */ - if (quote) - cp = strchr(quote, string[offset]); - - if (cp) + /* test if delimiter character */ + if (delim && strchr(delim, *start)) { - /* okay, we have a quoting character, now scan for the closer */ - char *p; + /* + * If not at end of string, we need to insert a null to terminate + * the returned token. We can just overwrite the next character + * if it happens to be in the whitespace set ... otherwise move over + * the rest of the string to make room. (This is why we allocated + * extra space above). 
+ */ + p = start + 1; + if (*p != '\0') + { + if (!strchr(whitespace, *p)) + memmove(p + 1, p, strlen(p) + 1); + *p = '\0'; + string = p + 1; + } + else + { + /* at end of string, so no extra work */ + string = p; + } - start = &string[offset + 1]; + return start; + } - if (token_pos) - *token_pos = start - storage; + /* test if quoting character */ + if (quote && strchr(quote, *start)) + { + /* okay, we have a quoted token, now scan for the closer */ + char thisquote = *start; - for (p = start; - *p && (*p != *cp || *(p - 1) == escape); - p += PQmblen(p, encoding) - ); + for (p = start + 1; *p; p += PQmblen(p, encoding)) + { + if (*p == escape && p[1] != '\0') + p++; /* process escaped anything */ + else if (*p == thisquote && p[1] == thisquote) + p++; /* process doubled quote */ + else if (*p == thisquote) + { + p++; /* skip trailing quote */ + break; + } + } - /* not yet end of string? */ + /* + * If not at end of string, we need to insert a null to terminate + * the returned token. See notes above. + */ if (*p != '\0') { + if (!strchr(whitespace, *p)) + memmove(p + 1, p, strlen(p) + 1); *p = '\0'; string = p + 1; - if (was_quoted) - *was_quoted = *cp; - unescape_quotes(start, *cp, escape); - return start; } else { - if (was_quoted) - *was_quoted = *cp; + /* at end of string, so no extra work */ string = p; - - unescape_quotes(start, *cp, escape); - return start; } + + /* Clean up the token if caller wants that */ + if (del_quotes) + strip_quotes(start, thisquote, escape, encoding); + + return start; } - /* otherwise no quoting character. scan till next delimiter */ - start = &string[offset]; + /* + * Otherwise no quoting character. Scan till next whitespace, + * delimiter or quote. NB: at this point, *start is known not to be + * '\0', whitespace, delim, or quote, so we will consume at least + * one character. 
+ */ + offset = strcspn(start, whitespace); - if (token_pos) - *token_pos = start - storage; + if (delim) + { + unsigned int offset2 = strcspn(start, delim); - offset = strcspn(start, delim); - if (was_quoted) - *was_quoted = 0; + if (offset > offset2) + offset = offset2; + } - if (start[offset] != '\0') + if (quote) { - start[offset] = '\0'; - string = &start[offset] + 1; + unsigned int offset2 = strcspn(start, quote); - return start; + if (offset > offset2) + offset = offset2; + } + + p = start + offset; + + /* + * If not at end of string, we need to insert a null to terminate + * the returned token. See notes above. + */ + if (*p != '\0') + { + if (!strchr(whitespace, *p)) + memmove(p + 1, p, strlen(p) + 1); + *p = '\0'; + string = p + 1; } else { - string = &start[offset]; - return start; + /* at end of string, so no extra work */ + string = p; } -} - + return start; +} /* - * unescape_quotes + * strip_quotes * - * Resolves escaped quotes. Used by strtokx above. + * Remove quotes from the string at *source. Leading and trailing occurrences + * of 'quote' are removed; embedded double occurrences of 'quote' are reduced + * to single occurrences; if 'escape' is not 0 then 'escape' removes special + * significance of next character. + * + * Note that the source string is overwritten in-place. 
*/ static void -unescape_quotes(char *source, int quote, int escape) +strip_quotes(char *source, char quote, char escape, int encoding) { - char *p; - char *destination, - *tmp; + char *src; + char *dst; #ifdef USE_ASSERT_CHECKING assert(source); + assert(quote); #endif - destination = calloc(1, strlen(source) + 1); - if (!destination) - { - perror("calloc"); - exit(EXIT_FAILURE); - } + src = dst = source; - tmp = destination; + if (*src && *src == quote) + src++; /* skip leading quote */ - for (p = source; *p; p++) + while (*src) { - char c; - - if (*p == escape && *(p + 1) && quote == *(p + 1)) - { - c = *(p + 1); - p++; - } - else - c = *p; - - *tmp = c; - tmp++; + char c = *src; + int i; + + if (c == quote && src[1] == '\0') + break; /* skip trailing quote */ + else if (c == quote && src[1] == quote) + src++; /* process doubled quote */ + else if (c == escape && src[1] != '\0') + src++; /* process escaped character */ + + i = PQmblen(src, encoding); + while (i--) + *dst++ = *src++; } - /* Terminating null character */ - *tmp = '\0'; - - strcpy(source, destination); - - free(destination); + *dst = '\0'; } |