aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2001-11-29 21:02:41 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2001-11-29 21:02:41 +0000
commit636a939fe54d317099e8574997d96975a54c792b (patch)
treea355959c7fb779906dcf5b569925ae93f1953287
parenta4e8cd306c295e4a959e04d4aa20e53d8c9ae2e6 (diff)
downloadpostgresql-636a939fe54d317099e8574997d96975a54c792b.tar.gz
postgresql-636a939fe54d317099e8574997d96975a54c792b.zip
Fix array_out's failure to backslash backslashes, per bug# 524. Also,
remove brain-dead rule that double quotes are needed if and only if the datatype is pass-by-reference; neither direction of the implication holds water. Instead, examine the actual data string to see if it contains any characters that force us to quote it. Add some documentation about quoting of array values, which was previously explained nowhere AFAICT.
-rw-r--r--doc/src/sgml/array.sgml34
-rw-r--r--src/backend/utils/adt/arrayfuncs.c96
-rw-r--r--src/test/regress/expected/arrays.out38
3 files changed, 109 insertions, 59 deletions
diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml
index 99cfde85f2c..1696d61c257 100644
--- a/doc/src/sgml/array.sgml
+++ b/doc/src/sgml/array.sgml
@@ -1,4 +1,4 @@
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/array.sgml,v 1.17 2001/11/28 20:49:09 petere Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/array.sgml,v 1.18 2001/11/29 21:02:41 tgl Exp $ -->
<chapter id="arrays">
<title>Arrays</title>
@@ -248,4 +248,36 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000;
</para>
</note>
+ <formalpara>
+ <title>Quoting array elements.</title>
+ <para>
+ As shown above, when writing an array literal value you may write double
+ quotes around any individual array
+ element. You <emphasis>must</> do so if the element value would otherwise
+ confuse the array-value parser. For example, elements containing curly
+ braces, commas, double quotes, backslashes, or white space must be
+ double-quoted. To put a double quote or backslash in an array element
+ value, precede it with a backslash.
+ </para>
+ </formalpara>
+
+ <tip>
+ <para>
+ Remember that what you write in an SQL query will first be interpreted
+ as a string literal, and then as an array. This doubles the number of
+ backslashes you need. For example, to insert a <type>text</> array
+ value containing a backslash and a double quote, you'd need to write
+<programlisting>
+INSERT ... VALUES ('{"\\\\","\\""}');
+</programlisting>
+ The string-literal processor removes one level of backslashes, so that
+ what arrives at the array-value parser looks like <literal>{"\\","\""}</>.
+ In turn, the strings fed to the <type>text</> datatype's input routine
+ become <literal>\</> and <literal>"</> respectively. (If we were working
+ with a datatype whose input routine also treated backslashes specially,
+ <type>bytea</> for example, we might need as many as eight backslashes
+ in the query to get one backslash into the stored array element.)
+ </para>
+ </tip>
+
</chapter>
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index f6687847144..0cfa371c998 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.71 2001/10/25 05:49:43 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.72 2001/11/29 21:02:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -346,6 +346,7 @@ ArrayCount(char *str, int *dim, int typdelim)
* If element type is pass-by-ref, the Datums point to palloc'd values.
* *nbytes is set to the amount of data space needed for the array,
* including alignment padding but not including array header overhead.
+ * CAUTION: the contents of "arrayStr" may be modified!
*---------------------------------------------------------------------------
*/
static Datum *
@@ -564,16 +565,13 @@ array_out(PG_FUNCTION_ARGS)
char *p,
*tmp,
*retval,
- **values,
- delim[2];
+ **values;
+ bool *needquotes;
int nitems,
overall_length,
i,
j,
k,
-#ifndef TCL_ARRAYS
- l,
-#endif
indx[MAXDIM];
int ndim,
*dim;
@@ -581,26 +579,29 @@ array_out(PG_FUNCTION_ARGS)
system_cache_lookup(element_type, false, &typlen, &typbyval,
&typdelim, &typelem, &typoutput, &typalign);
fmgr_info(typoutput, &outputproc);
- sprintf(delim, "%c", typdelim);
ndim = ARR_NDIM(v);
dim = ARR_DIMS(v);
nitems = ArrayGetNItems(ndim, dim);
if (nitems == 0)
{
- retval = (char *) palloc(3);
- retval[0] = '{';
- retval[1] = '}';
- retval[2] = '\0';
+ retval = pstrdup("{}");
PG_RETURN_CSTRING(retval);
}
+ /*
+ * Convert all values to string form, count total space needed
+ * (including any overhead such as escaping backslashes),
+ * and detect whether each item needs double quotes.
+ */
+ values = (char **) palloc(nitems * sizeof(char *));
+ needquotes = (bool *) palloc(nitems * sizeof(bool));
p = ARR_DATA_PTR(v);
overall_length = 1; /* [TRH] don't forget to count \0 at end. */
- values = (char **) palloc(nitems * sizeof(char *));
for (i = 0; i < nitems; i++)
{
Datum itemvalue;
+ bool nq;
itemvalue = fetch_att(p, typbyval, typlen);
values[i] = DatumGetCString(FunctionCall3(&outputproc,
@@ -612,20 +613,32 @@ array_out(PG_FUNCTION_ARGS)
else
p += INTALIGN(*(int32 *) p);
- /*
- * For the pair of double quotes
- */
- if (!typbyval)
- overall_length += 2;
-
+ /* count data plus backslashes; detect chars needing quotes */
+ nq = (values[i][0] == '\0'); /* force quotes for empty string */
for (tmp = values[i]; *tmp; tmp++)
{
+ char ch = *tmp;
+
overall_length += 1;
+ if (ch == '"' || ch == '\\')
+ {
+ nq = true;
#ifndef TCL_ARRAYS
- if (*tmp == '"')
overall_length += 1;
#endif
+ }
+ else if (ch == '{' || ch == '}' || ch == typdelim ||
+ isspace((unsigned char) ch))
+ nq = true;
}
+
+ needquotes[i] = nq;
+
+ /* Count the pair of double quotes, if needed */
+ if (nq)
+ overall_length += 2;
+
+ /* and the comma */
overall_length += 1;
}
@@ -634,41 +647,41 @@ array_out(PG_FUNCTION_ARGS)
*/
for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k);
- p = (char *) palloc(overall_length + 2 * j);
- retval = p;
+ retval = (char *) palloc(overall_length + 2 * j);
+ p = retval;
+
+#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p))
+#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0')
- strcpy(p, "{");
+ APPENDCHAR('{');
for (i = 0; i < ndim; indx[i++] = 0);
j = 0;
k = 0;
do
{
for (i = j; i < ndim - 1; i++)
- strcat(p, "{");
+ APPENDCHAR('{');
- /*
- * Surround anything that is not passed by value in double quotes.
- * See above for more details.
- */
- if (!typbyval)
+ if (needquotes[k])
{
- strcat(p, "\"");
+ APPENDCHAR('"');
#ifndef TCL_ARRAYS
- l = strlen(p);
for (tmp = values[k]; *tmp; tmp++)
{
- if (*tmp == '"')
- p[l++] = '\\';
- p[l++] = *tmp;
+ char ch = *tmp;
+
+ if (ch == '"' || ch == '\\')
+ *p++ = '\\';
+ *p++ = ch;
}
- p[l] = '\0';
+ *p = '\0';
#else
- strcat(p, values[k]);
+ APPENDSTR(values[k]);
#endif
- strcat(p, "\"");
+ APPENDCHAR('"');
}
else
- strcat(p, values[k]);
+ APPENDSTR(values[k]);
pfree(values[k++]);
for (i = ndim - 1; i >= 0; i--)
@@ -676,16 +689,21 @@ array_out(PG_FUNCTION_ARGS)
indx[i] = (indx[i] + 1) % dim[i];
if (indx[i])
{
- strcat(p, delim);
+ APPENDCHAR(typdelim);
break;
}
else
- strcat(p, "}");
+ APPENDCHAR('}');
}
j = i;
} while (j != -1);
+#undef APPENDSTR
+#undef APPENDCHAR
+
pfree(values);
+ pfree(needquotes);
+
PG_RETURN_CSTRING(retval);
}
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 74c1008d907..f729fe1ed43 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -28,11 +28,11 @@ INSERT INTO arrtest (a, b[2][2][1], c, d, e, f, g)
INSERT INTO arrtest (a, b[1][2][2], c, d[2][1])
VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}');
SELECT * FROM arrtest;
- a | b | c | d | e | f | g
--------------+-----------------+---------------+-------------------+---------------+-------------------+-----------------
- {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {}
- {11,12,23} | {{3,4},{4,5}} | {"foobar"} | {{"elt1","elt2"}} | {"3.4","6.7"} | {"abc ","abcde"} | {"abc","abcde"}
- {} | {3,4} | {"foo","bar"} | {"bar","foo"} | | |
+ a | b | c | d | e | f | g
+-------------+-----------------+-----------+---------------+-----------+-----------------+-------------
+ {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {}
+ {11,12,23} | {{3,4},{4,5}} | {foobar} | {{elt1,elt2}} | {3.4,6.7} | {"abc ",abcde} | {abc,abcde}
+ {} | {3,4} | {foo,bar} | {bar,foo} | | |
(3 rows)
SELECT arrtest.a[1],
@@ -62,11 +62,11 @@ SELECT a[1:3],
c[1:2],
d[1:1][1:2]
FROM arrtest;
- a | b | c | d
-------------+-----------------+---------------+-------------------
- {1,2,3} | {{{0,0},{1,2}}} | |
- {11,12,23} | | {"foobar"} | {{"elt1","elt2"}}
- | | {"foo","bar"} |
+ a | b | c | d
+------------+-----------------+-----------+---------------
+ {1,2,3} | {{{0,0},{1,2}}} | |
+ {11,12,23} | | {foobar} | {{elt1,elt2}}
+ | | {foo,bar} |
(3 rows)
SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c
@@ -98,11 +98,11 @@ UPDATE arrtest
SET c[2:2] = '{"new_word"}'
WHERE array_dims(c) is not null;
SELECT a,b,c FROM arrtest;
- a | b | c
----------------+-----------------------+-----------------------
+ a | b | c
+---------------+-----------------------+-------------------
{16,25,3,4,5} | {{{113,142},{1,147}}} | {}
- {} | {3,4} | {"foo","new_word"}
- {16,25,23} | {{3,4},{4,5}} | {"foobar","new_word"}
+ {} | {3,4} | {foo,new_word}
+ {16,25,23} | {{3,4},{4,5}} | {foobar,new_word}
(3 rows)
SELECT a[1:3],
@@ -110,10 +110,10 @@ SELECT a[1:3],
c[1:2],
d[1:1][2:2]
FROM arrtest;
- a | b | c | d
-------------+-----------------------+-----------------------+------------
- {16,25,3} | {{{113,142},{1,147}}} | |
- | | {"foo","new_word"} |
- {16,25,23} | | {"foobar","new_word"} | {{"elt2"}}
+ a | b | c | d
+------------+-----------------------+-------------------+----------
+ {16,25,3} | {{{113,142},{1,147}}} | |
+ | | {foo,new_word} |
+ {16,25,23} | | {foobar,new_word} | {{elt2}}
(3 rows)