aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/bin/psql/command.c8
-rw-r--r--src/bin/psql/copy.c2
-rw-r--r--src/include/port.h1
-rw-r--r--src/port/path.c105
4 files changed, 97 insertions, 19 deletions
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index 613583145e2..6c75c8da6da 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -1224,7 +1224,7 @@ exec_command_edit(PsqlScanState scan_state, bool active_branch,
expand_tilde(&fname);
if (fname)
{
- canonicalize_path(fname);
+ canonicalize_path_enc(fname, pset.encoding);
/* Always clear buffer if the file isn't modified */
discard_on_quit = true;
}
@@ -2864,7 +2864,7 @@ exec_command_write(PsqlScanState scan_state, bool active_branch,
}
else
{
- canonicalize_path(fname);
+ canonicalize_path_enc(fname, pset.encoding);
fd = fopen(fname, "w");
}
if (!fd)
@@ -4479,7 +4479,7 @@ process_file(char *filename, bool use_relative_path)
}
else if (strcmp(filename, "-") != 0)
{
- canonicalize_path(filename);
+ canonicalize_path_enc(filename, pset.encoding);
/*
* If we were asked to resolve the pathname relative to the location
@@ -4493,7 +4493,7 @@ process_file(char *filename, bool use_relative_path)
strlcpy(relpath, pset.inputfile, sizeof(relpath));
get_parent_directory(relpath);
join_path_components(relpath, relpath, filename);
- canonicalize_path(relpath);
+ canonicalize_path_enc(relpath, pset.encoding);
filename = relpath;
}
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index b13777c6865..92c955b637a 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -280,7 +280,7 @@ do_copy(const char *args)
/* prepare to read or write the target file */
if (options->file && !options->program)
- canonicalize_path(options->file);
+ canonicalize_path_enc(options->file, pset.encoding);
if (options->from)
{
diff --git a/src/include/port.h b/src/include/port.h
index 4e9e5657872..703cad868ba 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -53,6 +53,7 @@ extern char *first_path_var_separator(const char *pathlist);
extern void join_path_components(char *ret_path,
const char *head, const char *tail);
extern void canonicalize_path(char *path);
+extern void canonicalize_path_enc(char *path, int encoding);
extern void make_native_path(char *filename);
extern void cleanup_path(char *path);
extern bool path_contains_parent_reference(const char *path);
diff --git a/src/port/path.c b/src/port/path.c
index 0788bff51d0..63503409d5a 100644
--- a/src/port/path.c
+++ b/src/port/path.c
@@ -36,6 +36,7 @@
#include <unistd.h>
#endif
+#include "mb/pg_wchar.h"
#include "pg_config_paths.h"
@@ -45,6 +46,10 @@
#define IS_PATH_VAR_SEP(ch) ((ch) == ';')
#endif
+#ifdef WIN32
+static void debackslash_path(char *path, int encoding);
+static int pg_sjis_mblen(const unsigned char *s);
+#endif
static void make_relative_path(char *ret_path, const char *target_path,
const char *bin_path, const char *my_exec_path);
static char *trim_directory(char *path);
@@ -149,10 +154,73 @@ last_dir_separator(const char *filename)
}
+#ifdef WIN32
+
+/*
+ * Convert '\' to '/' within the given path, assuming the path
+ * is in the specified encoding.
+ */
+static void
+debackslash_path(char *path, int encoding)
+{
+ char *p;
+
+ /*
+ * Of the supported encodings, only Shift-JIS has multibyte characters
+ * that can include a byte equal to '\' (0x5C). So rather than implement
+ * a fully encoding-aware conversion, we special-case SJIS. (Invoking the
+ * general encoding-aware logic in wchar.c is impractical here for
+ * assorted reasons.)
+ */
+ if (encoding == PG_SJIS)
+ {
+ for (p = path; *p; p += pg_sjis_mblen((const unsigned char *) p))
+ {
+ if (*p == '\\')
+ *p = '/';
+ }
+ }
+ else
+ {
+ for (p = path; *p; p++)
+ {
+ if (*p == '\\')
+ *p = '/';
+ }
+ }
+}
+
/*
- * make_native_path - on WIN32, change / to \ in the path
+ * SJIS character length
*
- * This effectively undoes canonicalize_path.
+ * This must match the behavior of
+ * pg_encoding_mblen_bounded(PG_SJIS, s)
+ * In particular, unlike the version of pg_sjis_mblen in src/common/wchar.c,
+ * do not allow caller to accidentally step past end-of-string.
+ */
+static int
+pg_sjis_mblen(const unsigned char *s)
+{
+ int len;
+
+ if (*s >= 0xa1 && *s <= 0xdf)
+ len = 1; /* 1 byte kana? */
+ else if (IS_HIGHBIT_SET(*s) && s[1] != '\0')
+ len = 2; /* kanji? */
+ else
+ len = 1; /* should be ASCII */
+ return len;
+}
+
+#endif /* WIN32 */
+
+
+/*
+ * make_native_path - on WIN32, change '/' to '\' in the path
+ *
+ * This reverses the '\'-to-'/' transformation of debackslash_path.
+ * We need not worry about encodings here, since '/' does not appear
+ * as a byte of a multibyte character in any supported encoding.
*
* This is required because WIN32 COPY is an internal CMD.EXE
* command and doesn't process forward slashes in the same way
@@ -182,13 +250,14 @@ make_native_path(char *filename)
* on Windows. We need them to use filenames without spaces, for which a
* short filename is the safest equivalent, eg:
* C:/Progra~1/
+ *
+ * Presently, this is only used on paths that we can assume are in a
+ * server-safe encoding, so there's no need for an encoding-aware variant.
*/
void
cleanup_path(char *path)
{
#ifdef WIN32
- char *ptr;
-
/*
* GetShortPathName() will fail if the path does not exist, or short names
* are disabled on this file system. In both cases, we just return the
@@ -198,11 +267,8 @@ cleanup_path(char *path)
GetShortPathName(path, path, MAXPGPATH - 1);
/* Replace '\' with '/' */
- for (ptr = path; *ptr; ptr++)
- {
- if (*ptr == '\\')
- *ptr = '/';
- }
+ /* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
+ debackslash_path(path, PG_SQL_ASCII);
#endif
}
@@ -253,6 +319,8 @@ typedef enum
} canonicalize_state;
/*
+ * canonicalize_path()
+ *
* Clean up path by:
* o make Win32 path use Unix slashes
* o remove trailing quote on Win32
@@ -260,10 +328,21 @@ typedef enum
* o remove duplicate (adjacent) separators
* o remove '.' (unless path reduces to only '.')
* o process '..' ourselves, removing it if possible
+ * Modifies path in-place.
+ *
+ * This comes in two variants: encoding-aware and not. The non-aware version
+ * is only safe to use on strings that are in a server-safe encoding.
*/
void
canonicalize_path(char *path)
{
+ /* All server-safe encodings are alike here, so just use PG_SQL_ASCII */
+ canonicalize_path_enc(path, PG_SQL_ASCII);
+}
+
+void
+canonicalize_path_enc(char *path, int encoding)
+{
char *p,
*to_p;
char *spath;
@@ -278,17 +357,15 @@ canonicalize_path(char *path)
/*
* The Windows command processor will accept suitably quoted paths with
* forward slashes, but barfs badly with mixed forward and back slashes.
+ * Hence, start by converting all back slashes to forward slashes.
*/
- for (p = path; *p; p++)
- {
- if (*p == '\\')
- *p = '/';
- }
+ debackslash_path(path, encoding);
/*
* In Win32, if you do: prog.exe "a b" "\c\d\" the system will pass \c\d"
* as argv[2], so trim off trailing quote.
*/
+ p = path + strlen(path);
if (p > path && *(p - 1) == '"')
*(p - 1) = '/';
#endif