diff options
author | Jameson Nash <vtjnash@gmail.com> | 2018-02-13 16:05:45 -0500 |
---|---|---|
committer | Santiago Gimeno <santiago.gimeno@gmail.com> | 2018-09-28 21:47:07 +0200 |
commit | 60abdbaed6c20a41678cd9af1d6f40a1d11bf5e2 (patch) | |
tree | 9077d8a98c049e83a07395b129202998f325f7c0 | |
parent | 19a341919546140c37834488e1f7c46721b0d2d5 (diff) | |
download | libuv-60abdbaed6c20a41678cd9af1d6f40a1d11bf5e2.tar.gz libuv-60abdbaed6c20a41678cd9af1d6f40a1d11bf5e2.zip |
unix,readv: always permit partial reads to return
For simplicity and predictability (since the user must handle the retry
anyways), always emit exactly one readv/pread/preadv syscall and return
that result to the user.
By contrast, write needs to preserve order, so it needs to keep retrying
the operation until it finishes before retiring the req from the queue.
Fixes: https://github.com/nodejs/node/issues/16601
PR-URL: https://github.com/libuv/libuv/pull/1742
Refs: https://github.com/libuv/libuv/pull/640
Refs: https://github.com/libuv/libuv/issues/1720
Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
-rw-r--r-- | src/unix/fs.c | 35 | ||||
-rw-r--r-- | test/test-fs.c | 201 |
2 files changed, 182 insertions, 54 deletions
diff --git a/src/unix/fs.c b/src/unix/fs.c index 8d773aa1..35bcc6f6 100644 --- a/src/unix/fs.c +++ b/src/unix/fs.c @@ -262,17 +262,25 @@ static ssize_t uv__fs_read(uv_fs_t* req) { #if defined(__linux__) static int no_preadv; #endif + unsigned int iovmax; ssize_t result; #if defined(_AIX) struct stat buf; - if(fstat(req->file, &buf)) - return -1; - if(S_ISDIR(buf.st_mode)) { + result = fstat(req->file, &buf); + if (result) + goto done; + if (S_ISDIR(buf.st_mode)) { errno = EISDIR; - return -1; + result -1; + goto done; } #endif /* defined(_AIX) */ + + iovmax = uv__getiovmax(); + if (req->nbufs > iovmax) + req->nbufs = iovmax; + if (req->off < 0) { if (req->nbufs == 1) result = read(req->file, req->bufs[0].base, req->bufs[0].len); @@ -309,6 +317,13 @@ static ssize_t uv__fs_read(uv_fs_t* req) { } done: + /* Early cleanup of bufs allocation, since we're done with it. */ + if (req->bufs != req->bufsml) + uv__free(req->bufs); + + req->bufs = NULL; + req->nbufs = 0; + return result; } @@ -1023,8 +1038,7 @@ static size_t uv__fs_buf_offset(uv_buf_t* bufs, size_t size) { return offset; } -typedef ssize_t (*uv__fs_buf_iter_processor)(uv_fs_t* req); -static ssize_t uv__fs_buf_iter(uv_fs_t* req, uv__fs_buf_iter_processor process) { +static ssize_t uv__fs_write_all(uv_fs_t* req) { unsigned int iovmax; unsigned int nbufs; uv_buf_t* bufs; @@ -1042,7 +1056,7 @@ static ssize_t uv__fs_buf_iter(uv_fs_t* req, uv__fs_buf_iter_processor process) req->nbufs = iovmax; do - result = process(req); + result = uv__fs_write(req); while (result < 0 && errno == EINTR); if (result <= 0) { @@ -1076,7 +1090,8 @@ static void uv__fs_work(struct uv__work* w) { ssize_t r; req = container_of(w, uv_fs_t, work_req); - retry_on_eintr = !(req->fs_type == UV_FS_CLOSE); + retry_on_eintr = !(req->fs_type == UV_FS_CLOSE || + req->fs_type == UV_FS_READ); do { errno = 0; @@ -1105,7 +1120,7 @@ static void uv__fs_work(struct uv__work* w) { X(MKDIR, mkdir(req->path, req->mode)); X(MKDTEMP, uv__fs_mkdtemp(req)); X(OPEN, uv__fs_open(req)); - X(READ, uv__fs_buf_iter(req, uv__fs_read)); + X(READ, uv__fs_read(req)); X(SCANDIR, uv__fs_scandir(req)); X(READLINK, uv__fs_readlink(req)); X(REALPATH, uv__fs_realpath(req)); @@ -1116,7 +1131,7 @@ static void uv__fs_work(struct uv__work* w) { X(SYMLINK, symlink(req->path, req->new_path)); X(UNLINK, unlink(req->path)); X(UTIME, uv__fs_utime(req)); - X(WRITE, uv__fs_buf_iter(req, uv__fs_write)); + X(WRITE, uv__fs_write_all(req)); default: abort(); } #undef X diff --git a/test/test-fs.c b/test/test-fs.c index 6ed0d87b..b4df5e3d 100644 --- a/test/test-fs.c +++ b/test/test-fs.c @@ -26,6 +26,7 @@ #include <string.h> /* memset */ #include <fcntl.h> #include <sys/stat.h> +#include <limits.h> /* INT_MAX, PATH_MAX, IOV_MAX */ /* FIXME we shouldn't need to branch in this file */ #if defined(__unix__) || defined(__POSIX__) || \ @@ -121,6 +122,31 @@ static char test_buf2[] = "second-buffer\n"; static uv_buf_t iov; #ifdef _WIN32 +int uv_test_getiovmax(void) { + return INT32_MAX; /* Emulated by libuv, so no real limit. */ +} +#else +int uv_test_getiovmax(void) { +#if defined(IOV_MAX) + return IOV_MAX; +#elif defined(_SC_IOV_MAX) + static int iovmax = -1; + if (iovmax == -1) { + iovmax = sysconf(_SC_IOV_MAX); + /* On some embedded devices (arm-linux-uclibc based ip camera), + * sysconf(_SC_IOV_MAX) can not get the correct value. The return + * value is -1 and the errno is EINPROGRESS. Degrade the value to 1. + */ + if (iovmax == -1) iovmax = 1; + } + return iovmax; +#else + return 1024; +#endif +} +#endif + +#ifdef _WIN32 /* * This tag and guid have no special meaning, and don't conflict with * reserved ids. @@ -2755,16 +2781,41 @@ TEST_IMPL(fs_write_multiple_bufs) { /* Read the strings back to separate buffers. */ iovs[0] = uv_buf_init(buf, sizeof(test_buf)); iovs[1] = uv_buf_init(buf2, sizeof(test_buf2)); + ASSERT(lseek(open_req1.result, 0, SEEK_CUR) == 0); + r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, 2, -1, NULL); + ASSERT(r >= 0); + ASSERT(read_req.result == sizeof(test_buf) + sizeof(test_buf2)); + ASSERT(strcmp(buf, test_buf) == 0); + ASSERT(strcmp(buf2, test_buf2) == 0); + uv_fs_req_cleanup(&read_req); + + iov = uv_buf_init(buf, sizeof(buf)); + r = uv_fs_read(NULL, &read_req, open_req1.result, &iov, 1, -1, NULL); + ASSERT(r == 0); + ASSERT(read_req.result == 0); + uv_fs_req_cleanup(&read_req); + + /* Read the strings back to separate buffers. */ + iovs[0] = uv_buf_init(buf, sizeof(test_buf)); + iovs[1] = uv_buf_init(buf2, sizeof(test_buf2)); r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, 2, 0, NULL); ASSERT(r >= 0); - ASSERT(read_req.result >= 0); + if (read_req.result == sizeof(test_buf)) { + /* Infer that preadv is not available. */ + uv_fs_req_cleanup(&read_req); + r = uv_fs_read(NULL, &read_req, open_req1.result, &iovs[1], 1, read_req.result, NULL); + ASSERT(r >= 0); + ASSERT(read_req.result == sizeof(test_buf2)); + } else { + ASSERT(read_req.result == sizeof(test_buf) + sizeof(test_buf2)); + } ASSERT(strcmp(buf, test_buf) == 0); ASSERT(strcmp(buf2, test_buf2) == 0); uv_fs_req_cleanup(&read_req); iov = uv_buf_init(buf, sizeof(buf)); r = uv_fs_read(NULL, &read_req, open_req1.result, &iov, 1, - read_req.result, NULL); + sizeof(test_buf) + sizeof(test_buf2), NULL); ASSERT(r == 0); ASSERT(read_req.result == 0); uv_fs_req_cleanup(&read_req); @@ -2783,12 +2834,15 @@ TEST_IMPL(fs_write_multiple_bufs) { TEST_IMPL(fs_write_alotof_bufs) { - const size_t iovcount = 54321; + size_t iovcount; + size_t iovmax; uv_buf_t* iovs; char* buffer; size_t index; int r; + iovcount = 54321; + /* Setup. */ unlink("test_file"); @@ -2796,6 +2850,7 @@ TEST_IMPL(fs_write_alotof_bufs) { iovs = malloc(sizeof(*iovs) * iovcount); ASSERT(iovs != NULL); + iovmax = uv_test_getiovmax(); r = uv_fs_open(NULL, &open_req1, @@ -2829,7 +2884,10 @@ TEST_IMPL(fs_write_alotof_bufs) { iovs[index] = uv_buf_init(buffer + index * sizeof(test_buf), sizeof(test_buf)); - r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, iovcount, 0, NULL); + ASSERT(lseek(open_req1.result, 0, SEEK_SET) == 0); + r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, iovcount, -1, NULL); + if (iovcount > iovmax) + iovcount = iovmax; ASSERT(r >= 0); ASSERT((size_t)read_req.result == sizeof(test_buf) * iovcount); @@ -2841,13 +2899,14 @@ TEST_IMPL(fs_write_alotof_bufs) { uv_fs_req_cleanup(&read_req); free(buffer); + ASSERT(lseek(open_req1.result, write_req.result, SEEK_SET) == write_req.result); iov = uv_buf_init(buf, sizeof(buf)); r = uv_fs_read(NULL, &read_req, open_req1.result, &iov, 1, - read_req.result, + -1, NULL); ASSERT(r == 0); ASSERT(read_req.result == 0); @@ -2868,14 +2927,19 @@ TEST_IMPL(fs_write_alotof_bufs) { TEST_IMPL(fs_write_alotof_bufs_with_offset) { - const size_t iovcount = 54321; + size_t iovcount; + size_t iovmax; uv_buf_t* iovs; char* buffer; size_t index; int r; int64_t offset; - char* filler = "0123456789"; - int filler_len = strlen(filler); + char* filler; + int filler_len; + + filler = "0123456789"; + filler_len = strlen(filler); + iovcount = 54321; /* Setup. */ unlink("test_file"); @@ -2884,6 +2948,7 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { iovs = malloc(sizeof(*iovs) * iovcount); ASSERT(iovs != NULL); + iovmax = uv_test_getiovmax(); r = uv_fs_open(NULL, &open_req1, @@ -2927,6 +2992,10 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { r = uv_fs_read(NULL, &read_req, open_req1.result, iovs, iovcount, offset, NULL); ASSERT(r >= 0); + if (r == sizeof(test_buf)) + iovcount = 1; /* Infer that preadv is not available. */ + else if (iovcount > iovmax) + iovcount = iovmax; ASSERT((size_t)read_req.result == sizeof(test_buf) * iovcount); for (index = 0; index < iovcount; ++index) @@ -2940,7 +3009,7 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { r = uv_fs_stat(NULL, &stat_req, "test_file", NULL); ASSERT(r == 0); ASSERT((int64_t)((uv_stat_t*)stat_req.ptr)->st_size == - offset + (int64_t)(iovcount * sizeof(test_buf))); + offset + (int64_t)write_req.result); uv_fs_req_cleanup(&stat_req); iov = uv_buf_init(buf, sizeof(buf)); @@ -2949,7 +3018,7 @@ TEST_IMPL(fs_write_alotof_bufs_with_offset) { open_req1.result, &iov, 1, - read_req.result + offset, + offset + write_req.result, NULL); ASSERT(r == 0); ASSERT(read_req.result == 0); @@ -2981,48 +3050,64 @@ TEST_IMPL(fs_partial_write) { #else /* !_WIN32 */ -static void thread_exec(int fd, char* data, int size, int interval, int doread) { - pid_t pid; - ssize_t result; - - pid = getpid(); - result = 1; - - while (size > 0 && result > 0) { - do { - if (doread) - result = write(fd, data, size < interval ? size : interval); - else - result = read(fd, data, size < interval ? size : interval); - } while (result == -1 && errno == EINTR); - - kill(pid, SIGUSR1); - size -= result; - data += result; - } - - ASSERT(size == 0); - ASSERT(result > 0); -} - struct thread_ctx { + pthread_t pid; int fd; - char *data; + char* data; int size; int interval; int doread; }; static void thread_main(void* arg) { - struct thread_ctx *ctx; + const struct thread_ctx* ctx; + int size; + char* data; + ctx = (struct thread_ctx*)arg; - thread_exec(ctx->fd, ctx->data, ctx->size, ctx->interval, ctx->doread); + size = ctx->size; + data = ctx->data; + + while (size > 0) { + ssize_t result; + int nbytes; + nbytes = size < ctx->interval ? size : ctx->interval; + if (ctx->doread) { + result = write(ctx->fd, data, nbytes); + /* Should not see EINTR (or other errors) */ + ASSERT(result == nbytes); + } else { + result = read(ctx->fd, data, nbytes); + /* Should not see EINTR (or other errors), + * but might get a partial read if we are faster than the writer + */ + ASSERT(result > 0 && result <= nbytes); + } + + pthread_kill(ctx->pid, SIGUSR1); + size -= result; + data += result; + } } static void sig_func(uv_signal_t* handle, int signum) { uv_signal_stop(handle); } +static size_t uv_test_fs_buf_offset(uv_buf_t* bufs, size_t size) { + size_t offset; + /* Figure out which bufs are done */ + for (offset = 0; size > 0 && bufs[offset].len <= size; ++offset) + size -= bufs[offset].len; + + /* Fix a partial read/write */ + if (size > 0) { + bufs[offset].base += size; + bufs[offset].len -= size; + } + return offset; +} + static void test_fs_partial(int doread) { struct thread_ctx ctx; uv_thread_t thread; @@ -3032,13 +3117,13 @@ static void test_fs_partial(int doread) { uv_buf_t* iovs; char* buffer; size_t index; - int result; iovcount = 54321; iovs = malloc(sizeof(*iovs) * iovcount); ASSERT(iovs != NULL); + ctx.pid = pthread_self(); ctx.doread = doread; ctx.interval = 1000; ctx.size = sizeof(test_buf) * iovcount; @@ -3060,21 +3145,49 @@ static void test_fs_partial(int doread) { ctx.fd = pipe_fds[doread]; ASSERT(0 == uv_thread_create(&thread, thread_main, &ctx)); - if (doread) - result = uv_fs_read(loop, &read_req, pipe_fds[0], iovs, iovcount, -1, NULL); - else + if (doread) { + uv_buf_t* read_iovs; + int nread; + read_iovs = iovs; + nread = 0; + while (nread < ctx.size) { + int result; + result = uv_fs_read(loop, &read_req, pipe_fds[0], read_iovs, iovcount, -1, NULL); + if (result > 0) { + size_t read_iovcount; + read_iovcount = uv_test_fs_buf_offset(read_iovs, result); + read_iovs += read_iovcount; + iovcount -= read_iovcount; + nread += result; + } else { + ASSERT(result == UV_EINTR); + } + uv_fs_req_cleanup(&read_req); + } + } else { + int result; result = uv_fs_write(loop, &write_req, pipe_fds[1], iovs, iovcount, -1, NULL); + ASSERT(write_req.result == result); + ASSERT(result == ctx.size); + uv_fs_req_cleanup(&write_req); + } - ASSERT(result == ctx.size); - ASSERT(0 == memcmp(buffer, ctx.data, result)); + ASSERT(0 == memcmp(buffer, ctx.data, ctx.size)); ASSERT(0 == uv_thread_join(&thread)); ASSERT(0 == uv_run(loop, UV_RUN_DEFAULT)); - ASSERT(0 == close(pipe_fds[0])); ASSERT(0 == close(pipe_fds[1])); uv_close((uv_handle_t*) &signal, NULL); + { /* Make sure we read everything that we wrote. */ + int result; + result = uv_fs_read(loop, &read_req, pipe_fds[0], iovs, 1, -1, NULL); + ASSERT(result == 0); + uv_fs_req_cleanup(&read_req); + } + ASSERT(0 == close(pipe_fds[0])); + free(iovs); free(buffer); free(ctx.data); |