1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
/*-------------------------------------------------------------------------
*
* pwrite.c
* Implementation of pwrite[v](2) for platforms that lack one.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pwrite.c
*
* Note that this implementation changes the current file position, unlike
* the POSIX function, so we use the name pg_pwrite(). Likewise for the
* iovec version.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
#include "port/pg_iovec.h"
#ifndef HAVE_PWRITE
/*
 * pg_pwrite
 *		Write up to "size" bytes from "buf" to "fd", starting at byte "offset".
 *
 * Returns the number of bytes written, which may be fewer than requested,
 * or -1 on failure.  Unlike the POSIX function, this replacement changes
 * the current file position of "fd".
 */
ssize_t
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
{
#ifdef WIN32
	OVERLAPPED	overlapped = {0};
	HANDLE		handle;
	DWORD		result;

	handle = (HANDLE) _get_osfhandle(fd);
	if (handle == INVALID_HANDLE_VALUE)
	{
		errno = EBADF;
		return -1;
	}

	/*
	 * Reject negative offsets, as the lseek()-based path below does;
	 * otherwise the value would be reinterpreted as a huge unsigned offset.
	 */
	if (offset < 0)
	{
		errno = EINVAL;
		return -1;
	}

	/*
	 * WriteFile() takes the byte count as a DWORD.  Clamp the request so a
	 * larger size_t cannot be silently truncated (possibly to zero); callers
	 * already have to cope with short writes.
	 */
	size = Min(size, 1024 * 1024 * 1024);

	/*
	 * Pass the full offset, not just its low 32 bits.  Widening before the
	 * shift keeps this well-defined even where off_t is only 32 bits wide.
	 */
	overlapped.Offset = (DWORD) offset;
	overlapped.OffsetHigh = (DWORD) (((uint64) offset) >> 32);

	if (!WriteFile(handle, buf, (DWORD) size, &result, &overlapped))
	{
		/* Presumably maps the Win32 error code onto errno. */
		_dosmaperr(GetLastError());
		return -1;
	}

	return result;
#else
	/* No positioned write available: seek first, then write. */
	if (lseek(fd, offset, SEEK_SET) < 0)
		return -1;

	return write(fd, buf, size);
#endif
}
#endif
#ifndef HAVE_PWRITEV
/*
 * pg_pwritev
 *		Gather-write the "iovcnt" buffers described by "iov" to "fd",
 *		starting at byte "offset".
 *
 * Returns the number of bytes written (possibly fewer than the total
 * requested), or -1 on failure.  Like pg_pwrite(), this may move the
 * current file position.
 */
ssize_t
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_WRITEV
	/* Anything other than a single buffer goes through seek + writev(). */
	if (iovcnt != 1)
	{
		if (lseek(fd, offset, SEEK_SET) < 0)
			return -1;
		return writev(fd, iov, iovcnt);
	}

	/* A lone buffer is just a plain positioned write. */
	return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);
#else
	ssize_t		total = 0;
	int			idx = 0;

	/* No writev(): issue one positioned write per buffer, in order. */
	while (idx < iovcnt)
	{
		const struct iovec *cur = &iov[idx];
		ssize_t		nwritten;

		nwritten = pg_pwrite(fd, cur->iov_base, cur->iov_len, offset);

		/*
		 * A failure on the first buffer is reported as an error; a failure
		 * later on is reported as a short write of what was already written.
		 */
		if (nwritten < 0)
			return (idx == 0) ? -1 : total;

		total += nwritten;
		offset += nwritten;

		/* Stop at the first buffer that was only partially written. */
		if (nwritten < cur->iov_len)
			break;

		idx++;
	}

	return total;
#endif
}
#endif
/*
 * A convenience wrapper for pg_pwritev() that retries on partial write.  If an
 * error is returned, it is unspecified how much has been written.
 *
 * Returns the total number of bytes written once every iovec has been
 * consumed, or -1 on failure.  Failure cases visible here: iovcnt larger than
 * PG_IOV_MAX (errno = EINVAL), pg_pwritev() itself failing, and pg_pwritev()
 * reporting zero bytes written while data remains (errno = ENOSPC).
 */
ssize_t
pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
	struct iovec iov_copy[PG_IOV_MAX];
	ssize_t		sum = 0;
	ssize_t		part;

	/* We'd better have space to make a copy, in case we need to retry. */
	if (iovcnt > PG_IOV_MAX)
	{
		errno = EINVAL;
		return -1;
	}

	for (;;)
	{
		ssize_t		written;

		/* Write as much as we can. */
		part = pg_pwritev(fd, iov, iovcnt, offset);
		if (part < 0)
			return -1;

#ifdef SIMULATE_SHORT_WRITE
		part = Min(part, 4096);
#endif

		/* Remember how much this attempt achieved before consuming "part". */
		written = part;

		/* Count our progress. */
		sum += part;
		offset += part;

		/* Step over iovecs that are done. */
		while (iovcnt > 0 && iov->iov_len <= part)
		{
			part -= iov->iov_len;
			++iov;
			--iovcnt;
		}

		/* Are they all done? */
		if (iovcnt == 0)
		{
			if (part > 0)
				elog(ERROR, "unexpectedly wrote more than requested");
			break;
		}

		/*
		 * Data remains but nothing was written.  Retrying with the same
		 * arguments could spin forever, so give up instead.  The call did not
		 * fail outright and so set no errno; report it as out of disk space.
		 */
		if (written == 0)
		{
			errno = ENOSPC;
			return -1;
		}

		/*
		 * Move whatever's left to the front of our mutable copy and adjust the
		 * leading iovec.  memmove() is required because on later iterations
		 * "iov" already points into iov_copy.
		 */
		Assert(iovcnt > 0);
		memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
		Assert(iov->iov_len > part);
		iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
		iov_copy[0].iov_len -= part;
		iov = iov_copy;
	}

	return sum;
}
|