aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/heap/hio.c
blob: 457e1174a301244760f4b4d957fb223ee5f0123b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*-------------------------------------------------------------------------
 *
 * hio.c--
 *    POSTGRES heap access method input/output code.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Id: hio.c,v 1.1.1.1 1996/07/09 06:21:11 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
#include <string.h>

#include "c.h"

#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup.h"

#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/itemid.h"
#include "storage/itemptr.h"
#include "storage/off.h"

#include "utils/memutils.h"
#include "utils/elog.h"
#include "utils/rel.h"

/*
 * RelationPutHeapTuple	- place tuple on block blockIndex of relation
 *   (this comment was originally headed "amputunique - place tuple at tid")
 *
 *   Reads the requested block, adds the tuple to that page at whatever
 *   offset PageAddItem picks, stores the resulting (block, offset) pair in
 *   t_ctid of both the on-page copy and the caller's tuple, and hands the
 *   buffer to WriteBuffer.
 *
 *   relation	- relation to insert into; must satisfy RelationIsValid
 *   blockIndex	- block to insert on; asserted to be less than the current
 *		  number of blocks in the relation
 *   tuple	- tuple to store; its t_ctid is overwritten on return
 *
 *   The caller is expected to have made sure the tuple fits: the free-space
 *   test here is only an Assert.  If PageAddItem nevertheless reports
 *   failure, elog is called instead of using the invalid offset.
 *
 *   Currently on errors, calls elog.  Perhaps should return -1?
 *   Possible errors include the addition of a tuple to the page
 *   between the time the linep is chosen and the page is L_UP'd.
 *
 *   This should be coordinated with the B-tree code.
 *   Probably needs to have an amdelunique to allow for
 *   internal index records to be deleted and reordered as needed.
 *   For the heap AM, this should never be needed.
 */
void
RelationPutHeapTuple(Relation relation,
		     BlockNumber blockIndex,
		     HeapTuple tuple)
{
    Buffer		buffer;
    Page		pageHeader;
    BlockNumber		numberOfBlocks;
    OffsetNumber	offnum;
    unsigned int	len;
    ItemId		itemId;
    Item		item;
    
    /* ----------------
     *	increment access statistics
     * ----------------
     */
    IncrHeapAccessStat(local_RelationPutHeapTuple);
    IncrHeapAccessStat(global_RelationPutHeapTuple);
    
    Assert(RelationIsValid(relation));
    Assert(HeapTupleIsValid(tuple));
    
    /* the target block must already exist in the relation */
    numberOfBlocks = RelationGetNumberOfBlocks(relation);
    Assert(blockIndex < numberOfBlocks);
    
    buffer = ReadBuffer(relation, blockIndex);
#ifndef NO_BUFFERISVALID
    if (!BufferIsValid(buffer)) {
	/*
	 * Cast the block number explicitly so that the argument type
	 * matches the conversion specifier on every platform.
	 */
	elog(WARN, "RelationPutHeapTuple: no buffer for %lu in %s",
	     (unsigned long) blockIndex, &relation->rd_rel->relname);
    }
#endif
    
    pageHeader = (Page)BufferGetPage(buffer);
    len = (unsigned)DOUBLEALIGN(tuple->t_len);	/* be conservative */
    Assert((int)len <= PageGetFreeSpace(pageHeader));
    
    offnum = PageAddItem((Page)pageHeader, (Item)tuple,
			 tuple->t_len, InvalidOffsetNumber, LP_USED);
    
    /*
     * The free-space test above vanishes when Asserts are compiled out,
     * so do not trust PageAddItem blindly: an invalid offset must not be
     * used to index the page or be recorded in t_ctid.
     * NOTE(review): like the other error paths in this file, this relies
     * on elog(WARN, ...) not returning -- confirm.
     */
    if (offnum == InvalidOffsetNumber)
	elog(WARN, "RelationPutHeapTuple: failed to add tuple");
    
    /* locate the copy that now lives on the page */
    itemId = PageGetItemId((Page)pageHeader, offnum);
    item = PageGetItem((Page)pageHeader, itemId);
    
    /* make the on-page tuple point at its own location */
    ItemPointerSet(&((HeapTuple)item)->t_ctid, blockIndex, offnum);
    
    WriteBuffer(buffer);
    /* return an accurate tuple */
    ItemPointerSet(&tuple->t_ctid, blockIndex, offnum);
}

/*
 * The heap_insert routines "know" that a buffer page is initialized to
 * zero when a BlockExtend operation is performed. 
 *
 * PageIsNew relies on that: it reports a page as new when its pd_upper
 * field is zero.  The argument is dereferenced with "->", so it must be a
 * pointer to a structure that has a pd_upper member; the caller in this
 * file casts its Page to PageHeader before passing it.
 */

#define PageIsNew(page) ((page)->pd_upper == 0)

/*
 * RelationPutHeapTupleAtEnd - append tuple to the last block of relation
 *
 * Steps, as implemented below:
 *   1. Ask RelationGetNumberOfBlocks for the block count and read the last
 *      block (block 0 when the count is 0; if that page looks new it is
 *      swapped for a P_NEW buffer and initialized with PageInit).
 *   2. If the DOUBLEALIGN'd tuple length exceeds the page's free space,
 *      release that buffer, take a P_NEW buffer and PageInit it; if the
 *      tuple does not fit on the fresh page either, elog "Tuple is too big".
 *   3. Add the tuple with PageAddItem, record the resulting
 *      (block, offset) in t_ctid of both the on-page copy and the caller's
 *      tuple, and hand the buffer to WriteBuffer.
 *
 *   relation	- relation to append to; must satisfy RelationIsValid
 *   tuple	- tuple to store; its t_ctid is overwritten on return
 *
 * This routine is another in the series of attempts to reduce the number
 * of I/O's and system calls executed in the various benchmarks.  In
 * particular, this routine is used to append data to the end of a relation
 * file without excessive lseeks.  This code should do no more than 2 semops
 * in the ideal case.
 *
 * Eventually, we should cache the number of blocks in a relation somewhere.
 * Until that time, this code will have to do an lseek to determine the number
 * of blocks in a relation.
 * 
 * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
 * to do an append; it's possible to eliminate 2 of the semops if we do direct
 * buffer stuff (!); the lseek and the write can go if we get
 * RelationGetNumberOfBlocks to be useful.
 *
 * NOTE: This code presumes that we have a write lock on the relation.
 *
 * Also note that this routine probably shouldn't have to exist, and does
 * screw up the call graph rather badly, but we are wasting so much time and
 * system resources being massively general that we are losing badly in our
 * performance benchmarks.
 */
void
RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
{
    Buffer		buffer;
    Page		pageHeader;
    BlockNumber		lastblock;
    OffsetNumber	offnum;
    unsigned int	len;
    ItemId		itemId;
    Item		item;
    
    Assert(RelationIsValid(relation));
    Assert(HeapTupleIsValid(tuple));
    
    /*
     * XXX This does an lseek - VERY expensive - but at the moment it
     * is the only way to accurately determine how many blocks are in
     * a relation.  A good optimization would be to get this to actually
     * work properly.
     */
    
    lastblock = RelationGetNumberOfBlocks(relation);
    
    if (lastblock == 0)
	{
	    /*
	     * No blocks reported: read block 0 anyway and, if its pd_upper
	     * is still zero, replace it with a freshly initialized P_NEW page.
	     */
	    buffer = ReadBuffer(relation, lastblock);
	    pageHeader = (Page)BufferGetPage(buffer);
	    if (PageIsNew((PageHeader) pageHeader))
		{
		    buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
		    pageHeader = (Page)BufferGetPage(buffer);
		    PageInit(pageHeader, BufferGetPageSize(buffer), 0);
		}
	}
    else
	buffer = ReadBuffer(relation, lastblock - 1);
    
    pageHeader = (Page)BufferGetPage(buffer);
    len = (unsigned)DOUBLEALIGN(tuple->t_len);	/* be conservative */
    
    /*
     * Note that this is true if the above returned a bogus page, which
     * it will do for a completely empty relation.
     */
    
    if (len > PageGetFreeSpace(pageHeader))
	{
	    /* last page is full (or bogus): move on to a brand new page */
	    buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
	    pageHeader = (Page)BufferGetPage(buffer);
	    PageInit(pageHeader, BufferGetPageSize(buffer), 0);
	    
	    /* len is unsigned, so print it with an unsigned conversion */
	    if (len > PageGetFreeSpace(pageHeader))
		elog(WARN, "Tuple is too big: size %u", len);
	}
    
    offnum = PageAddItem((Page)pageHeader, (Item)tuple,
			 tuple->t_len, InvalidOffsetNumber, LP_USED);
    
    /*
     * Never index the page with, or record in t_ctid, an offset that
     * PageAddItem reported as invalid.
     * NOTE(review): like the "too big" path above, this relies on
     * elog(WARN, ...) not returning -- confirm.
     */
    if (offnum == InvalidOffsetNumber)
	elog(WARN, "RelationPutHeapTupleAtEnd: failed to add tuple");
    
    /* locate the copy that now lives on the page */
    itemId = PageGetItemId((Page)pageHeader, offnum);
    item = PageGetItem((Page)pageHeader, itemId);
    
    /* from here on lastblock is the block the tuple actually landed on */
    lastblock = BufferGetBlockNumber(buffer);
    
    ItemPointerSet(&((HeapTuple)item)->t_ctid, lastblock, offnum);
    
    /* return an accurate tuple */
    ItemPointerSet(&tuple->t_ctid, lastblock, offnum);
    
    WriteBuffer(buffer);
}