diff options
Diffstat (limited to 'src/backend/access/heap')
-rw-r--r-- | src/backend/access/heap/heapam.c | 68 | ||||
-rw-r--r-- | src/backend/access/heap/hio.c | 89 | ||||
-rw-r--r-- | src/backend/access/heap/rewriteheap.c | 6 | ||||
-rw-r--r-- | src/backend/access/heap/tuptoaster.c | 23 |
4 files changed, 131 insertions, 55 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 49bca5b3299..7139b03471e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $ * * * INTERFACE ROUTINES @@ -1800,22 +1800,52 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) /* + * GetBulkInsertState - prepare status object for a bulk insert + */ +BulkInsertState +GetBulkInsertState(void) +{ + BulkInsertState bistate; + + bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData)); + bistate->strategy = GetAccessStrategy(BAS_BULKWRITE); + bistate->current_buf = InvalidBuffer; + return bistate; +} + +/* + * FreeBulkInsertState - clean up after finishing a bulk insert + */ +void +FreeBulkInsertState(BulkInsertState bistate) +{ + if (bistate->current_buf != InvalidBuffer) + ReleaseBuffer(bistate->current_buf); + FreeAccessStrategy(bistate->strategy); + pfree(bistate); +} + + +/* * heap_insert - insert tuple into a heap * * The new tuple is stamped with current transaction ID and the specified * command ID. * - * If use_wal is false, the new tuple is not logged in WAL, even for a - * non-temp relation. Safe usage of this behavior requires that we arrange - * that all new tuples go into new pages not containing any tuples from other - * transactions, and that the relation gets fsync'd before commit. - * (See also heap_sync() comments) + * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not + * logged in WAL, even for a non-temp relation. Safe usage of this behavior + * requires that we arrange that all new tuples go into new pages not + * containing any tuples from other transactions, and that the relation gets + * fsync'd before commit. (See also heap_sync() comments) + * + * The HEAP_INSERT_SKIP_FSM option is passed directly to + * RelationGetBufferForTuple, which see for more info. * - * use_fsm is passed directly to RelationGetBufferForTuple, which see for - * more info. + * Note that these options will be applied when inserting into the heap's + * TOAST table, too, if the tuple requires any out-of-line data. * - * Note that use_wal and use_fsm will be applied when inserting into the - * heap's TOAST table, too, if the tuple requires any out-of-line data. + * The BulkInsertState object (if any; bistate can be NULL for default + * behavior) is also just passed through to RelationGetBufferForTuple. * * The return value is the OID assigned to the tuple (either here or by the * caller), or InvalidOid if no OID. The header fields of *tup are updated @@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) */ Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, - bool use_wal, bool use_fsm) + int options, BulkInsertState bistate) { TransactionId xid = GetCurrentTransactionId(); HeapTuple heaptup; @@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, heaptup = tup; } else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) - heaptup = toast_insert_or_update(relation, tup, NULL, - use_wal, use_fsm); + heaptup = toast_insert_or_update(relation, tup, NULL, options); else heaptup = tup; /* Find buffer to insert this tuple into */ buffer = RelationGetBufferForTuple(relation, heaptup->t_len, - InvalidBuffer, use_fsm); + InvalidBuffer, options, bistate); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, MarkBufferDirty(buffer); /* XLOG stuff */ - if (use_wal && !relation->rd_istemp) + if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp) { xl_heap_insert xlrec; xl_heap_header xlhdr; @@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, Oid simple_heap_insert(Relation relation, HeapTuple tup) { - return heap_insert(relation, tup, GetCurrentCommandId(true), true, true); + return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL); } /* @@ -2595,8 +2624,7 @@ l2: if (need_toast) { /* Note we always use WAL and FSM during updates */ - heaptup = toast_insert_or_update(relation, newtup, &oldtup, - true, true); + heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0); newtupsize = MAXALIGN(heaptup->t_len); } else @@ -2623,7 +2651,7 @@ l2: { /* Assume there's no chance to put heaptup on same page. */ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, - buffer, true); + buffer, 0, NULL); } else { @@ -2640,7 +2668,7 @@ l2: */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, - buffer, true); + buffer, 0, NULL); } else { diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 3723977fe09..5cfd150b8ef 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,13 +8,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/heapam.h" #include "access/hio.h" #include "storage/bufmgr.h" #include "storage/freespace.h" @@ -57,6 +58,43 @@ RelationPutHeapTuple(Relation relation, } /* + * Read in a buffer, using bulk-insert strategy if bistate isn't NULL. + */ +static Buffer +ReadBufferBI(Relation relation, BlockNumber targetBlock, + BulkInsertState bistate) +{ + Buffer buffer; + + /* If not bulk-insert, exactly like ReadBuffer */ + if (!bistate) + return ReadBuffer(relation, targetBlock); + + /* If we have the desired block already pinned, re-pin and return it */ + if (bistate->current_buf != InvalidBuffer) + { + if (BufferGetBlockNumber(bistate->current_buf) == targetBlock) + { + IncrBufferRefCount(bistate->current_buf); + return bistate->current_buf; + } + /* ... else drop the old buffer */ + ReleaseBuffer(bistate->current_buf); + bistate->current_buf = InvalidBuffer; + } + + /* Perform a read using the buffer strategy */ + buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock, + RBM_NORMAL, bistate->strategy); + + /* Save the selected block as target for future inserts */ + IncrBufferRefCount(buffer); + bistate->current_buf = buffer; + + return buffer; +} + +/* * RelationGetBufferForTuple * * Returns pinned and exclusive-locked buffer of a page in given relation @@ -80,13 +118,13 @@ RelationPutHeapTuple(Relation relation, * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * - * If use_fsm is true (the normal case), we use FSM to help us find free - * space. If use_fsm is false, we always append a new empty page to the - * end of the relation if the tuple won't fit on the current target page. + * We normally use FSM to help us find free space. However, + * if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to + * the end of the relation if the tuple won't fit on the current target page. * This can save some cycles when we know the relation is new and doesn't * contain useful amounts of free space. * - * The use_fsm = false case is also useful for non-WAL-logged additions to a + * HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a * relation, if the caller holds exclusive lock and is careful to invalidate * relation->rd_targblock before the first insertion --- that ensures that * all insertions will occur into newly added pages and not be intermixed @@ -94,6 +132,12 @@ RelationPutHeapTuple(Relation relation, * any committed data of other transactions. (See heap_insert's comments * for additional constraints needed for safe usage of this behavior.) * + * The caller can also provide a BulkInsertState object to optimize many + * insertions into the same relation. This keeps a pin on the current + * insertion target page (to save pin/unpin cycles) and also passes a + * BULKWRITE buffer selection strategy object to the buffer manager. + * Passing NULL for bistate selects the default behavior. + * * We always try to avoid filling existing pages further than the fillfactor. * This is OK since this routine is not consulted when updating a tuple and * keeping it on the same page, which is the scenario fillfactor is meant @@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation, */ Buffer RelationGetBufferForTuple(Relation relation, Size len, - Buffer otherBuffer, bool use_fsm) + Buffer otherBuffer, int options, + struct BulkInsertStateData *bistate) { + bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; Size pageFreeSpace, @@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len, len = MAXALIGN(len); /* be conservative */ + /* Bulk insert is not supported for updates, only inserts. */ + Assert(otherBuffer == InvalidBuffer || !bistate); + /* * If we're gonna fail for oversize tuple, do it right away */ @@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len, /* * We first try to put the tuple on the same page we last inserted a tuple - * on, as cached in the relcache entry. If that doesn't work, we ask the - * shared Free Space Map to locate a suitable page. Since the FSM's info - * might be out of date, we have to be prepared to loop around and retry - * multiple times. (To insure this isn't an infinite loop, we must update - * the FSM with the correct amount of free space on each page that proves - * not to be suitable.) If the FSM has no record of a page with enough - * free space, we give up and extend the relation. + * on, as cached in the BulkInsertState or relcache entry. If that + * doesn't work, we ask the Free Space Map to locate a suitable page. + * Since the FSM's info might be out of date, we have to be prepared to + * loop around and retry multiple times. (To insure this isn't an infinite + * loop, we must update the FSM with the correct amount of free space on + * each page that proves not to be suitable.) If the FSM has no record of + * a page with enough free space, we give up and extend the relation. * * When use_fsm is false, we either put the tuple onto the existing target * page or extend the relation. */ - if (len + saveFreeSpace <= MaxHeapTupleSize) - targetBlock = relation->rd_targblock; - else + if (len + saveFreeSpace > MaxHeapTupleSize) { - /* can't fit, don't screw up FSM request tracking by trying */ + /* can't fit, don't bother asking FSM */ targetBlock = InvalidBlockNumber; use_fsm = false; } + else if (bistate && bistate->current_buf != InvalidBuffer) + targetBlock = BufferGetBlockNumber(bistate->current_buf); + else + targetBlock = relation->rd_targblock; if (targetBlock == InvalidBlockNumber && use_fsm) { @@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len, if (otherBuffer == InvalidBuffer) { /* easy case */ - buffer = ReadBuffer(relation, targetBlock); + buffer = ReadBufferBI(relation, targetBlock, bistate); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock == targetBlock) @@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len, * it worth keeping an accurate file length in shared memory someplace, * rather than relying on the kernel to do it for us? */ - buffer = ReadBuffer(relation, P_NEW); + buffer = ReadBufferBI(relation, P_NEW, bistate); /* * We can be certain that locking the otherBuffer first is OK, since it diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index cd7302bd5d7..18c7a72d64f 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -96,7 +96,7 @@ * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup) } else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL, - state->rs_use_wal, false); + HEAP_INSERT_SKIP_FSM | + (state->rs_use_wal ? + 0 : HEAP_INSERT_SKIP_WAL)); else heaptup = tup; diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index e3014e288ab..f8bb77bd0a9 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.90 2008/11/02 01:45:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.91 2008/11/06 20:51:14 tgl Exp $ * * * INTERFACE ROUTINES @@ -74,8 +74,7 @@ do { \ static void toast_delete_datum(Relation rel, Datum value); -static Datum toast_save_datum(Relation rel, Datum value, - bool use_wal, bool use_fsm); +static Datum toast_save_datum(Relation rel, Datum value, int options); static struct varlena *toast_fetch_datum(struct varlena * attr); static struct varlena *toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length); @@ -400,7 +399,7 @@ toast_delete(Relation rel, HeapTuple oldtup) * Inputs: * newtup: the candidate new tuple to be inserted * oldtup: the old row version for UPDATE, or NULL for INSERT - * use_wal, use_fsm: flags to be passed to heap_insert() for toast rows + * options: options to be passed to heap_insert() for toast rows * Result: * either newtup if no toasting is needed, or a palloc'd modified tuple * that is what should actually get stored @@ -411,7 +410,7 @@ toast_delete(Relation rel, HeapTuple oldtup) */ HeapTuple toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, - bool use_wal, bool use_fsm) + int options) { HeapTuple result_tuple; TupleDesc tupleDesc; @@ -677,8 +676,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, { old_value = toast_values[i]; toast_action[i] = 'p'; - toast_values[i] = toast_save_datum(rel, toast_values[i], - use_wal, use_fsm); + toast_values[i] = toast_save_datum(rel, toast_values[i], options); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; @@ -728,8 +726,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; - toast_values[i] = toast_save_datum(rel, toast_values[i], - use_wal, use_fsm); + toast_values[i] = toast_save_datum(rel, toast_values[i], options); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; @@ -838,8 +835,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; - toast_values[i] = toast_save_datum(rel, toast_values[i], - use_wal, use_fsm); + toast_values[i] = toast_save_datum(rel, toast_values[i], options); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; @@ -1120,8 +1116,7 @@ toast_compress_datum(Datum value) * ---------- */ static Datum -toast_save_datum(Relation rel, Datum value, - bool use_wal, bool use_fsm) +toast_save_datum(Relation rel, Datum value, int options) { Relation toastrel; Relation toastidx; @@ -1218,7 +1213,7 @@ toast_save_datum(Relation rel, Datum value, memcpy(VARDATA(&chunk_data), data_p, chunk_size); toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull); - heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm); + heap_insert(toastrel, toasttup, mycid, options, NULL); /* * Create the index entry. We cheat a little here by not using |