diff options
Diffstat (limited to 'src/backend/storage/ipc')
-rw-r--r-- | src/backend/storage/ipc/Makefile.inc | 15 | ||||
-rw-r--r-- | src/backend/storage/ipc/README | 31 | ||||
-rw-r--r-- | src/backend/storage/ipc/ipc.c | 718 | ||||
-rw-r--r-- | src/backend/storage/ipc/ipci.c | 149 | ||||
-rw-r--r-- | src/backend/storage/ipc/s_lock.c | 440 | ||||
-rw-r--r-- | src/backend/storage/ipc/shmem.c | 561 | ||||
-rw-r--r-- | src/backend/storage/ipc/shmqueue.c | 251 | ||||
-rw-r--r-- | src/backend/storage/ipc/sinval.c | 169 | ||||
-rw-r--r-- | src/backend/storage/ipc/sinvaladt.c | 797 | ||||
-rw-r--r-- | src/backend/storage/ipc/spin.c | 247 |
10 files changed, 3378 insertions, 0 deletions
diff --git a/src/backend/storage/ipc/Makefile.inc b/src/backend/storage/ipc/Makefile.inc new file mode 100644 index 00000000000..b426dba0ff0 --- /dev/null +++ b/src/backend/storage/ipc/Makefile.inc @@ -0,0 +1,15 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +#    Makefile for storage/ipc +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +#    $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ +# +#------------------------------------------------------------------------- + +SUBSRCS+= ipc.c ipci.c s_lock.c shmem.c shmqueue.c sinval.c \ + sinvaladt.c spin.c diff --git a/src/backend/storage/ipc/README b/src/backend/storage/ipc/README new file mode 100644 index 00000000000..02d66045f82 --- /dev/null +++ b/src/backend/storage/ipc/README @@ -0,0 +1,31 @@ +$Header: /cvsroot/pgsql/src/backend/storage/ipc/README,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ +Mon Jul 18 11:09:22 PDT 1988 W.KLAS + +Cache invalidation synchronization routines: +=========================================== + +The cache synchronization is done using a message queue. Every +backend can register a message which then has to be read by +all backends. A message read by all backends is removed from the +queue automatically. If a message has been lost because the buffer +was full, all backends that haven't read this message will be +notified that they have to reset their cache state. This is done +at the time when they try to read the message queue. + +The message queue is implemented as a shared buffer segment. Actually, +the queue is a circle to allow fast inserting, reading (invalidate data) and +maintaining the buffer. + +Access to this shared message buffer is synchronized by the lock manager. 
+The lock manager treats the buffer as a regular relation and sets +relation level locks (with mode = LockWait) to block backends while +another backend is writing or reading the buffer. The identifiers used +for this special 'relation' are database id = 0 and relation id = 0. + +The current implementation prints regular (e)log information +when a message has been removed from the buffer because the buffer +is full, and a backend has to reset its cache state. The elog level +is NOTICE. This can be used to improve the behavior of backends +when invalidating or resetting their cache state. + + diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c new file mode 100644 index 00000000000..306300b90c3 --- /dev/null +++ b/src/backend/storage/ipc/ipc.c @@ -0,0 +1,718 @@ +/*------------------------------------------------------------------------- + * + * ipc.c-- + *    POSTGRES inter-process communication definitions. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + *    $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipc.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + * NOTES + * + *    Currently, semaphores are used (my understanding anyway) in two + *    different ways: + *    1. as mutexes on machines that don't have test-and-set (eg. + *       mips R3000). + *    2. for putting processes to sleep when waiting on a lock + *       and waking them up when the lock is free. + *    The number of semaphores in (1) is fixed and those are shared + *    among all backends. In (2), there is 1 semaphore per process and those + *    are not shared with anyone else. 
+ * -ay 4/95 + * + *------------------------------------------------------------------------- + */ +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include <errno.h> + +/* XXX - the following dependency should be moved into the defaults.mk file */ +#ifndef _IPC_ +#define _IPC_ +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/shm.h> +#endif + +#include "storage/ipc.h" +#include "utils/memutils.h" +#include "utils/elog.h" + +#if defined(PORTNAME_bsd44) +int UsePrivateMemory = 1; +#else +int UsePrivateMemory = 0; +#endif + +#if defined(PORTNAME_bsdi) +/* hacka, hacka, hacka (XXX) */ +union semun { + int val; /* value for SETVAL */ + struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */ + ushort *array; /* array for GETALL & SETALL */ +}; +#endif + + +/* ---------------------------------------------------------------- + * exit() handling stuff + * ---------------------------------------------------------------- + */ + +#define MAX_ON_EXITS 20 + +static struct ONEXIT { + void (*function)(); + caddr_t arg; +} onexit_list[ MAX_ON_EXITS ]; + +static int onexit_index; + +typedef struct _PrivateMemStruct { + int id; + char *memptr; +} PrivateMem; + +PrivateMem IpcPrivateMem[16]; + +static int +PrivateMemoryCreate(IpcMemoryKey memKey, + uint32 size) +{ + static int memid = 0; + + UsePrivateMemory = 1; + + IpcPrivateMem[memid].id = memid; + IpcPrivateMem[memid].memptr = malloc(size); + if (IpcPrivateMem[memid].memptr == NULL) + elog(WARN, "PrivateMemoryCreate: not enough memory to malloc"); + memset(IpcPrivateMem[memid].memptr, 0, size); /* XXX PURIFY */ + + return (memid++); +} + +static char * +PrivateMemoryAttach(IpcMemoryId memid) +{ + return ( IpcPrivateMem[memid].memptr ); +} + + +/* ---------------------------------------------------------------- + * exitpg + * + * this function calls all the callbacks registered + * for it (to free resources) and then calls exit. + * This should be the only function to call exit(). 
+ * -cim 2/6/90 + * ---------------------------------------------------------------- + */ +static int exitpg_inprogress = 0; + +void +exitpg(int code) +{ + int i; + + /* ---------------- + * if exitpg_inprocess is true, then it means that we + * are being invoked from within an on_exit() handler + * and so we return immediately to avoid recursion. + * ---------------- + */ + if (exitpg_inprogress) + return; + + exitpg_inprogress = 1; + + /* ---------------- + * call all the callbacks registered before calling exit(). + * ---------------- + */ + for (i = onexit_index - 1; i >= 0; --i) + (*onexit_list[i].function)(code, onexit_list[i].arg); + + exit(code); +} + +/* ------------------ + * Run all of the on_exitpg routines but don't exit in the end. + * This is used by the postmaster to re-initialize shared memory and + * semaphores after a backend dies horribly + * ------------------ + */ +void +quasi_exitpg() +{ + int i; + + /* ---------------- + * if exitpg_inprocess is true, then it means that we + * are being invoked from within an on_exit() handler + * and so we return immediately to avoid recursion. + * ---------------- + */ + if (exitpg_inprogress) + return; + + exitpg_inprogress = 1; + + /* ---------------- + * call all the callbacks registered before calling exit(). + * ---------------- + */ + for (i = onexit_index - 1; i >= 0; --i) + (*onexit_list[i].function)(0, onexit_list[i].arg); + + onexit_index = 0; + exitpg_inprogress = 0; +} + +/* ---------------------------------------------------------------- + * on_exitpg + * + * this function adds a callback function to the list of + * functions invoked by exitpg(). 
-cim 2/6/90 + * ---------------------------------------------------------------- + */ +int +on_exitpg(void (*function)(), caddr_t arg) +{ + if (onexit_index >= MAX_ON_EXITS) + return(-1); + + onexit_list[ onexit_index ].function = function; + onexit_list[ onexit_index ].arg = arg; + + ++onexit_index; + + return(0); +} + +/****************************************************************************/ +/* IPCPrivateSemaphoreKill(status, semId) */ +/* */ +/****************************************************************************/ +static void +IPCPrivateSemaphoreKill(int status, + int semId) /* caddr_t */ +{ + union semun semun; + semctl(semId, 0, IPC_RMID, semun); +} + + +/****************************************************************************/ +/* IPCPrivateMemoryKill(status, shmId) */ +/* */ +/****************************************************************************/ +static void +IPCPrivateMemoryKill(int status, + int shmId) /* caddr_t */ +{ + if ( UsePrivateMemory ) { + /* free ( IpcPrivateMem[shmId].memptr ); */ + } else { + if (shmctl(shmId, IPC_RMID, (struct shmid_ds *) NULL) < 0) { + elog(NOTICE, "IPCPrivateMemoryKill: shmctl(%d, %d, 0) failed: %m", + shmId, IPC_RMID); + } + } +} + + +/****************************************************************************/ +/* IpcSemaphoreCreate(semKey, semNum, permission, semStartValue) */ +/* */ +/* - returns a semaphore identifier: */ +/* */ +/* if key doesn't exist: return a new id, status:= IpcSemIdNotExist */ +/* if key exists: return the old id, status:= IpcSemIdExist */ +/* if semNum > MAX : return # of argument, status:=IpcInvalidArgument */ +/* */ +/****************************************************************************/ + +/* + * Note: + * XXX This should be split into two different calls. One should + * XXX be used to create a semaphore set. The other to "attach" a + * XXX existing set. It should be an error for the semaphore set + * XXX to to already exist or for it not to, respectively. 
+ * + * Currently, the semaphore sets are "attached" and an error + * is detected only when a later shared memory attach fails. + */ + +IpcSemaphoreId +IpcSemaphoreCreate(IpcSemaphoreKey semKey, + int semNum, + int permission, + int semStartValue, + int removeOnExit, + int *status) +{ + int i; + int errStatus; + int semId; + u_short array[IPC_NMAXSEM]; + union semun semun; + + /* get a semaphore if non-existent */ + /* check arguments */ + if (semNum > IPC_NMAXSEM || semNum <= 0) { + *status = IpcInvalidArgument; + return(2); /* returns the number of the invalid argument */ + } + + semId = semget(semKey, 0, 0); + + if (semId == -1) { + *status = IpcSemIdNotExist; /* there doesn't exist a semaphore */ +#ifdef DEBUG_IPC + fprintf(stderr,"calling semget with %d, %d , %d\n", + semKey, + semNum, + IPC_CREAT|permission ); +#endif + semId = semget(semKey, semNum, IPC_CREAT|permission); + + if (semId < 0) { + perror("semget"); + exitpg(3); + } + for (i = 0; i < semNum; i++) { + array[i] = semStartValue; + } + semun.array = array; + errStatus = semctl(semId, 0, SETALL, semun); + if (errStatus == -1) { + perror("semctl"); + } + + if (removeOnExit) + on_exitpg(IPCPrivateSemaphoreKill, (caddr_t)semId); + + } else { + /* there is a semaphore id for this key */ + *status = IpcSemIdExist; + } + +#ifdef DEBUG_IPC + fprintf(stderr,"\nIpcSemaphoreCreate, status %d, returns %d\n", + *status, + semId ); + fflush(stdout); + fflush(stderr); +#endif + return(semId); +} + + +/****************************************************************************/ +/* IpcSemaphoreSet() - sets the initial value of the semaphore */ +/* */ +/* note: the xxx_return variables are only used for debugging. 
*/ +/****************************************************************************/ +static int IpcSemaphoreSet_return; + +void +IpcSemaphoreSet(int semId, int semno, int value) +{ + int errStatus; + union semun semun; + + semun.val = value; + errStatus = semctl(semId, semno, SETVAL, semun); + IpcSemaphoreSet_return = errStatus; + + if (errStatus == -1) + perror("semctl"); +} + +/****************************************************************************/ +/* IpcSemaphoreKill(key) - removes a semaphore */ +/* */ +/****************************************************************************/ +void +IpcSemaphoreKill(IpcSemaphoreKey key) +{ + int semId; + union semun semun; + + /* kill semaphore if existent */ + + semId = semget(key, 0, 0); + if (semId != -1) + semctl(semId, 0, IPC_RMID, semun); +} + +/****************************************************************************/ +/* IpcSemaphoreLock(semId, sem, lock) - locks a semaphore */ +/* */ +/* note: the xxx_return variables are only used for debugging. */ +/****************************************************************************/ +static int IpcSemaphoreLock_return; + +void +IpcSemaphoreLock(IpcSemaphoreId semId, int sem, int lock) +{ + extern int errno; + int errStatus; + struct sembuf sops; + + sops.sem_op = lock; + sops.sem_flg = 0; + sops.sem_num = sem; + + /* ---------------- + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were + * sent a signal. So we try and lock the semaphore again. + * I am not certain this is correct, but the semantics aren't + * clear it fixes problems with parallel abort synchronization, + * namely that after processing an abort signal, the semaphore + * call returns with -1 (and errno == EINTR) before it should. 
+ * -cim 3/28/90 + * ---------------- + */ + do { + errStatus = semop(semId, &sops, 1); + } while (errStatus == -1 && errno == EINTR); + + IpcSemaphoreLock_return = errStatus; + + if (errStatus == -1) { + perror("semop"); + exitpg(255); + } +} + +/****************************************************************************/ +/* IpcSemaphoreUnlock(semId, sem, lock) - unlocks a semaphore */ +/* */ +/* note: the xxx_return variables are only used for debugging. */ +/****************************************************************************/ +static int IpcSemaphoreUnlock_return; + +void +IpcSemaphoreUnlock(IpcSemaphoreId semId, int sem, int lock) +{ + extern int errno; + int errStatus; + struct sembuf sops; + + sops.sem_op = -lock; + sops.sem_flg = 0; + sops.sem_num = sem; + + + /* ---------------- + * Note: if errStatus is -1 and errno == EINTR then it means we + * returned from the operation prematurely because we were + * sent a signal. So we try and lock the semaphore again. + * I am not certain this is correct, but the semantics aren't + * clear it fixes problems with parallel abort synchronization, + * namely that after processing an abort signal, the semaphore + * call returns with -1 (and errno == EINTR) before it should. 
+ * -cim 3/28/90 + * ---------------- + */ + do { + errStatus = semop(semId, &sops, 1); + } while (errStatus == -1 && errno == EINTR); + + IpcSemaphoreUnlock_return = errStatus; + + if (errStatus == -1) { + perror("semop"); + exitpg(255); + } +} + +int +IpcSemaphoreGetCount(IpcSemaphoreId semId, int sem) +{ + int semncnt; + union semun dummy; /* for Solaris */ + + semncnt = semctl(semId, sem, GETNCNT, dummy); + return semncnt; +} + +int +IpcSemaphoreGetValue(IpcSemaphoreId semId, int sem) +{ + int semval; + union semun dummy; /* for Solaris */ + + semval = semctl(semId, sem, GETVAL, dummy); + return semval; +} + +/****************************************************************************/ +/* IpcMemoryCreate(memKey) */ +/* */ +/* - returns the memory identifier, if creation succeeds */ +/* returns IpcMemCreationFailed, if failure */ +/****************************************************************************/ + +IpcMemoryId +IpcMemoryCreate(IpcMemoryKey memKey, uint32 size, int permission) +{ + IpcMemoryId shmid; + + if (memKey == PrivateIPCKey) { + /* private */ + shmid = PrivateMemoryCreate(memKey, size); + }else { + shmid = shmget(memKey, size, IPC_CREAT|permission); + } + + if (shmid < 0) { + fprintf(stderr,"IpcMemoryCreate: memKey=%d , size=%d , permission=%d", + memKey, size , permission ); + perror("IpcMemoryCreate: shmget(..., create, ...) 
failed"); + return(IpcMemCreationFailed); + } + + /* if (memKey == PrivateIPCKey) */ + on_exitpg(IPCPrivateMemoryKill, (caddr_t)shmid); + + return(shmid); +} + +/****************************************************************************/ +/* IpcMemoryIdGet(memKey, size) returns the shared memory Id */ +/* or IpcMemIdGetFailed */ +/****************************************************************************/ +IpcMemoryId +IpcMemoryIdGet(IpcMemoryKey memKey, uint32 size) +{ + IpcMemoryId shmid; + + shmid = shmget(memKey, size, 0); + + if (shmid < 0) { + fprintf(stderr,"IpcMemoryIdGet: memKey=%d , size=%d , permission=%d", + memKey, size , 0 ); + perror("IpcMemoryIdGet: shmget() failed"); + return(IpcMemIdGetFailed); + } + + return(shmid); +} + +/****************************************************************************/ +/* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */ +/* from a backend address space */ +/* (only called by backends running under the postmaster) */ +/****************************************************************************/ +void +IpcMemoryDetach(int status, char *shmaddr) +{ + if (shmdt(shmaddr) < 0) { + elog(NOTICE, "IpcMemoryDetach: shmdt(0x%x): %m", shmaddr); + } +} + +/****************************************************************************/ +/* IpcMemoryAttach(memId) returns the adress of shared memory */ +/* or IpcMemAttachFailed */ +/* */ +/* CALL IT: addr = (struct <MemoryStructure> *) IpcMemoryAttach(memId); */ +/* */ +/****************************************************************************/ +char * +IpcMemoryAttach(IpcMemoryId memId) +{ + char *memAddress; + + if (UsePrivateMemory) { + memAddress = (char *) PrivateMemoryAttach(memId); + } else { + memAddress = (char *) shmat(memId, 0, 0); + } + + /* if ( *memAddress == -1) { XXX ??? 
*/ + if ( memAddress == (char *)-1) { + perror("IpcMemoryAttach: shmat() failed"); + return(IpcMemAttachFailed); + } + + if (!UsePrivateMemory) + on_exitpg(IpcMemoryDetach, (caddr_t) memAddress); + + return((char *) memAddress); +} + + +/****************************************************************************/ +/* IpcMemoryKill(memKey) removes a shared memory segment */ +/* (only called by the postmaster and standalone backends) */ +/****************************************************************************/ +void +IpcMemoryKill(IpcMemoryKey memKey) +{ + IpcMemoryId shmid; + + if (!UsePrivateMemory && (shmid = shmget(memKey, 0, 0)) >= 0) { + if (shmctl(shmid, IPC_RMID, (struct shmid_ds *) NULL) < 0) { + elog(NOTICE, "IpcMemoryKill: shmctl(%d, %d, 0) failed: %m", + shmid, IPC_RMID); + } + } +} + +#ifdef HAS_TEST_AND_SET +/* ------------------ + * use hardware locks to replace semaphores for sequent machines + * to avoid costs of swapping processes and to provide unlimited + * supply of locks. 
+ * ------------------ + */ +static SLock *SLockArray = NULL; +static SLock **FreeSLockPP; +static int *UnusedSLockIP; +static slock_t *SLockMemoryLock; +static IpcMemoryId SLockMemoryId = -1; + +struct ipcdummy { /* to get alignment/size right */ + SLock *free; + int unused; + slock_t memlock; + SLock slocks[NSLOCKS]; +}; +static int SLockMemorySize = sizeof(struct ipcdummy); + +void +CreateAndInitSLockMemory(IPCKey key) +{ + int id; + SLock *slckP; + + SLockMemoryId = IpcMemoryCreate(key, + SLockMemorySize, + 0700); + AttachSLockMemory(key); + *FreeSLockPP = NULL; + *UnusedSLockIP = (int)FIRSTFREELOCKID; + for (id=0; id<(int)FIRSTFREELOCKID; id++) { + slckP = &(SLockArray[id]); + S_INIT_LOCK(&(slckP->locklock)); + slckP->flag = NOLOCK; + slckP->nshlocks = 0; + S_INIT_LOCK(&(slckP->shlock)); + S_INIT_LOCK(&(slckP->exlock)); + S_INIT_LOCK(&(slckP->comlock)); + slckP->next = NULL; + } + return; +} + +void +AttachSLockMemory(IPCKey key) +{ + struct ipcdummy *slockM; + + if (SLockMemoryId == -1) + SLockMemoryId = IpcMemoryIdGet(key,SLockMemorySize); + if (SLockMemoryId == -1) + elog(FATAL, "SLockMemory not in shared memory"); + slockM = (struct ipcdummy *) IpcMemoryAttach(SLockMemoryId); + if (slockM == IpcMemAttachFailed) + elog(FATAL, "AttachSLockMemory: could not attach segment"); + FreeSLockPP = (SLock **) &(slockM->free); + UnusedSLockIP = (int *) &(slockM->unused); + SLockMemoryLock = (slock_t *) &(slockM->memlock); + S_INIT_LOCK(SLockMemoryLock); + SLockArray = (SLock *) &(slockM->slocks[0]); + return; +} + + +#ifdef LOCKDEBUG +#define PRINT_LOCK(LOCK) printf("(locklock = %d, flag = %d, nshlocks = %d, \ +shlock = %d, exlock =%d)\n", LOCK->locklock, \ + LOCK->flag, LOCK->nshlocks, LOCK->shlock, \ + LOCK->exlock) +#endif + +void +ExclusiveLock(int lockid) +{ + SLock *slckP; + slckP = &(SLockArray[lockid]); +#ifdef LOCKDEBUG + printf("ExclusiveLock(%d)\n", lockid); + printf("IN: "); + PRINT_LOCK(slckP); +#endif + ex_try_again: + S_LOCK(&(slckP->locklock)); + 
switch (slckP->flag) { + case NOLOCK: + slckP->flag = EXCLUSIVELOCK; + S_LOCK(&(slckP->exlock)); + S_LOCK(&(slckP->shlock)); + S_UNLOCK(&(slckP->locklock)); +#ifdef LOCKDEBUG + printf("OUT: "); + PRINT_LOCK(slckP); +#endif + return; + case SHAREDLOCK: + case EXCLUSIVELOCK: + S_UNLOCK(&(slckP->locklock)); + S_LOCK(&(slckP->exlock)); + S_UNLOCK(&(slckP->exlock)); + goto ex_try_again; + } +} + +void +ExclusiveUnlock(int lockid) +{ + SLock *slckP; + + slckP = &(SLockArray[lockid]); +#ifdef LOCKDEBUG + printf("ExclusiveUnlock(%d)\n", lockid); + printf("IN: "); + PRINT_LOCK(slckP); +#endif + S_LOCK(&(slckP->locklock)); + /* ------------- + * give favor to read processes + * ------------- + */ + slckP->flag = NOLOCK; + if (slckP->nshlocks > 0) { + while (slckP->nshlocks > 0) { + S_UNLOCK(&(slckP->shlock)); + S_LOCK(&(slckP->comlock)); + } + S_UNLOCK(&(slckP->shlock)); + } + else { + S_UNLOCK(&(slckP->shlock)); + } + S_UNLOCK(&(slckP->exlock)); + S_UNLOCK(&(slckP->locklock)); +#ifdef LOCKDEBUG + printf("OUT: "); + PRINT_LOCK(slckP); +#endif + return; +} + +bool +LockIsFree(int lockid) +{ + return(SLockArray[lockid].flag == NOLOCK); +} + +#endif /* HAS_TEST_AND_SET */ diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c new file mode 100644 index 00000000000..18d3cccd0ee --- /dev/null +++ b/src/backend/storage/ipc/ipci.c @@ -0,0 +1,149 @@ +/*------------------------------------------------------------------------- + * + * ipci.c-- + * POSTGRES inter-process communication initialization code. 
+ * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/ipci.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "storage/ipc.h" +#include "storage/multilev.h" +#include "utils/elog.h" +#include "storage/sinval.h" +#include "storage/bufmgr.h" +#include "storage/proc.h" +#include "storage/smgr.h" +#include "storage/lock.h" +#include "miscadmin.h" /* for DebugLvl */ + +/* + * SystemPortAddressCreateMemoryKey -- + * Returns a memory key given a port address. + */ +IPCKey +SystemPortAddressCreateIPCKey(SystemPortAddress address) +{ + Assert(address < 32768); /* XXX */ + + return (SystemPortAddressGetIPCKey(address)); +} + +/* + * CreateSharedMemoryAndSemaphores -- + * Creates and initializes shared memory and semaphores. + */ +/************************************************** + + CreateSharedMemoryAndSemaphores + is called exactly *ONCE* by the postmaster. + It is *NEVER* called by the postgres backend + + 0) destroy any existing semaphores for both buffer + and lock managers. + 1) create the appropriate *SHARED* memory segments + for the two resource managers. 
+ + **************************************************/ + +void +CreateSharedMemoryAndSemaphores(IPCKey key) +{ + int size; + +#ifdef HAS_TEST_AND_SET + /* --------------- + * create shared memory for slocks + * -------------- + */ + CreateAndInitSLockMemory(IPCKeyGetSLockSharedMemoryKey(key)); +#endif + /* ---------------- + * kill and create the buffer manager buffer pool (and semaphore) + * ---------------- + */ + CreateSpinlocks(IPCKeyGetSpinLockSemaphoreKey(key)); + size = BufferShmemSize() + LockShmemSize(); + +#ifdef MAIN_MEMORY + size += MMShmemSize(); +#endif /* MAIN_MEMORY */ + + if (DebugLvl > 1) { + fprintf(stderr, "binding ShmemCreate(key=%x, size=%d)\n", + IPCKeyGetBufferMemoryKey(key), size); + } + ShmemCreate(IPCKeyGetBufferMemoryKey(key), size); + ShmemBindingTabReset(); + InitShmem(key, size); + InitBufferPool(key); + + /* ---------------- + * do the lock table stuff + * ---------------- + */ + InitLocks(); + InitMultiLevelLockm(); + if (InitMultiLevelLockm() == INVALID_TABLEID) + elog(FATAL, "Couldn't create the lock table"); + + /* ---------------- + * do process table stuff + * ---------------- + */ + InitProcGlobal(key); + on_exitpg(ProcFreeAllSemaphores, 0); + + CreateSharedInvalidationState(key); +} + + +/* + * AttachSharedMemoryAndSemaphores -- + * Attachs existant shared memory and semaphores. 
+ */ +void +AttachSharedMemoryAndSemaphores(IPCKey key) +{ + int size; + + /* ---------------- + * create rather than attach if using private key + * ---------------- + */ + if (key == PrivateIPCKey) { + CreateSharedMemoryAndSemaphores(key); + return; + } + +#ifdef HAS_TEST_AND_SET + /* ---------------- + * attach the slock shared memory + * ---------------- + */ + AttachSLockMemory(IPCKeyGetSLockSharedMemoryKey(key)); +#endif + /* ---------------- + * attach the buffer manager buffer pool (and semaphore) + * ---------------- + */ + size = BufferShmemSize() + LockShmemSize(); + InitShmem(key, size); + InitBufferPool(key); + + /* ---------------- + * initialize lock table stuff + * ---------------- + */ + InitLocks(); + if (InitMultiLevelLockm() == INVALID_TABLEID) + elog(FATAL, "Couldn't attach to the lock table"); + + AttachSharedInvalidationState(key); +} diff --git a/src/backend/storage/ipc/s_lock.c b/src/backend/storage/ipc/s_lock.c new file mode 100644 index 00000000000..3cbe796fc59 --- /dev/null +++ b/src/backend/storage/ipc/s_lock.c @@ -0,0 +1,440 @@ +/*------------------------------------------------------------------------- + * + * s_lock.c-- + * This file contains the implementation (if any) for spinlocks. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/s_lock.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +/* + * DESCRIPTION + * The following code fragment should be written (in assembly + * language) on machines that have a native test-and-set instruction: + * + * void + * S_LOCK(char_address) + * char *char_address; + * { + * while (test_and_set(char_address)) + * ; + * } + * + * If this is not done, POSTGRES will default to using System V + * semaphores (and take a large performance hit -- around 40% of + * its time on a DS5000/240 is spent in semop(3)...). 
+ * + * NOTES + * AIX has a test-and-set but the recommended interface is the cs(3) + * system call. This provides an 8-instruction (plus system call + * overhead) uninterruptible compare-and-set operation. True + * spinlocks might be faster but using cs(3) still speeds up the + * regression test suite by about 25%. I don't have an assembler + * manual for POWER in any case. + * + */ +#ifdef WIN32 +#include <windows.h> +#endif /* WIN32 */ +#include "storage/ipc.h" + + +#if defined(HAS_TEST_AND_SET) + +#if defined (PORTNAME_next) +/* + * NEXTSTEP (mach) + * slock_t is defined as a struct mutex. + */ +void +S_LOCK(slock_t *lock) +{ + mutex_lock(lock); +} +void +S_UNLOCK(slock_t *lock) +{ + mutex_unlock(lock); +} +void +S_INIT_LOCK(slock_t *lock) +{ + mutex_init(lock); +} + + /* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */ +int + S_LOCK_FREE(slock_t *lock) +{ + /* For Mach, we have to delve inside the entrails of `struct +mutex'. Ick! */ + return (lock->lock == 0); +} + +#endif /* PORTNAME_next */ + + + +#if defined(PORTNAME_irix5) +/* + * SGI IRIX 5 + * slock_t is defined as a struct abilock_t, which has a single unsigned long + * member. + * + * This stuff may be supplemented in the future with Masato Kataoka's MIPS-II + * assembly from his NECEWS SVR4 port, but we probably ought to retain this + * for the R3000 chips out there. + */ +void +S_LOCK(slock_t *lock) +{ + /* spin_lock(lock); */ + while (!acquire_lock(lock)) + ; +} + +void +S_UNLOCK(slock_t *lock) +{ + (void)release_lock(lock); +} + +void +S_INIT_LOCK(slock_t *lock) +{ + (void)init_lock(lock); +} + +/* S_LOCK_FREE should return 1 if lock is free; 0 if lock is locked */ +int +S_LOCK_FREE(slock_t *lock) +{ + return(stat_lock(lock)==UNLOCKED); +} + +#endif /* PORTNAME_irix5 */ + + +/* + * OSF/1 (Alpha AXP) + * + * Note that slock_t on the Alpha AXP is msemaphore instead of char + * (see storage/ipc.h). 
+ */ + +#if defined(PORTNAME_alpha) + +void +S_LOCK(slock_t *lock) +{ + while (msem_lock(lock, MSEM_IF_NOWAIT) < 0) + ; +} + +void +S_UNLOCK(slock_t *lock) +{ + (void) msem_unlock(lock, 0); +} + +void +S_INIT_LOCK(slock_t *lock) +{ + (void) msem_init(lock, MSEM_UNLOCKED); +} + +int +S_LOCK_FREE(slock_t *lock) +{ + return(lock->msem_state ? 0 : 1); +} + +#endif /* PORTNAME_alpha */ + +/* + * Solaris 2 + */ + +#if defined(PORTNAME_sparc_solaris) + +/* defined in port/.../tas.s */ +extern int tas(slock_t *lock); + +void +S_LOCK(slock_t *lock) +{ + while (tas(lock)) + ; +} + +void +S_UNLOCK(slock_t *lock) +{ + *lock = 0; +} + +void +S_INIT_LOCK(slock_t *lock) +{ + S_UNLOCK(lock); +} + +#endif /* PORTNAME_sparc_solaris */ + +/* + * AIX (POWER) + * + * Note that slock_t on POWER/POWER2/PowerPC is int instead of char + * (see storage/ipc.h). + */ + +#if defined(PORTNAME_aix) + +void +S_LOCK(slock_t *lock) +{ + while (cs((int *) lock, 0, 1)) + ; +} + +void +S_UNLOCK(slock_t *lock) +{ + *lock = 0; +} + +void +S_INIT_LOCK(slock_t *lock) +{ + S_UNLOCK(lock); +} + +#endif /* PORTNAME_aix */ + +/* + * HP-UX (PA-RISC) + * + * Note that slock_t on PA-RISC is a structure instead of char + * (see storage/ipc.h). + */ + +#if defined(PORTNAME_hpux) + +/* defined in port/.../tas.s */ +extern int tas(slock_t *lock); + +/* +* a "set" slock_t has a single word cleared. a "clear" slock_t has +* all words set to non-zero. +*/ +static slock_t clear_lock = { -1, -1, -1, -1 }; + +void +S_LOCK(slock_t *lock) +{ + while (tas(lock)) + ; +} + +void +S_UNLOCK(slock_t *lock) +{ + *lock = clear_lock; /* struct assignment */ +} + +void +S_INIT_LOCK(slock_t *lock) +{ + S_UNLOCK(lock); +} + +int +S_LOCK_FREE(slock_t *lock) +{ + register int *lock_word = (int *) (((long) lock + 15) & ~15); + + return(*lock_word != 0); +} + +#endif /* PORTNAME_hpux */ + +/* + * sun3 + */ + +#if (defined(sun) && ! 
defined(sparc)) + +void +S_LOCK(slock_t *lock) +{ + while (tas(lock)); +} + +void +S_UNLOCK(slock_t *lock) +{ + *lock = 0; +} + +void +S_INIT_LOCK(slock_t *lock) +{ + S_UNLOCK(lock); +} + +static int +tas_dummy() +{ + asm("LLA0:"); + asm(" .data"); + asm(" .text"); + asm("|#PROC# 04"); + asm(" .globl _tas"); + asm("_tas:"); + asm("|#PROLOGUE# 1"); + asm(" movel sp@(0x4),a0"); + asm(" tas a0@"); + asm(" beq LLA1"); + asm(" moveq #-128,d0"); + asm(" rts"); + asm("LLA1:"); + asm(" moveq #0,d0"); + asm(" rts"); + asm(" .data"); +} + +#endif + +/* + * SPARC (SunOS 4) + */ + +#if defined(PORTNAME_sparc) + +/* if we're using -ansi w/ gcc, use __asm__ instead of asm */ +#if defined(__STRICT_ANSI__) +#define asm(x) __asm__(x) +#endif + +static int +tas_dummy() +{ + asm(".seg \"data\""); + asm(".seg \"text\""); + asm(".global _tas"); + asm("_tas:"); + + /* + * Sparc atomic test and set (sparc calls it "atomic load-store") + */ + + asm("ldstub [%r8], %r8"); + + /* + * Did test and set actually do the set? + */ + + asm("tst %r8"); + + asm("be,a ReturnZero"); + + /* + * otherwise, just return. 
+ */ + + asm("clr %r8"); + asm("mov 0x1, %r8"); + asm("ReturnZero:"); + asm("retl"); + asm("nop"); +} + +void +S_LOCK(unsigned char *addr) +{ + while (tas(addr)); +} + + +/* + * addr should be as in the above S_LOCK routine + */ +void +S_UNLOCK(unsigned char *addr) +{ + *addr = 0; +} + +void +S_INIT_LOCK(unsigned char *addr) +{ + *addr = 0; +} + +#endif /* PORTNAME_sparc */ + +/* + * Linux and friends + */ + +#if defined(PORTNAME_linux) || defined(PORTNAME_BSD44_derived) + +int +tas(slock_t *m) +{ + slock_t res; + __asm__("xchgb %0,%1":"=q" (res),"=m" (*m):"0" (0x1)); + return(res); +} + +void +S_LOCK(slock_t *lock) +{ + while (tas(lock)) + ; +} + +void +S_UNLOCK(slock_t *lock) +{ + *lock = 0; +} + +void +S_INIT_LOCK(slock_t *lock) +{ + S_UNLOCK(lock); +} + +#endif /* PORTNAME_linux || PORTNAME_BSD44_derived */ + + +#endif /* HAS_TEST_AND_SET */ + + +#ifdef WIN32 +void +S_LOCK(HANDLE *lock) +{ + int x = 0; + x = x / x; +} + +void +S_UNLOCK(HANDLE *lock) +{ + int x = 0; + x = x / x; +} + +void +S_INIT_LOCK(HANDLE *lock) +{ + int x = 0; + x = x / x; +} +#endif /*WIN32*/ diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c new file mode 100644 index 00000000000..4eba3729ac8 --- /dev/null +++ b/src/backend/storage/ipc/shmem.c @@ -0,0 +1,561 @@ +/*------------------------------------------------------------------------- + * + * shmem.c-- + * create shared memory and initialize shared memory data structures. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +/* + * POSTGRES processes share one or more regions of shared memory. + * The shared memory is created by a postmaster and is "attached to" + * by each of the backends. 
The routines in this file are used for + * allocating and binding to shared memory data structures. + * + * NOTES: + * (a) There are three kinds of shared memory data structures + * available to POSTGRES: fixed-size structures, queues and hash + * tables. Fixed-size structures contain things like global variables + * for a module and should never be allocated after the process + * initialization phase. Hash tables have a fixed maximum size, but + * their actual size can vary dynamically. When entries are added + * to the table, more space is allocated. Queues link data structures + * that have been allocated either as fixed size structures or as hash + * buckets. Each shared data structure has a string name to identify + * it (assigned in the module that declares it). + * + * (b) During initialization, each module looks for its + * shared data structures in a hash table called the "Binding Table". + * If the data structure is not present, the caller can allocate + * a new one and initialize it. If the data structure is present, + * the caller "attaches" to the structure by initializing a pointer + * in the local address space. + * The binding table has two purposes: first, it gives us + * a simple model of how the world looks when a backend process + * initializes. If something is present in the binding table, + * it is initialized. If it is not, it is uninitialized. Second, + * the binding table allows us to allocate shared memory on demand + * instead of trying to preallocate structures and hard-wire the + * sizes and locations in header files. If you are using a lot + * of shared memory in a lot of different places (and changing + * things during development), this is important. + * + * (c) memory allocation model: shared memory can never be + * freed, once allocated. Each hash table has its own free list, + * so hash buckets can be reused when an item is deleted. 
However, + * if one hash table grows very large and then shrinks, its space + * cannot be redistributed to other tables. We could build a simple + * hash bucket garbage collector if need be. Right now, it seems + * unnecessary. + * + * See InitSem() in sem.c for an example of how to use the + * binding table. + * + */ +#include <stdio.h> +#include <string.h> +#include "postgres.h" +#include "storage/ipc.h" +#include "storage/shmem.h" +#include "storage/spin.h" +#include "utils/hsearch.h" +#include "utils/elog.h" + +/* shared memory global variables */ + +unsigned long ShmemBase = 0; /* start and end address of + * shared memory + */ +static unsigned long ShmemEnd = 0; +static unsigned long ShmemSize = 0; /* current size (and default) */ + +SPINLOCK ShmemLock; /* lock for shared memory allocation */ + +SPINLOCK BindingLock; /* lock for binding table access */ + +static unsigned long *ShmemFreeStart = NULL; /* pointer to the OFFSET of + * first free shared memory + */ +static unsigned long *ShmemBindingTabOffset = NULL; /* start of the binding + * table (for bootstrap) + */ +static int ShmemBootstrap = FALSE; /* flag becomes true when shared mem + * is created by POSTMASTER + */ + +static HTAB *BindingTable = NULL; + +/* --------------------- + * ShmemBindingTabReset() - Resets the binding table to NULL.... + * useful when the postmaster destroys existing shared memory + * and creates all new segments after a backend crash. + * ---------------------- + */ +void +ShmemBindingTabReset() +{ + BindingTable = (HTAB *)NULL; +} + +/* + * CreateSharedRegion() -- + * + * This routine is called once by the postmaster to + * initialize the shared buffer pool. Assume there is + * only one postmaster so no synchronization is necessary + * until after this routine completes successfully. + * + * key is a unique identifier for the shmem region. + * size is the size of the region. 
+ */ +static IpcMemoryId ShmemId; + +void +ShmemCreate(unsigned int key, unsigned int size) +{ + if (size) + ShmemSize = size; + /* create shared mem region */ + if ((ShmemId=IpcMemoryCreate(key,ShmemSize,IPCProtection)) + ==IpcMemCreationFailed) { + elog(FATAL,"ShmemCreate: cannot create region"); + exit(1); + } + + /* ShmemBootstrap is true if shared memory has been + * created, but not yet initialized. Only the + * postmaster/creator-of-all-things should have + * this flag set. + */ + ShmemBootstrap = TRUE; +} + +/* + * InitShmem() -- map region into process address space + * and initialize shared data structures. + * + */ +int +InitShmem(unsigned int key, unsigned int size) +{ + Pointer sharedRegion; + unsigned long currFreeSpace; + + HASHCTL info; + int hash_flags; + BindingEnt * result,item; + bool found; + IpcMemoryId shmid; + + /* if zero key, use default memory size */ + if (size) + ShmemSize = size; + + /* default key is 0 */ + + /* attach to shared memory region (SysV or BSD OS specific) */ + if (ShmemBootstrap && key == PrivateIPCKey) + /* if we are running backend alone */ + shmid = ShmemId; + else + shmid = IpcMemoryIdGet(IPCKeyGetBufferMemoryKey(key), ShmemSize); + sharedRegion = IpcMemoryAttach(shmid); + if (sharedRegion == NULL) { + elog(FATAL,"AttachSharedRegion: couldn't attach to shmem\n"); + return(FALSE); + } + + /* get pointers to the dimensions of shared memory */ + ShmemBase = (unsigned long) sharedRegion; + ShmemEnd = (unsigned long) sharedRegion + ShmemSize; + currFreeSpace = 0; + + /* First long in shared memory is the count of available space */ + ShmemFreeStart = (unsigned long *) ShmemBase; + /* next is a shmem pointer to the binding table */ + ShmemBindingTabOffset = ShmemFreeStart + 1; + + currFreeSpace += + sizeof(ShmemFreeStart) + sizeof(ShmemBindingTabOffset); + + /* bootstrap initialize spin locks so we can start to use the + * allocator and binding table. + */ + if (! 
InitSpinLocks(ShmemBootstrap, IPCKeyGetSpinLockSemaphoreKey(key))) { + return(FALSE); + } + + /* We have just allocated additional space for two spinlocks. + * Now setup the global free space count + */ + if (ShmemBootstrap) { + *ShmemFreeStart = currFreeSpace; + } + + /* if ShmemFreeStart is NULL, then the allocator won't work */ + Assert(*ShmemFreeStart); + + /* create OR attach to the shared memory binding table */ + info.keysize = BTABLE_KEYSIZE; + info.datasize = BTABLE_DATASIZE; + hash_flags = (HASH_ELEM); + + /* This will acquire the binding table lock, but not release it. */ + BindingTable = ShmemInitHash("BindingTable", + BTABLE_SIZE,BTABLE_SIZE, + &info,hash_flags); + + if (! BindingTable) { + elog(FATAL,"InitShmem: couldn't initialize Binding Table"); + return(FALSE); + } + + /* Now, check the binding table for an entry to the binding + * table. If there is an entry there, someone else created + * the table. Otherwise, we did and we have to initialize it. + */ + memset(item.key, 0, BTABLE_KEYSIZE); + strncpy(item.key,"BindingTable",BTABLE_KEYSIZE); + + result = (BindingEnt *) + hash_search(BindingTable,(char *) &item,HASH_ENTER, &found); + + + if (! result ) { + elog(FATAL,"InitShmem: corrupted binding table"); + return(FALSE); + } + + if (! found) { + /* bootstrapping shmem: we have to initialize the + * binding table now. + */ + + Assert(ShmemBootstrap); + result->location = MAKE_OFFSET(BindingTable->hctl); + *ShmemBindingTabOffset = result->location; + result->size = BTABLE_SIZE; + + ShmemBootstrap = FALSE; + + } else { + Assert(! ShmemBootstrap); + } + /* now release the lock acquired in ShmemHashInit */ + SpinRelease (BindingLock); + + Assert (result->location == MAKE_OFFSET(BindingTable->hctl)); + + return(TRUE); +} + +/* + * ShmemAlloc -- allocate word-aligned byte string from + * shared memory + * + * Assumes ShmemLock and ShmemFreeStart are initialized. + * Returns: real pointer to memory or NULL if we are out + * of space. 
Has to return a real pointer in order + * to be compatable with malloc(). + */ +long * +ShmemAlloc(unsigned long size) +{ + unsigned long tmpFree; + long *newSpace; + + /* + * ensure space is word aligned. + * + * Word-alignment is not good enough. We have to be more + * conservative: doubles need 8-byte alignment. (We probably only need + * this on RISC platforms but this is not a big waste of space.) + * - ay 12/94 + */ + if (size % sizeof(double)) + size += sizeof(double) - (size % sizeof(double)); + + Assert(*ShmemFreeStart); + + SpinAcquire(ShmemLock); + + tmpFree = *ShmemFreeStart + size; + if (tmpFree <= ShmemSize) { + newSpace = (long *)MAKE_PTR(*ShmemFreeStart); + *ShmemFreeStart += size; + } else { + newSpace = NULL; + } + + SpinRelease(ShmemLock); + + if (! newSpace) { + elog(NOTICE,"ShmemAlloc: out of memory "); + } + return(newSpace); +} + +/* + * ShmemIsValid -- test if an offset refers to valid shared memory + * + * Returns TRUE if the pointer is valid. + */ +int +ShmemIsValid(unsigned long addr) +{ + return ((addr<ShmemEnd) && (addr>=ShmemBase)); +} + +/* + * ShmemInitHash -- Create/Attach to and initialize + * shared memory hash table. + * + * Notes: + * + * assume caller is doing some kind of synchronization + * so that two people dont try to create/initialize the + * table at once. Use SpinAlloc() to create a spinlock + * for the structure before creating the structure itself. + */ +HTAB * +ShmemInitHash(char *name, /* table string name for binding */ + long init_size, /* initial size */ + long max_size, /* max size of the table */ + HASHCTL *infoP, /* info about key and bucket size */ + int hash_flags) /* info about infoP */ +{ + bool found; + long * location; + + /* shared memory hash tables have a fixed max size so that the + * control structures don't try to grow. The segbase is for + * calculating pointer values. The shared memory allocator + * must be specified. 
+ */ + infoP->segbase = (long *) ShmemBase; + infoP->alloc = ShmemAlloc; + infoP->max_size = max_size; + hash_flags |= HASH_SHARED_MEM; + + /* look it up in the binding table */ + location = + ShmemInitStruct(name,my_log2(max_size) + sizeof(HHDR),&found); + + /* binding table is corrupted. Let someone else give the + * error message since they have more information + */ + if (location == NULL) { + return(0); + } + + /* it already exists, attach to it rather than allocate and + * initialize new space + */ + if (found) { + hash_flags |= HASH_ATTACH; + } + + /* these structures were allocated or bound in ShmemInitStruct */ + /* control information and parameters */ + infoP->hctl = (long *) location; + /* directory for hash lookup */ + infoP->dir = (long *) (location + sizeof(HHDR)); + + return(hash_create(init_size, infoP, hash_flags));; +} + +/* + * ShmemPIDLookup -- lookup process data structure using process id + * + * Returns: TRUE if no error. locationPtr is initialized if PID is + * found in the binding table. + * + * NOTES: + * only information about success or failure is the value of + * locationPtr. + */ +bool +ShmemPIDLookup(int pid, SHMEM_OFFSET* locationPtr) +{ + BindingEnt * result,item; + bool found; + + Assert (BindingTable); + memset(item.key, 0, BTABLE_KEYSIZE); + sprintf(item.key,"PID %d",pid); + + SpinAcquire(BindingLock); + result = (BindingEnt *) + hash_search(BindingTable,(char *) &item, HASH_ENTER, &found); + + if (! result) { + + SpinRelease(BindingLock); + elog(WARN,"ShmemInitPID: BindingTable corrupted"); + return(FALSE); + + } + + if (found) { + *locationPtr = result->location; + } else { + result->location = *locationPtr; + } + + SpinRelease(BindingLock); + return (TRUE); +} + +/* + * ShmemPIDDestroy -- destroy binding table entry for process + * using process id + * + * Returns: offset of the process struct in shared memory or + * INVALID_OFFSET if not found. 
+ *
+ * Side Effect: removes the entry from the binding table
+ */
+SHMEM_OFFSET
+ShmemPIDDestroy(int pid)
+{
+	BindingEnt *	result,item;
+	bool		found;
+	SHMEM_OFFSET 	location;
+
+	Assert(BindingTable);
+
+	memset(item.key, 0, BTABLE_KEYSIZE);
+	sprintf(item.key,"PID %d",pid);
+
+	SpinAcquire(BindingLock);
+	result = (BindingEnt *)
+	    hash_search(BindingTable,(char *) &item, HASH_REMOVE, &found);
+
+	/* bug fix: test for a corrupted table BEFORE touching result.
+	 * The old code read result->location first, which would
+	 * dereference a NULL pointer if hash_search failed while
+	 * leaving *foundPtr set.
+	 */
+	if (! result) {
+
+	    SpinRelease(BindingLock);
+	    elog(WARN,"ShmemPIDDestroy: PID table corrupted");
+	    return(INVALID_OFFSET);
+
+	}
+
+	/* copy the location out while the lock is still held */
+	if (found)
+	    location = result->location;
+	SpinRelease(BindingLock);
+
+	if (found)
+	    return (location);
+	else {
+	    return(INVALID_OFFSET);
+	}
+}
+
+/*
+ * ShmemInitStruct -- Create/attach to a structure in shared
+ *	memory.
+ *
+ *	This is called during initialization to find or allocate
+ *		a data structure in shared memory.  If no other processes
+ *		have created the structure, this routine allocates space
+ *		for it.  If it exists already, a pointer to the existing
+ *		table is returned.
+ *
+ * Returns: real pointer to the object.  FoundPtr is TRUE if
+ *	the object is already in the binding table (hence, already
+ *	initialized).
+ */
+long *
+ShmemInitStruct(char *name, unsigned long size, bool *foundPtr)
+{
+	BindingEnt *	result,item;
+	long * structPtr;
+
+	strncpy(item.key,name,BTABLE_KEYSIZE);
+	item.location = BAD_LOCATION;
+
+	SpinAcquire(BindingLock);
+
+	if (! BindingTable) {
+	    /* Assert() is a macro now. substitutes inside quotes. */
+	    char *strname = "BindingTable";
+
+	    /* If the binding table doesnt exist, we fake it.
+	     *
+	     * If we are creating the first binding table, then let
+	     * shmemalloc() allocate the space for a new HTAB.  Otherwise,
+	     * find the old one and return that.  Notice that the
+	     * BindingLock is held until the binding table has been completely
+	     * initialized.
+	     */
+	    Assert (!
strcmp(name,strname)) ; + if (ShmemBootstrap) { + /* in POSTMASTER/Single process */ + + *foundPtr = FALSE; + return((long *)ShmemAlloc(size)); + + } else { + Assert (ShmemBindingTabOffset); + + *foundPtr = TRUE; + return((long *)MAKE_PTR(*ShmemBindingTabOffset)); + } + + + } else { + /* look it up in the bindint table */ + result = (BindingEnt *) + hash_search(BindingTable,(char *) &item,HASH_ENTER, foundPtr); + } + + if (! result) { + + SpinRelease(BindingLock); + + elog(WARN,"ShmemInitStruct: Binding Table corrupted"); + return(NULL); + + } else if (*foundPtr) { + /* + * Structure is in the binding table so someone else has allocated + * it already. The size better be the same as the size we are + * trying to initialize to or there is a name conflict (or worse). + */ + if (result->size != size) { + SpinRelease(BindingLock); + + elog(NOTICE,"ShmemInitStruct: BindingTable entry size is wrong"); + /* let caller print its message too */ + return(NULL); + } + structPtr = (long *)MAKE_PTR(result->location); + } else { + + /* It isn't in the table yet. allocate and initialize it */ + structPtr = ShmemAlloc((long)size); + if (! 
structPtr) { + /* out of memory */ + Assert (BindingTable); + (void) hash_search(BindingTable,(char *) &item,HASH_REMOVE, foundPtr); + SpinRelease(BindingLock); + *foundPtr = FALSE; + + elog(NOTICE,"ShmemInitStruct: cannot allocate '%s'", + name); + return(NULL); + } + result->size = size; + result->location = MAKE_OFFSET(structPtr); + } + Assert (ShmemIsValid((unsigned long)structPtr)); + + SpinRelease(BindingLock); + return(structPtr); +} + + + diff --git a/src/backend/storage/ipc/shmqueue.c b/src/backend/storage/ipc/shmqueue.c new file mode 100644 index 00000000000..f08546742b5 --- /dev/null +++ b/src/backend/storage/ipc/shmqueue.c @@ -0,0 +1,251 @@ +/*------------------------------------------------------------------------- + * + * shmqueue.c-- + * shared memory linked lists + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmqueue.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + * NOTES + * + * Package for managing doubly-linked lists in shared memory. + * The only tricky thing is that SHM_QUEUE will usually be a field + * in a larger record. SHMQueueGetFirst has to return a pointer + * to the record itself instead of a pointer to the SHMQueue field + * of the record. It takes an extra pointer and does some extra + * pointer arithmetic to do this correctly. + * + * NOTE: These are set up so they can be turned into macros some day. 
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <stdio.h>	/* for sprintf() */
+#include "postgres.h"
+#include "storage/shmem.h"	/* where the declarations go */
+#include "utils/elog.h"
+
+/*#define SHMQUEUE_DEBUG*/
+#ifdef SHMQUEUE_DEBUG
+#define SHMQUEUE_DEBUG_DEL	/* deletions */
+#define SHMQUEUE_DEBUG_HD	/* head inserts */
+#define SHMQUEUE_DEBUG_TL	/* tail inserts */
+#define SHMQUEUE_DEBUG_ELOG NOTICE
+#endif /* SHMQUEUE_DEBUG */
+
+/*
+ * SHMQueueInit -- make the head of a new queue point
+ *	to itself
+ */
+void
+SHMQueueInit(SHM_QUEUE *queue)
+{
+	Assert(SHM_PTR_VALID(queue));
+	(queue)->prev = (queue)->next = MAKE_OFFSET(queue);
+}
+
+/*
+ * SHMQueueIsDetached -- TRUE if element is not currently
+ *	in a queue.
+ */
+bool
+SHMQueueIsDetached(SHM_QUEUE *queue)
+{
+	Assert(SHM_PTR_VALID(queue));
+	return ((queue)->prev == INVALID_OFFSET);
+}
+
+/*
+ * SHMQueueElemInit -- clear an element's links
+ */
+void
+SHMQueueElemInit(SHM_QUEUE *queue)
+{
+	Assert(SHM_PTR_VALID(queue));
+	(queue)->prev = (queue)->next = INVALID_OFFSET;
+}
+
+/*
+ * SHMQueueDelete -- remove an element from the queue and
+ *	close the links
+ */
+void
+SHMQueueDelete(SHM_QUEUE *queue)
+{
+	SHM_QUEUE	*nextElem;
+	SHM_QUEUE	*prevElem;
+
+	/* bug fix: validate 'queue' before reading queue->next/queue->prev.
+	 * The old code dereferenced the links in the declaration
+	 * initializers, so an invalid pointer crashed before the Assert
+	 * could ever report it.
+	 */
+	Assert(SHM_PTR_VALID(queue));
+
+	nextElem = (SHM_QUEUE *) MAKE_PTR((queue)->next);
+	prevElem = (SHM_QUEUE *) MAKE_PTR((queue)->prev);
+
+	Assert(SHM_PTR_VALID(nextElem));
+	Assert(SHM_PTR_VALID(prevElem));
+
+#ifdef SHMQUEUE_DEBUG_DEL
+	dumpQ(queue, "in SHMQueueDelete: begin");
+#endif /* SHMQUEUE_DEBUG_DEL */
+
+	/* unlink: neighbours point past the removed element */
+	prevElem->next = (queue)->next;
+	nextElem->prev = (queue)->prev;
+
+#ifdef SHMQUEUE_DEBUG_DEL
+	dumpQ((SHM_QUEUE *)MAKE_PTR(queue->prev), "in SHMQueueDelete: end");
+#endif /* SHMQUEUE_DEBUG_DEL */
+}
+
+#ifdef SHMQUEUE_DEBUG
+void
+dumpQ(SHM_QUEUE *q, char *s)
+{
+	char elem[16];
+	char buf[1024];
+	SHM_QUEUE	*start = q;
+	int count = 0;
+
+	sprintf(buf, "q prevs: %x", MAKE_OFFSET(q));
+	q = (SHM_QUEUE
*)MAKE_PTR(q->prev); + while (q != start) + { + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + q = (SHM_QUEUE *)MAKE_PTR(q->prev); + if (q->prev == MAKE_OFFSET(q)) + break; + if (count++ > 40) + { + strcat(buf, "BAD PREV QUEUE!!"); + break; + } + } + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf); + + sprintf(buf, "q nexts: %x", MAKE_OFFSET(q)); + count = 0; + q = (SHM_QUEUE *)MAKE_PTR(q->next); + while (q != start) + { + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + q = (SHM_QUEUE *)MAKE_PTR(q->next); + if (q->next == MAKE_OFFSET(q)) + break; + if (count++ > 10) + { + strcat(buf, "BAD NEXT QUEUE!!"); + break; + } + } + sprintf(elem, "--->%x", MAKE_OFFSET(q)); + strcat(buf, elem); + elog(SHMQUEUE_DEBUG_ELOG, "%s: %s", s, buf); +} +#endif /* SHMQUEUE_DEBUG */ + +/* + * SHMQueueInsertHD -- put elem in queue between the queue head + * and its "prev" element. + */ +void +SHMQueueInsertHD(SHM_QUEUE *queue, SHM_QUEUE *elem) +{ + SHM_QUEUE *prevPtr = (SHM_QUEUE *) MAKE_PTR((queue)->prev); + SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem); + + Assert(SHM_PTR_VALID(queue)); + Assert(SHM_PTR_VALID(elem)); + +#ifdef SHMQUEUE_DEBUG_HD + dumpQ(queue, "in SHMQueueInsertHD: begin"); +#endif /* SHMQUEUE_DEBUG_HD */ + + (elem)->next = prevPtr->next; + (elem)->prev = queue->prev; + (queue)->prev = elemOffset; + prevPtr->next = elemOffset; + +#ifdef SHMQUEUE_DEBUG_HD + dumpQ(queue, "in SHMQueueInsertHD: end"); +#endif /* SHMQUEUE_DEBUG_HD */ +} + +void +SHMQueueInsertTL(SHM_QUEUE *queue, SHM_QUEUE *elem) +{ + SHM_QUEUE *nextPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next); + SHMEM_OFFSET elemOffset = MAKE_OFFSET(elem); + + Assert(SHM_PTR_VALID(queue)); + Assert(SHM_PTR_VALID(elem)); + +#ifdef SHMQUEUE_DEBUG_TL + dumpQ(queue, "in SHMQueueInsertTL: begin"); +#endif /* SHMQUEUE_DEBUG_TL */ + + (elem)->prev = nextPtr->prev; + (elem)->next = queue->next; + (queue)->next = elemOffset; + nextPtr->prev = 
elemOffset; + +#ifdef SHMQUEUE_DEBUG_TL + dumpQ(queue, "in SHMQueueInsertTL: end"); +#endif /* SHMQUEUE_DEBUG_TL */ +} + +/* + * SHMQueueFirst -- Get the first element from a queue + * + * First element is queue->next. If SHMQueue is part of + * a larger structure, we want to return a pointer to the + * whole structure rather than a pointer to its SHMQueue field. + * I.E. struct { + * int stuff; + * SHMQueue elem; + * } ELEMType; + * when this element is in a queue (queue->next) is struct.elem. + * nextQueue allows us to calculate the offset of the SHMQueue + * field in the structure. + * + * call to SHMQueueFirst should take these parameters: + * + * &(queueHead),&firstElem,&(firstElem->next) + * + * Note that firstElem may well be uninitialized. if firstElem + * is initially K, &(firstElem->next) will be K+ the offset to + * next. + */ +void +SHMQueueFirst(SHM_QUEUE *queue, Pointer *nextPtrPtr, SHM_QUEUE *nextQueue) +{ + SHM_QUEUE *elemPtr = (SHM_QUEUE *) MAKE_PTR((queue)->next); + + Assert(SHM_PTR_VALID(queue)); + *nextPtrPtr = (Pointer) (((unsigned long) *nextPtrPtr) + + ((unsigned long) elemPtr) - ((unsigned long) nextQueue)); + + /* + nextPtrPtr a ptr to a structure linked in the queue + nextQueue is the SHMQueue field of the structure + *nextPtrPtr - nextQueue is 0 minus the offset of the queue + field n the record + elemPtr + (*nextPtrPtr - nexQueue) is the start of the + structure containing elemPtr. 
+ */ +} + +/* + * SHMQueueEmpty -- TRUE if queue head is only element, FALSE otherwise + */ +bool +SHMQueueEmpty(SHM_QUEUE *queue) +{ + Assert(SHM_PTR_VALID(queue)); + + if (queue->prev == MAKE_OFFSET(queue)) + { + Assert(queue->next = MAKE_OFFSET(queue)); + return(TRUE); + } + return(FALSE); +} diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c new file mode 100644 index 00000000000..9151ee77686 --- /dev/null +++ b/src/backend/storage/ipc/sinval.c @@ -0,0 +1,169 @@ +/*------------------------------------------------------------------------- + * + * sinval.c-- + * POSTGRES shared cache invalidation communication code. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +/* #define INVALIDDEBUG 1 */ + +#include "postgres.h" + +#include "storage/sinval.h" +#include "storage/sinvaladt.h" +#include "storage/spin.h" +#include "utils/elog.h" + +extern SISeg *shmInvalBuffer;/* the shared buffer segment, set by*/ + /* SISegmentAttach() */ +extern BackendId MyBackendId; +extern BackendTag MyBackendTag; + +SPINLOCK SInvalLock = (SPINLOCK) NULL; + +/****************************************************************************/ +/* CreateSharedInvalidationState(key) Create a buffer segment */ +/* */ +/* should be called only by the POSTMASTER */ +/****************************************************************************/ +void +CreateSharedInvalidationState(IPCKey key) +{ + int status; + + /* REMOVED + SISyncKill(IPCKeyGetSIBufferMemorySemaphoreKey(key)); + SISyncInit(IPCKeyGetSIBufferMemorySemaphoreKey(key)); + */ + + /* SInvalLock gets set in spin.c, during spinlock init */ + status = SISegmentInit(true, IPCKeyGetSIBufferMemoryBlock(key)); + + if (status == -1) { + elog(FATAL, "CreateSharedInvalidationState: 
failed segment init"); + } +} +/****************************************************************************/ +/* AttachSharedInvalidationState(key) Attach a buffer segment */ +/* */ +/* should be called only by the POSTMASTER */ +/****************************************************************************/ +void +AttachSharedInvalidationState(IPCKey key) +{ + int status; + + if (key == PrivateIPCKey) { + CreateSharedInvalidationState(key); + return; + } + /* SInvalLock gets set in spin.c, during spinlock init */ + status = SISegmentInit(false, IPCKeyGetSIBufferMemoryBlock(key)); + + if (status == -1) { + elog(FATAL, "AttachSharedInvalidationState: failed segment init"); + } +} + +void +InitSharedInvalidationState() +{ + SpinAcquire(SInvalLock); + if (!SIBackendInit(shmInvalBuffer)) + { + SpinRelease(SInvalLock); + elog(FATAL, "Backend cache invalidation initialization failed"); + } + SpinRelease(SInvalLock); +} + +/* + * RegisterSharedInvalid -- + * Returns a new local cache invalidation state containing a new entry. + * + * Note: + * Assumes hash index is valid. + * Assumes item pointer is valid. + */ +/****************************************************************************/ +/* RegisterSharedInvalid(cacheId, hashIndex, pointer) */ +/* */ +/* register a message in the buffer */ +/* should be called by a backend */ +/****************************************************************************/ +void +RegisterSharedInvalid(int cacheId, /* XXX */ + Index hashIndex, + ItemPointer pointer) +{ + SharedInvalidData newInvalid; + + /* + * This code has been hacked to accept two types of messages. This might + * be treated more generally in the future. 
+ * + * (1) + * cacheId= system cache id + * hashIndex= system cache hash index for a (possibly) cached tuple + * pointer= pointer of (possibly) cached tuple + * + * (2) + * cacheId= special non-syscache id + * hashIndex= object id contained in (possibly) cached relation descriptor + * pointer= null + */ + + newInvalid.cacheId = cacheId; + newInvalid.hashIndex = hashIndex; + + if (ItemPointerIsValid(pointer)) { + ItemPointerCopy(pointer, &newInvalid.pointerData); + } else { + ItemPointerSetInvalid(&newInvalid.pointerData); + } + + SpinAcquire(SInvalLock); + if (!SISetDataEntry(shmInvalBuffer, &newInvalid)) { + /* buffer full */ + /* release a message, mark process cache states to be invalid */ + SISetProcStateInvalid(shmInvalBuffer); + + if (!SIDelDataEntry(shmInvalBuffer)) { + /* inconsistent buffer state -- shd never happen */ + SpinRelease(SInvalLock); + elog(FATAL, "RegisterSharedInvalid: inconsistent buffer state"); + } + + /* write again */ + (void) SISetDataEntry(shmInvalBuffer, &newInvalid); + } + SpinRelease(SInvalLock); +} + +/* + * InvalidateSharedInvalid -- + * Processes all entries in a shared cache invalidation state. 
+ */ +/****************************************************************************/ +/* InvalidateSharedInvalid(invalFunction, resetFunction) */ +/* */ +/* invalidate a message in the buffer (read and clean up) */ +/* should be called by a backend */ +/****************************************************************************/ +void +InvalidateSharedInvalid(void (*invalFunction)(), + void (*resetFunction)()) +{ + SpinAcquire(SInvalLock); + SIReadEntryData(shmInvalBuffer, MyBackendId, + invalFunction, resetFunction); + + SIDelExpiredDataEntries(shmInvalBuffer); + SpinRelease(SInvalLock); +} diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c new file mode 100644 index 00000000000..a30afdb6fed --- /dev/null +++ b/src/backend/storage/ipc/sinvaladt.c @@ -0,0 +1,797 @@ +/*------------------------------------------------------------------------- + * + * sinvaladt.c-- + * POSTGRES shared cache invalidation segment definitions. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.1.1.1 1996/07/09 06:21:54 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "storage/ipc.h" +#include "storage/sinvaladt.h" +#include "storage/lmgr.h" +#include "utils/elog.h" +#include "utils/palloc.h" + +/* ---------------- + * global variable notes + * + * SharedInvalidationSemaphore + * + * shmInvalBuffer + * the shared buffer segment, set by SISegmentAttach() + * + * MyBackendId + * might be removed later, used only for + * debugging in debug routines (end of file) + * + * SIDbId + * identification of buffer (disappears) + * + * SIRelId \ + * SIDummyOid \ identification of buffer + * SIXidData / + * SIXid / + * + * XXX This file really needs to be cleaned up. 
We switched to using + * spinlocks to protect critical sections (as opposed to using fake + * relations and going through the lock manager) and some of the old + * cruft was 'ifdef'ed out, while other parts (now unused) are still + * compiled into the system. -mer 5/24/92 + * ---------------- + */ +#ifdef HAS_TEST_AND_SET +int SharedInvalidationLockId; +#else +IpcSemaphoreId SharedInvalidationSemaphore; +#endif + +SISeg *shmInvalBuffer; +extern BackendId MyBackendId; + +static void CleanupInvalidationState(int status, SISeg *segInOutP); +static BackendId SIAssignBackendId(SISeg *segInOutP, BackendTag backendTag); +static int SIGetNumEntries(SISeg *segP); + +/************************************************************************/ +/* SISetActiveProcess(segP, backendId) set the backend status active */ +/* should be called only by the postmaster when creating a backend */ +/************************************************************************/ +/* XXX I suspect that the segP parameter is extraneous. 
-hirohama */ +static void +SISetActiveProcess(SISeg *segInOutP, BackendId backendId) +{ + /* mark all messages as read */ + + /* Assert(segP->procState[backendId - 1].tag == MyBackendTag); */ + + segInOutP->procState[backendId - 1].resetState = false; + segInOutP->procState[backendId - 1].limit = SIGetNumEntries(segInOutP); +} + +/****************************************************************************/ +/* SIBackendInit() initializes a backend to operate on the buffer */ +/****************************************************************************/ +int +SIBackendInit(SISeg *segInOutP) +{ + LRelId LtCreateRelId(); + TransactionId LMITransactionIdCopy(); + + Assert(MyBackendTag > 0); + + MyBackendId = SIAssignBackendId(segInOutP, MyBackendTag); + if (MyBackendId == InvalidBackendTag) + return 0; + +#ifdef INVALIDDEBUG + elog(DEBUG, "SIBackendInit: backend tag %d; backend id %d.", + MyBackendTag, MyBackendId); +#endif /* INVALIDDEBUG */ + + SISetActiveProcess(segInOutP, MyBackendId); + on_exitpg(CleanupInvalidationState, (caddr_t)segInOutP); + return 1; +} + +/* ---------------- + * SIAssignBackendId + * ---------------- + */ +static BackendId +SIAssignBackendId(SISeg *segInOutP, BackendTag backendTag) +{ + Index index; + ProcState *stateP; + + stateP = NULL; + + for (index = 0; index < MaxBackendId; index += 1) { + if (segInOutP->procState[index].tag == InvalidBackendTag || + segInOutP->procState[index].tag == backendTag) + { + stateP = &segInOutP->procState[index]; + break; + } + + if (!PointerIsValid(stateP) || + (segInOutP->procState[index].resetState && + (!stateP->resetState || + stateP->tag < backendTag)) || + (!stateP->resetState && + (segInOutP->procState[index].limit < + stateP->limit || + stateP->tag < backendTag))) + { + stateP = &segInOutP->procState[index]; + } + } + + /* verify that all "procState" entries checked for matching tags */ + + for (index += 1; index < MaxBackendId; index += 1) { + if (segInOutP->procState[index].tag == backendTag) { + 
elog (FATAL, "SIAssignBackendId: tag %d found twice", + backendTag); + } + } + + if (stateP->tag != InvalidBackendTag) { + if (stateP->tag == backendTag) { + elog(NOTICE, "SIAssignBackendId: reusing tag %d", + backendTag); + } else { + elog(NOTICE, + "SIAssignBackendId: discarding tag %d", + stateP->tag); + return InvalidBackendTag; + } + } + + stateP->tag = backendTag; + + return (1 + stateP - &segInOutP->procState[0]); +} + + +/************************************************************************/ +/* The following function should be called only by the postmaster !! */ +/************************************************************************/ + +/************************************************************************/ +/* SISetDeadProcess(segP, backendId) set the backend status DEAD */ +/* should be called only by the postmaster when a backend died */ +/************************************************************************/ +static void +SISetDeadProcess(SISeg *segP, int backendId) +{ + /* XXX call me.... */ + + segP->procState[backendId - 1].resetState = false; + segP->procState[backendId - 1].limit = -1; + segP->procState[backendId - 1].tag = InvalidBackendTag; +} + +/* + * CleanupInvalidationState -- + * Note: + * This is a temporary hack. ExitBackend should call this instead + * of exit (via on_exitpg). 
+ */ +static void +CleanupInvalidationState(int status, /* XXX */ + SISeg *segInOutP) /* XXX style */ +{ + Assert(PointerIsValid(segInOutP)); + + SISetDeadProcess(segInOutP, MyBackendId); +} + + +/************************************************************************/ +/* SIComputeSize() - retuns the size of a buffer segment */ +/************************************************************************/ +static SISegOffsets * +SIComputeSize(int *segSize) +{ + int A, B, a, b, totalSize; + SISegOffsets *oP; + + A = 0; + a = SizeSISeg; /* offset to first data entry */ + b = SizeOfOneSISegEntry * MAXNUMMESSAGES; + B = A + a + b; + totalSize = B - A; + *segSize = totalSize; + + oP = (SISegOffsets *) palloc(sizeof(SISegOffsets)); + oP->startSegment = A; + oP->offsetToFirstEntry = a; /* relatiove to A */ + oP->offsetToEndOfSegemnt = totalSize; /* relative to A */ + return(oP); +} + + +/************************************************************************/ +/* SISetStartEntrySection(segP, offset) - sets the offset */ +/************************************************************************/ +static void +SISetStartEntrySection(SISeg *segP, Offset offset) +{ + segP->startEntrySection = offset; +} + +/************************************************************************/ +/* SIGetStartEntrySection(segP) - returnss the offset */ +/************************************************************************/ +static Offset +SIGetStartEntrySection(SISeg *segP) +{ + return(segP->startEntrySection); +} + + +/************************************************************************/ +/* SISetEndEntrySection(segP, offset) - sets the offset */ +/************************************************************************/ +static void +SISetEndEntrySection(SISeg *segP, Offset offset) +{ + segP->endEntrySection = offset; +} + +/************************************************************************/ +/* SISetEndEntryChain(segP, offset) - sets the offset */ 
+/************************************************************************/ +static void +SISetEndEntryChain(SISeg *segP, Offset offset) +{ + segP->endEntryChain = offset; +} + +/************************************************************************/ +/* SIGetEndEntryChain(segP) - returnss the offset */ +/************************************************************************/ +static Offset +SIGetEndEntryChain(SISeg *segP) +{ + return(segP->endEntryChain); +} + +/************************************************************************/ +/* SISetStartEntryChain(segP, offset) - sets the offset */ +/************************************************************************/ +static void +SISetStartEntryChain(SISeg *segP, Offset offset) +{ + segP->startEntryChain = offset; +} + +/************************************************************************/ +/* SIGetStartEntryChain(segP) - returns the offset */ +/************************************************************************/ +static Offset +SIGetStartEntryChain(SISeg *segP) +{ + return(segP->startEntryChain); +} + +/************************************************************************/ +/* SISetNumEntries(segP, num) sets the current nuber of entries */ +/************************************************************************/ +static bool +SISetNumEntries(SISeg *segP, int num) +{ + if ( num <= MAXNUMMESSAGES) { + segP->numEntries = num; + return(true); + } else { + return(false); /* table full */ + } +} + +/************************************************************************/ +/* SIGetNumEntries(segP) - returns the current nuber of entries */ +/************************************************************************/ +static int +SIGetNumEntries(SISeg *segP) +{ + return(segP->numEntries); +} + + +/************************************************************************/ +/* SISetMaxNumEntries(segP, num) sets the maximal number of entries */ 
+/************************************************************************/ +static bool +SISetMaxNumEntries(SISeg *segP, int num) +{ + if ( num <= MAXNUMMESSAGES) { + segP->maxNumEntries = num; + return(true); + } else { + return(false); /* wrong number */ + } +} + + +/************************************************************************/ +/* SIGetProcStateLimit(segP, i) returns the limit of read messages */ +/************************************************************************/ +static int +SIGetProcStateLimit(SISeg *segP, int i) +{ + return(segP->procState[i].limit); +} + +/************************************************************************/ +/* SIIncNumEntries(segP, num) increments the current nuber of entries */ +/************************************************************************/ +static bool +SIIncNumEntries(SISeg *segP, int num) +{ + if ((segP->numEntries + num) <= MAXNUMMESSAGES) { + segP->numEntries = segP->numEntries + num; + return(true); + } else { + return(false); /* table full */ + } +} + +/************************************************************************/ +/* SIDecNumEntries(segP, num) decrements the current nuber of entries */ +/************************************************************************/ +static bool +SIDecNumEntries(SISeg *segP, int num) +{ + if ((segP->numEntries - num) >= 0) { + segP->numEntries = segP->numEntries - num; + return(true); + } else { + return(false); /* not enough entries in table */ + } +} + +/************************************************************************/ +/* SISetStartFreeSpace(segP, offset) - sets the offset */ +/************************************************************************/ +static void +SISetStartFreeSpace(SISeg *segP, Offset offset) +{ + segP->startFreeSpace = offset; +} + +/************************************************************************/ +/* SIGetStartFreeSpace(segP) - returns the offset */ 
+/************************************************************************/ +static Offset +SIGetStartFreeSpace(SISeg *segP) +{ + return(segP->startFreeSpace); +} + + + +/************************************************************************/ +/* SIGetFirstDataEntry(segP) returns first data entry */ +/************************************************************************/ +static SISegEntry * +SIGetFirstDataEntry(SISeg *segP) +{ + SISegEntry *eP; + Offset startChain; + + startChain = SIGetStartEntryChain(segP); + + if (startChain == InvalidOffset) + return(NULL); + + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + startChain ); + return(eP); +} + + +/************************************************************************/ +/* SIGetLastDataEntry(segP) returns last data entry in the chain */ +/************************************************************************/ +static SISegEntry * +SIGetLastDataEntry(SISeg *segP) +{ + SISegEntry *eP; + Offset endChain; + + endChain = SIGetEndEntryChain(segP); + + if (endChain == InvalidOffset) + return(NULL); + + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + endChain ); + return(eP); +} + +/************************************************************************/ +/* SIGetNextDataEntry(segP, offset) returns next data entry */ +/************************************************************************/ +static SISegEntry * +SIGetNextDataEntry(SISeg *segP, Offset offset) +{ + SISegEntry *eP; + + if (offset == InvalidOffset) + return(NULL); + + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + offset); + return(eP); +} + + +/************************************************************************/ +/* SIGetNthDataEntry(segP, n) returns the n-th data entry in chain */ +/************************************************************************/ +static SISegEntry * +SIGetNthDataEntry(SISeg *segP, + int n) /* must range from 1 to MaxMessages */ +{ + 
SISegEntry *eP; + int i; + + if (n <= 0) return(NULL); + + eP = SIGetFirstDataEntry(segP); + for (i = 1; i < n; i++) { + /* skip one and get the next */ + eP = SIGetNextDataEntry(segP, eP->next); + } + + return(eP); +} + +/************************************************************************/ +/* SIEntryOffset(segP, entryP) returns the offset for an pointer */ +/************************************************************************/ +static Offset +SIEntryOffset(SISeg *segP, SISegEntry *entryP) +{ + /* relative to B !! */ + return ((Offset) ((Pointer) entryP - + (Pointer) segP - + SIGetStartEntrySection(segP) )); +} + + +/************************************************************************/ +/* SISetDataEntry(segP, data) - sets a message in the segemnt */ +/************************************************************************/ +bool +SISetDataEntry(SISeg *segP, SharedInvalidData *data) +{ + Offset offsetToNewData; + SISegEntry *eP, *lastP; + bool SISegFull(); + Offset SIEntryOffset(); + Offset SIGetStartFreeSpace(); + SISegEntry *SIGetFirstDataEntry(); + SISegEntry *SIGetNextDataEntry(); + SISegEntry *SIGetLastDataEntry(); + + if (!SIIncNumEntries(segP, 1)) + return(false); /* no space */ + + /* get a free entry */ + offsetToNewData = SIGetStartFreeSpace(segP); + eP = SIGetNextDataEntry(segP, offsetToNewData); /* it's a free one */ + SISetStartFreeSpace(segP, eP->next); + /* fill it up */ + eP->entryData = *data; + eP->isfree = false; + eP->next = InvalidOffset; + + /* handle insertion point at the end of the chain !!*/ + lastP = SIGetLastDataEntry(segP); + if (lastP == NULL) { + /* there is no chain, insert the first entry */ + SISetStartEntryChain(segP, SIEntryOffset(segP, eP)); + } else { + /* there is a last entry in the chain */ + lastP->next = SIEntryOffset(segP, eP); + } + SISetEndEntryChain(segP, SIEntryOffset(segP, eP)); + return(true); +} + + +/************************************************************************/ +/* SIDecProcLimit(segP, 
num) decrements all process limits */ +/************************************************************************/ +static void +SIDecProcLimit(SISeg *segP, int num) +{ + int i; + for (i=0; i < MaxBackendId; i++) { + /* decrement only, if there is a limit > 0 */ + if (segP->procState[i].limit > 0) { + segP->procState[i].limit = segP->procState[i].limit - num; + if (segP->procState[i].limit < 0) { + /* limit was not high enough, reset to zero */ + /* negative means it's a dead backend */ + segP->procState[i].limit = 0; + } + } + } +} + + +/************************************************************************/ +/* SIDelDataEntry(segP) - free the FIRST entry */ +/************************************************************************/ +bool +SIDelDataEntry(SISeg *segP) +{ + SISegEntry *e1P; + SISegEntry *SIGetFirstDataEntry(); + + if (!SIDecNumEntries(segP, 1)) { + /* no entries in buffer */ + return(false); + } + + e1P = SIGetFirstDataEntry(segP); + SISetStartEntryChain(segP, e1P->next); + if (SIGetStartEntryChain(segP) == InvalidOffset) { + /* it was the last entry */ + SISetEndEntryChain(segP, InvalidOffset); + } + /* free the entry */ + e1P->isfree = true; + e1P->next = SIGetStartFreeSpace(segP); + SISetStartFreeSpace(segP, SIEntryOffset(segP, e1P)); + SIDecProcLimit(segP, 1); + return(true); +} + + + +/************************************************************************/ +/* SISetProcStateInvalid(segP) checks and marks a backends state as */ +/* invalid */ +/************************************************************************/ +void +SISetProcStateInvalid(SISeg *segP) +{ + int i; + + for (i=0; i < MaxBackendId; i++) { + if (segP->procState[i].limit == 0) { + /* backend i didn't read any message */ + segP->procState[i].resetState = true; + /*XXX signal backend that it has to reset its internal cache ? 
*/ + } + } +} + +/************************************************************************/ +/* SIReadEntryData(segP, backendId, function) */ +/* - marks messages to be read by id */ +/* and executes function */ +/************************************************************************/ +void +SIReadEntryData(SISeg *segP, + int backendId, + void (*invalFunction)(), + void (*resetFunction)()) +{ + int i = 0; + SISegEntry *data; + + Assert(segP->procState[backendId - 1].tag == MyBackendTag); + + if (!segP->procState[backendId - 1].resetState) { + /* invalidate data, but only those, you have not seen yet !!*/ + /* therefore skip read messages */ + data = SIGetNthDataEntry(segP, + SIGetProcStateLimit(segP, backendId - 1) + 1); + while (data != NULL) { + i++; + segP->procState[backendId - 1].limit++; /* one more message read */ + invalFunction(data->entryData.cacheId, + data->entryData.hashIndex, + &data->entryData.pointerData); + data = SIGetNextDataEntry(segP, data->next); + } + /* SIDelExpiredDataEntries(segP); */ + } else { + /*backend must not read messages, its own state has to be reset */ + elog(NOTICE, "SIMarkEntryData: cache state reset"); + resetFunction(); /* XXXX call it here, parameters? 
*/ + + /* new valid state--mark all messages "read" */ + segP->procState[backendId - 1].resetState = false; + segP->procState[backendId - 1].limit = SIGetNumEntries(segP); + } + /* check whether we can remove dead messages */ + if (i > MAXNUMMESSAGES) { + elog(FATAL, "SIReadEntryData: Invalid segment state"); + } +} + +/************************************************************************/ +/* SIDelExpiredDataEntries (segP) - removes irrelevant messages */ +/************************************************************************/ +void +SIDelExpiredDataEntries(SISeg *segP) +{ + int min, i, h; + + min = 9999999; + for (i = 0; i < MaxBackendId; i++) { + h = SIGetProcStateLimit(segP, i); + if (h >= 0) { /* backend active */ + if (h < min ) min = h; + } + } + if (min != 9999999) { + /* we can remove min messages */ + for (i = 1; i <= min; i++) { + /* this adjusts also the state limits!*/ + if (!SIDelDataEntry(segP)) { + elog(FATAL, "SIDelExpiredDataEntries: Invalid segment state"); + } + } + } +} + + + +/************************************************************************/ +/* SISegInit(segP) - initializes the segment */ +/************************************************************************/ +static void +SISegInit(SISeg *segP) +{ + SISegOffsets *oP; + int segSize, i; + SISegEntry *eP; + + oP = SIComputeSize(&segSize); + /* set sempahore ids in the segment */ + /* XXX */ + SISetStartEntrySection(segP, oP->offsetToFirstEntry); + SISetEndEntrySection(segP, oP->offsetToEndOfSegemnt); + SISetStartFreeSpace(segP, 0); + SISetStartEntryChain(segP, InvalidOffset); + SISetEndEntryChain(segP, InvalidOffset); + (void) SISetNumEntries(segP, 0); + (void) SISetMaxNumEntries(segP, MAXNUMMESSAGES); + for (i = 0; i < MaxBackendId; i++) { + segP->procState[i].limit = -1; /* no backend active !!*/ + segP->procState[i].resetState = false; + segP->procState[i].tag = InvalidBackendTag; + } + /* construct a chain of free entries */ + for (i = 1; i < MAXNUMMESSAGES; i++) { + eP = 
(SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + (i - 1) * sizeof(SISegEntry)); + eP->isfree = true; + eP->next = i * sizeof(SISegEntry); /* relative to B */ + } + /* handle the last free entry separate */ + eP = (SISegEntry *) ((Pointer) segP + + SIGetStartEntrySection(segP) + + (MAXNUMMESSAGES - 1) * sizeof(SISegEntry)); + eP->isfree = true; + eP->next = InvalidOffset; /* it's the end of the chain !! */ + /* + * Be tidy + */ + pfree(oP); + +} + + + +/************************************************************************/ +/* SISegmentKill(key) - kill any segment */ +/************************************************************************/ +static void +SISegmentKill(int key) /* the corresponding key for the segment */ +{ + IpcMemoryKill(key); +} + + +/************************************************************************/ +/* SISegmentGet(key, size) - get a shared segment of size <size> */ +/* returns a segment id */ +/************************************************************************/ +static IpcMemoryId +SISegmentGet(int key, /* the corresponding key for the segment */ + int size, /* size of segment in bytes */ + bool create) +{ + IpcMemoryId shmid; + + if (create) { + shmid = IpcMemoryCreate(key, size, IPCProtection); + } else { + shmid = IpcMemoryIdGet(key, size); + } + return(shmid); +} + +/************************************************************************/ +/* SISegmentAttach(shmid) - attach a shared segment with id shmid */ +/************************************************************************/ +static void +SISegmentAttach(IpcMemoryId shmid) +{ + shmInvalBuffer = (struct SISeg *) IpcMemoryAttach(shmid); + if (shmInvalBuffer == IpcMemAttachFailed) { + /* XXX use validity function */ + elog(NOTICE, "SISegmentAttach: Could not attach segment"); + elog(FATAL, "SISegmentAttach: %m"); + } +} + + +/************************************************************************/ +/* SISegmentInit(killExistingSegment, key) 
initialize segment */ +/************************************************************************/ +int +SISegmentInit(bool killExistingSegment, IPCKey key) +{ + SISegOffsets *oP; + int segSize; + IpcMemoryId shmId; + bool create; + + if (killExistingSegment) { + /* Kill existing segment */ + /* set semaphore */ + SISegmentKill(key); + + /* Get a shared segment */ + + oP = SIComputeSize(&segSize); + /* + * Be tidy + */ + pfree(oP); + + create = true; + shmId = SISegmentGet(key,segSize, create); + if (shmId < 0) { + perror("SISegmentGet: failed"); + return(-1); /* an error */ + } + + /* Attach the shared cache invalidation segment */ + /* sets the global variable shmInvalBuffer */ + SISegmentAttach(shmId); + + /* Init shared memory table */ + SISegInit(shmInvalBuffer); + } else { + /* use an existing segment */ + create = false; + shmId = SISegmentGet(key, 0, create); + if (shmId < 0) { + perror("SISegmentGet: getting an existent segment failed"); + return(-1); /* an error */ + } + /* Attach the shared cache invalidation segment */ + SISegmentAttach(shmId); + } + return(1); +} + diff --git a/src/backend/storage/ipc/spin.c b/src/backend/storage/ipc/spin.c new file mode 100644 index 00000000000..7ff2561f237 --- /dev/null +++ b/src/backend/storage/ipc/spin.c @@ -0,0 +1,247 @@ +/*------------------------------------------------------------------------- + * + * spin.c-- + * routines for managing spin locks + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/Attic/spin.c,v 1.1.1.1 1996/07/09 06:21:55 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +/* + * POSTGRES has two kinds of locks: semaphores (which put the + * process to sleep) and spinlocks (which are supposed to be + * short term locks). 
Currently both are implemented as SysV + * semaphores, but presumably this can change if we move to + * a machine with a test-and-set (TAS) instruction. Its probably + * a good idea to think about (and allocate) short term and long + * term semaphores separately anyway. + * + * NOTE: These routines are not supposed to be widely used in Postgres. + * They are preserved solely for the purpose of porting Mark Sullivan's + * buffer manager to Postgres. + */ +#include <errno.h> +#include "postgres.h" +#include "storage/ipc.h" +#include "storage/shmem.h" +#include "storage/spin.h" +#include "storage/proc.h" +#include "utils/elog.h" + +/* globals used in this file */ +IpcSemaphoreId SpinLockId; + +#ifdef HAS_TEST_AND_SET +/* real spin lock implementations */ + +bool +CreateSpinlocks(IPCKey key) +{ + /* the spin lock shared memory must have been created by now */ + return(TRUE); +} + +bool +AttachSpinLocks(IPCKey key) +{ + /* the spin lock shared memory must have been attached by now */ + return(TRUE); +} + +bool +InitSpinLocks(int init, IPCKey key) +{ + extern SPINLOCK ShmemLock; + extern SPINLOCK BindingLock; + extern SPINLOCK BufMgrLock; + extern SPINLOCK LockMgrLock; + extern SPINLOCK ProcStructLock; + extern SPINLOCK SInvalLock; + extern SPINLOCK OidGenLockId; + +#ifdef MAIN_MEMORY + extern SPINLOCK MMCacheLock; +#endif /* SONY_JUKEBOX */ + + /* These six spinlocks have fixed location is shmem */ + ShmemLock = (SPINLOCK) SHMEMLOCKID; + BindingLock = (SPINLOCK) BINDINGLOCKID; + BufMgrLock = (SPINLOCK) BUFMGRLOCKID; + LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; + ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; + SInvalLock = (SPINLOCK) SINVALLOCKID; + OidGenLockId = (SPINLOCK) OIDGENLOCKID; + +#ifdef MAIN_MEMORY + MMCacheLock = (SPINLOCK) MMCACHELOCKID; +#endif /* MAIN_MEMORY */ + + return(TRUE); +} + +void +SpinAcquire(SPINLOCK lock) +{ + ExclusiveLock(lock); + PROC_INCR_SLOCK(lock); +} + +void +SpinRelease(SPINLOCK lock) +{ + PROC_DECR_SLOCK(lock); + ExclusiveUnlock(lock); 
+} + +bool +SpinIsLocked(SPINLOCK lock) +{ + return(!LockIsFree(lock)); +} + +#else /* HAS_TEST_AND_SET */ +/* Spinlocks are implemented using SysV semaphores */ + + +/* + * SpinAcquire -- try to grab a spinlock + * + * FAILS if the semaphore is corrupted. + */ +void +SpinAcquire(SPINLOCK lock) +{ + IpcSemaphoreLock(SpinLockId, lock, IpcExclusiveLock); + PROC_INCR_SLOCK(lock); +} + +/* + * SpinRelease -- release a spin lock + * + * FAILS if the semaphore is corrupted + */ +void +SpinRelease(SPINLOCK lock) +{ + Assert(SpinIsLocked(lock)) + PROC_DECR_SLOCK(lock); + IpcSemaphoreUnlock(SpinLockId, lock, IpcExclusiveLock); +} + +bool +SpinIsLocked(SPINLOCK lock) +{ + int semval; + + semval = IpcSemaphoreGetValue(SpinLockId, lock); + return(semval < IpcSemaphoreDefaultStartValue); +} + +/* + * CreateSpinlocks -- Create a sysV semaphore array for + * the spinlocks + * + */ +bool +CreateSpinlocks(IPCKey key) +{ + + int status; + IpcSemaphoreId semid; + semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, + IpcSemaphoreDefaultStartValue, 1, &status); + if (status == IpcSemIdExist) { + IpcSemaphoreKill(key); + elog(NOTICE,"Destroying old spinlock semaphore"); + semid = IpcSemaphoreCreate(key, MAX_SPINS, IPCProtection, + IpcSemaphoreDefaultStartValue, 1, &status); + } + + if (semid >= 0) { + SpinLockId = semid; + return(TRUE); + } + /* cannot create spinlocks */ + elog(FATAL,"CreateSpinlocks: cannot create spin locks"); + return(FALSE); +} + +/* + * Attach to existing spinlock set + */ +bool +AttachSpinLocks(IPCKey key) +{ + IpcSemaphoreId id; + + id = semget (key, MAX_SPINS, 0); + if (id < 0) { + if (errno == EEXIST) { + /* key is the name of someone else's semaphore */ + elog (FATAL,"AttachSpinlocks: SPIN_KEY belongs to someone else"); + } + /* cannot create spinlocks */ + elog(FATAL,"AttachSpinlocks: cannot create spin locks"); + return(FALSE); + } + SpinLockId = id; + return(TRUE); +} + +/* + * InitSpinLocks -- Spinlock bootstrapping + * + * We need several spinlocks 
for bootstrapping: + * BindingLock (for the shmem binding table) and + * ShmemLock (for the shmem allocator), BufMgrLock (for buffer + * pool exclusive access), LockMgrLock (for the lock table), and + * ProcStructLock (a spin lock for the shared process structure). + * If there's a Sony WORM drive attached, we also have a spinlock + * (SJCacheLock) for it. Same story for the main memory storage mgr. + * + */ +bool +InitSpinLocks(int init, IPCKey key) +{ + extern SPINLOCK ShmemLock; + extern SPINLOCK BindingLock; + extern SPINLOCK BufMgrLock; + extern SPINLOCK LockMgrLock; + extern SPINLOCK ProcStructLock; + extern SPINLOCK SInvalLock; + extern SPINLOCK OidGenLockId; + +#ifdef MAIN_MEMORY + extern SPINLOCK MMCacheLock; +#endif /* MAIN_MEMORY */ + + if (!init || key != IPC_PRIVATE) { + /* if bootstrap and key is IPC_PRIVATE, it means that we are running + * backend by itself. no need to attach spinlocks + */ + if (! AttachSpinLocks(key)) { + elog(FATAL,"InitSpinLocks: couldnt attach spin locks"); + return(FALSE); + } + } + + /* These five (or six) spinlocks have fixed location is shmem */ + ShmemLock = (SPINLOCK) SHMEMLOCKID; + BindingLock = (SPINLOCK) BINDINGLOCKID; + BufMgrLock = (SPINLOCK) BUFMGRLOCKID; + LockMgrLock = (SPINLOCK) LOCKMGRLOCKID; + ProcStructLock = (SPINLOCK) PROCSTRUCTLOCKID; + SInvalLock = (SPINLOCK) SINVALLOCKID; + OidGenLockId = (SPINLOCK) OIDGENLOCKID; + +#ifdef MAIN_MEMORY + MMCacheLock = (SPINLOCK) MMCACHELOCKID; +#endif /* MAIN_MEMORY */ + + return(TRUE); +} +#endif /* HAS_TEST_AND_SET */ |