Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/common/heaptuple.c      10
-rw-r--r--  src/backend/executor/execExpr.c             5
-rw-r--r--  src/backend/executor/execTuples.c           1
-rw-r--r--  src/backend/executor/nodeForeignscan.c      6
-rw-r--r--  src/backend/jit/jit.c                       1
-rw-r--r--  src/backend/jit/llvm/Makefile               2
-rw-r--r--  src/backend/jit/llvm/llvmjit.c              2
-rw-r--r--  src/backend/jit/llvm/llvmjit_deform.c     729
-rw-r--r--  src/backend/jit/llvm/llvmjit_expr.c        45
-rw-r--r--  src/backend/jit/llvm/llvmjit_types.c        1
-rw-r--r--  src/backend/optimizer/plan/planner.c        2
-rw-r--r--  src/backend/utils/misc/guc.c               11
-rw-r--r--  src/include/access/htup_details.h           1
-rw-r--r--  src/include/executor/execExpr.h             1
-rw-r--r--  src/include/jit/jit.h                       2
-rw-r--r--  src/include/jit/llvmjit.h                   3
-rw-r--r--  src/include/nodes/execnodes.h               8
17 files changed, 827 insertions, 3 deletions
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index c45a48812bf..d4478a2cbad 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -1556,3 +1556,13 @@ minimal_tuple_from_heap_tuple(HeapTuple htup)
result->t_len = len;
return result;
}
+
+/*
+ * This mainly exists so JIT can inline the definition, but it's also
+ * sometimes useful in debugging sessions.
+ */
+size_t
+varsize_any(void *p)
+{
+ return VARSIZE_ANY(p);
+}
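
For context: VARSIZE_ANY is a macro, so there is no symbol the generated IR could call; wrapping it in a named function gives llvmjit_deform.c something to reference (FuncVarsizeAny below) and mark always-inline at the call site. As a rough model of what the macro computes on little-endian builds, ignoring external TOAST pointers (illustration only; varsize_any_model is a hypothetical name, not part of the patch):

    #include "postgres.h"

    /*
     * Simplified model of VARSIZE_ANY: a set low bit marks a 1-byte
     * varlena header whose upper 7 bits hold the total size (header
     * included); otherwise the first 4 bytes hold the size in their
     * upper 30 bits.
     */
    static size_t
    varsize_any_model(const void *p)
    {
        uint8       header = *(const uint8 *) p;

        if (header & 0x01)              /* 1-byte header: short varlena */
            return (header >> 1) & 0x7F;
        return (*(const uint32 *) p >> 2) & 0x3FFFFFFF; /* 4-byte header */
    }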
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 13bf891cea7..e284fd71d75 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -2287,18 +2287,21 @@ ExecPushExprSlots(ExprState *state, LastAttnumInfo *info)
{
scratch.opcode = EEOP_INNER_FETCHSOME;
scratch.d.fetch.last_var = info->last_inner;
+ scratch.d.fetch.known_desc = NULL;
ExprEvalPushStep(state, &scratch);
}
if (info->last_outer > 0)
{
scratch.opcode = EEOP_OUTER_FETCHSOME;
scratch.d.fetch.last_var = info->last_outer;
+ scratch.d.fetch.known_desc = NULL;
ExprEvalPushStep(state, &scratch);
}
if (info->last_scan > 0)
{
scratch.opcode = EEOP_SCAN_FETCHSOME;
scratch.d.fetch.last_var = info->last_scan;
+ scratch.d.fetch.known_desc = NULL;
ExprEvalPushStep(state, &scratch);
}
}
@@ -3250,10 +3253,12 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
/* push deform steps */
scratch.opcode = EEOP_INNER_FETCHSOME;
scratch.d.fetch.last_var = maxatt;
+ scratch.d.fetch.known_desc = ldesc;
ExprEvalPushStep(state, &scratch);
scratch.opcode = EEOP_OUTER_FETCHSOME;
scratch.d.fetch.last_var = maxatt;
+ scratch.d.fetch.known_desc = rdesc;
ExprEvalPushStep(state, &scratch);
/*
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index c46d65cf938..acd1b97b0e6 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -896,6 +896,7 @@ ExecInitScanTupleSlot(EState *estate, ScanState *scanstate, TupleDesc tupledesc)
{
scanstate->ss_ScanTupleSlot = ExecAllocTableSlot(&estate->es_tupleTable,
tupledesc);
+ scanstate->ps.scandesc = tupledesc;
}
/* ----------------
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index 0084234b350..a2a28b7ec26 100644
--- a/src/backend/executor/nodeForeignscan.c
+++ b/src/backend/executor/nodeForeignscan.c
@@ -186,7 +186,11 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
}
else
{
- ExecInitScanTupleSlot(estate, &scanstate->ss, RelationGetDescr(currentRelation));
+ TupleDesc scan_tupdesc;
+
+ /* don't trust FDWs to return tuples fulfilling NOT NULL constraints */
+ scan_tupdesc = CreateTupleDescCopy(RelationGetDescr(currentRelation));
+ ExecInitScanTupleSlot(estate, &scanstate->ss, scan_tupdesc);
/* Node's targetlist will contain Vars with varno = scanrelid */
tlistvarno = scanrelid;
}
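
Why copy the descriptor here: CreateTupleDescCopy deliberately does not copy constraints or defaults, so attnotnull comes back false for every column, and a deform function compiled from the copy will not assume NOT NULL holds for tuples the FDW hands back. A hedged sketch (fdw_scan_tupdesc is a hypothetical helper, not part of the patch):

    #include "postgres.h"
    #include "access/tupdesc.h"
    #include "utils/rel.h"

    static TupleDesc
    fdw_scan_tupdesc(Relation rel)
    {
        /* keeps types and lengths; drops NOT NULL, defaults, constraints */
        TupleDesc   copy = CreateTupleDescCopy(RelationGetDescr(rel));

        Assert(copy->constr == NULL);
        return copy;
    }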
diff --git a/src/backend/jit/jit.c b/src/backend/jit/jit.c
index 971df4f8a51..67a015fb35e 100644
--- a/src/backend/jit/jit.c
+++ b/src/backend/jit/jit.c
@@ -38,6 +38,7 @@ bool jit_debugging_support = false;
bool jit_dump_bitcode = false;
bool jit_expressions = true;
bool jit_profiling_support = false;
+bool jit_tuple_deforming = true;
double jit_above_cost = 100000;
double jit_optimize_above_cost = 500000;
diff --git a/src/backend/jit/llvm/Makefile b/src/backend/jit/llvm/Makefile
index 79097662d5f..d6a1f5f02db 100644
--- a/src/backend/jit/llvm/Makefile
+++ b/src/backend/jit/llvm/Makefile
@@ -39,7 +39,7 @@ OBJS=$(WIN32RES)
# Infrastructure
OBJS += llvmjit.o llvmjit_error.o llvmjit_wrap.o
# Code generation
-OBJS += llvmjit_expr.o
+OBJS += llvmjit_expr.o llvmjit_deform.o
all: all-shared-lib llvmjit_types.bc
diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index cd3c40c5f1b..d73237d002d 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -74,6 +74,7 @@ LLVMTypeRef StructAggStatePerTransData;
LLVMValueRef AttributeTemplate;
LLVMValueRef FuncStrlen;
+LLVMValueRef FuncVarsizeAny;
LLVMValueRef FuncSlotGetsomeattrs;
LLVMValueRef FuncHeapGetsysattr;
LLVMValueRef FuncMakeExpandedObjectReadOnlyInternal;
@@ -784,6 +785,7 @@ llvm_create_types(void)
AttributeTemplate = LLVMGetNamedFunction(mod, "AttributeTemplate");
FuncStrlen = LLVMGetNamedFunction(mod, "strlen");
+ FuncVarsizeAny = LLVMGetNamedFunction(mod, "varsize_any");
FuncSlotGetsomeattrs = LLVMGetNamedFunction(mod, "slot_getsomeattrs");
FuncHeapGetsysattr = LLVMGetNamedFunction(mod, "heap_getsysattr");
FuncMakeExpandedObjectReadOnlyInternal = LLVMGetNamedFunction(mod, "MakeExpandedObjectReadOnlyInternal");
diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c
new file mode 100644
index 00000000000..0762ab67862
--- /dev/null
+++ b/src/backend/jit/llvm/llvmjit_deform.c
@@ -0,0 +1,729 @@
+/*-------------------------------------------------------------------------
+ *
+ * llvmjit_deform.c
+ * Generate code for deforming a heap tuple.
+ *
+ * This gains performance benefits over unJITed deforming from compile-time
+ * knowledge of the tuple descriptor. Fixed column widths, NOT NULLness,
+ * etc. can be taken advantage of.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/jit/llvm/llvmjit_deform.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <llvm-c/Core.h>
+
+#include "access/htup_details.h"
+#include "executor/tuptable.h"
+#include "jit/llvmjit.h"
+#include "jit/llvmjit_emit.h"
+
+
+static LLVMValueRef get_memset(LLVMModuleRef mod);
+
+
+/*
+ * Create a function that deforms a tuple of type desc up to natts columns.
+ */
+LLVMValueRef
+slot_compile_deform(LLVMJitContext *context, TupleDesc desc, int natts)
+{
+ char *funcname;
+
+ LLVMModuleRef mod;
+ LLVMBuilderRef b;
+
+ LLVMTypeRef deform_sig;
+ LLVMValueRef v_deform_fn;
+
+ LLVMBasicBlockRef b_entry;
+ LLVMBasicBlockRef b_adjust_unavail_cols;
+ LLVMBasicBlockRef b_find_start;
+
+ LLVMBasicBlockRef b_out;
+ LLVMBasicBlockRef b_dead;
+ LLVMBasicBlockRef *attcheckattnoblocks;
+ LLVMBasicBlockRef *attstartblocks;
+ LLVMBasicBlockRef *attisnullblocks;
+ LLVMBasicBlockRef *attcheckalignblocks;
+ LLVMBasicBlockRef *attalignblocks;
+ LLVMBasicBlockRef *attstoreblocks;
+
+ LLVMValueRef v_offp;
+
+ LLVMValueRef v_tupdata_base;
+ LLVMValueRef v_tts_values;
+ LLVMValueRef v_tts_nulls;
+ LLVMValueRef v_slotoffp;
+ LLVMValueRef v_slowp;
+ LLVMValueRef v_nvalidp;
+ LLVMValueRef v_nvalid;
+ LLVMValueRef v_maxatt;
+
+ LLVMValueRef v_slot;
+
+ LLVMValueRef v_tupleheaderp;
+ LLVMValueRef v_tuplep;
+ LLVMValueRef v_infomask1;
+ LLVMValueRef v_infomask2;
+ LLVMValueRef v_bits;
+
+ LLVMValueRef v_hoff;
+
+ LLVMValueRef v_hasnulls;
+
+ /* last column (0 indexed) guaranteed to exist */
+ int guaranteed_column_number = -1;
+
+ /* current known alignment */
+ int known_alignment = 0;
+
+ /* if true, known_alignment describes definite offset of column */
+ bool attguaranteedalign = true;
+
+ int attnum;
+
+ mod = llvm_mutable_module(context);
+
+ funcname = llvm_expand_funcname(context, "deform");
+
+ /*
+ * Check which columns are guaranteed to exist, so we don't have to check
+ * the row's natts unnecessarily.
+ */
+ for (attnum = 0; attnum < desc->natts; attnum++)
+ {
+ if (TupleDescAttr(desc, attnum)->attnotnull)
+ {
+ guaranteed_column_number = attnum;
+ }
+ }
+
+ /* Create the signature and function */
+ {
+ LLVMTypeRef param_types[1];
+
+ param_types[0] = l_ptr(StructTupleTableSlot);
+
+ deform_sig = LLVMFunctionType(LLVMVoidType(), param_types,
+ lengthof(param_types), 0);
+ }
+ v_deform_fn = LLVMAddFunction(mod, funcname, deform_sig);
+ LLVMSetLinkage(v_deform_fn, LLVMInternalLinkage);
+ LLVMSetParamAlignment(LLVMGetParam(v_deform_fn, 0), MAXIMUM_ALIGNOF);
+ llvm_copy_attributes(AttributeTemplate, v_deform_fn);
+
+ b_entry =
+ LLVMAppendBasicBlock(v_deform_fn, "entry");
+ b_adjust_unavail_cols =
+ LLVMAppendBasicBlock(v_deform_fn, "adjust_unavail_cols");
+ b_find_start =
+ LLVMAppendBasicBlock(v_deform_fn, "find_startblock");
+ b_out =
+ LLVMAppendBasicBlock(v_deform_fn, "outblock");
+ b_dead =
+ LLVMAppendBasicBlock(v_deform_fn, "deadblock");
+
+ b = LLVMCreateBuilder();
+
+ attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+ attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+ attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+ attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+ attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+ attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+
+ known_alignment = 0;
+
+ LLVMPositionBuilderAtEnd(b, b_entry);
+
+ /* perform allocas first; LLVM only converts allocas in the entry block to registers */
+ v_offp = LLVMBuildAlloca(b, TypeSizeT, "v_offp");
+
+ v_slot = LLVMGetParam(v_deform_fn, 0);
+
+ v_tts_values =
+ l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_VALUES,
+ "tts_values");
+ v_tts_nulls =
+ l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL,
+ "tts_ISNULL");
+
+ v_slotoffp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_OFF, "");
+ v_slowp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_SLOW, "");
+ v_nvalidp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, "");
+
+ v_tupleheaderp =
+ l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_TUPLE,
+ "tupleheader");
+ v_tuplep =
+ l_load_struct_gep(b, v_tupleheaderp, FIELDNO_HEAPTUPLEDATA_DATA,
+ "tuple");
+ v_bits =
+ LLVMBuildBitCast(b,
+ LLVMBuildStructGEP(b, v_tuplep,
+ FIELDNO_HEAPTUPLEHEADERDATA_BITS,
+ ""),
+ l_ptr(LLVMInt8Type()),
+ "t_bits");
+ v_infomask1 =
+ l_load_struct_gep(b, v_tuplep,
+ FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK,
+ "infomask1");
+ v_infomask2 =
+ l_load_struct_gep(b,
+ v_tuplep, FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2,
+ "infomask2");
+
+ /* t_infomask & HEAP_HASNULL */
+ v_hasnulls =
+ LLVMBuildICmp(b, LLVMIntNE,
+ LLVMBuildAnd(b,
+ l_int16_const(HEAP_HASNULL),
+ v_infomask1, ""),
+ l_int16_const(0),
+ "hasnulls");
+
+ /* t_infomask2 & HEAP_NATTS_MASK */
+ v_maxatt = LLVMBuildAnd(b,
+ l_int16_const(HEAP_NATTS_MASK),
+ v_infomask2,
+ "maxatt");
+
+ v_hoff =
+ l_load_struct_gep(b, v_tuplep,
+ FIELDNO_HEAPTUPLEHEADERDATA_HOFF,
+ "t_hoff");
+
+ v_tupdata_base =
+ LLVMBuildGEP(b,
+ LLVMBuildBitCast(b,
+ v_tuplep,
+ l_ptr(LLVMInt8Type()),
+ ""),
+ &v_hoff, 1,
+ "v_tupdata_base");
+
+ /*
+ * Load the tuple start offset from the slot. It will be reset below in
+ * case there are no previously deformed columns in the slot.
+ */
+ {
+ LLVMValueRef v_off_start;
+
+ v_off_start = LLVMBuildLoad(b, v_slotoffp, "v_slot_off");
+ v_off_start = LLVMBuildZExt(b, v_off_start, TypeSizeT, "");
+ LLVMBuildStore(b, v_off_start, v_offp);
+ }
+
+ /* build the basic block for each attribute, need them as jump target */
+ for (attnum = 0; attnum < natts; attnum++)
+ {
+ attcheckattnoblocks[attnum] =
+ l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckattno", attnum);
+ attstartblocks[attnum] =
+ l_bb_append_v(v_deform_fn, "block.attr.%d.start", attnum);
+ attisnullblocks[attnum] =
+ l_bb_append_v(v_deform_fn, "block.attr.%d.attisnull", attnum);
+ attcheckalignblocks[attnum] =
+ l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckalign", attnum);
+ attalignblocks[attnum] =
+ l_bb_append_v(v_deform_fn, "block.attr.%d.align", attnum);
+ attstoreblocks[attnum] =
+ l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum);
+ }
+
+ /*
+ * Check whether it's guaranteed that all the desired attributes are
+ * available in the tuple. If so, we can start deforming right away. If
+ * not, we need to make sure tts_values/tts_isnull are set appropriately
+ * for columns not available in the tuple.
+ */
+ if ((natts - 1) <= guaranteed_column_number)
+ {
+ /* just skip through unnecessary blocks */
+ LLVMBuildBr(b, b_adjust_unavail_cols);
+ LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);
+ LLVMBuildBr(b, b_find_start);
+ }
+ else
+ {
+ LLVMValueRef v_set;
+ LLVMValueRef v_startset;
+ LLVMValueRef v_params[5];
+
+ /* branch if not all columns available */
+ LLVMBuildCondBr(b,
+ LLVMBuildICmp(b, LLVMIntULT,
+ v_maxatt,
+ l_int16_const(natts),
+ ""),
+ b_adjust_unavail_cols,
+ b_find_start);
+
+ /* if not, memset tts_isnull of relevant cols to true */
+ LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);
+
+ v_set = LLVMBuildSub(b,
+ l_int16_const(natts),
+ v_maxatt, "");
+
+ v_startset = LLVMBuildGEP(b, v_tts_nulls, &v_maxatt, 1, "");
+
+ v_params[0] = v_startset;
+ v_params[1] = l_int8_const(1);
+ v_params[2] = LLVMBuildZExt(b, v_set, LLVMInt32Type(), "");
+ v_params[3] = l_int32_const(1);
+ v_params[4] = LLVMConstInt(LLVMInt1Type(), 0, false);
+
+ LLVMBuildCall(b, get_memset(mod),
+ v_params, lengthof(v_params), "");
+ LLVMBuildBr(b, b_find_start);
+ }
+
+ LLVMPositionBuilderAtEnd(b, b_find_start);
+
+ v_nvalid = LLVMBuildLoad(b, v_nvalidp, "");
+
+ /*
+ * Build switch to go from nvalid to the right startblock. Callers
+ * currently don't have the knowledge, but it'd be good for performance to
+ * avoid this check when it's known that the slot is empty (e.g. in scan
+ * nodes).
+ */
+ if (true)
+ {
+ LLVMValueRef v_switch = LLVMBuildSwitch(b, v_nvalid,
+ b_dead, natts);
+
+ for (attnum = 0; attnum < natts; attnum++)
+ {
+ LLVMValueRef v_attno = l_int32_const(attnum);
+
+ LLVMAddCase(v_switch, v_attno, attcheckattnoblocks[attnum]);
+ }
+
+ }
+ else
+ {
+ /* jump from entry block to first block */
+ LLVMBuildBr(b, attcheckattnoblocks[0]);
+ }
+
+ LLVMPositionBuilderAtEnd(b, b_dead);
+ LLVMBuildUnreachable(b);
+
+ /*
+ * Iterate over each attribute that needs to be deformed, build code to
+ * deform it.
+ */
+ for (attnum = 0; attnum < natts; attnum++)
+ {
+ Form_pg_attribute att = TupleDescAttr(desc, attnum);
+ LLVMValueRef v_incby;
+ int alignto;
+ LLVMValueRef l_attno = l_int16_const(attnum);
+ LLVMValueRef v_attdatap;
+ LLVMValueRef v_resultp;
+
+ /* build block checking whether we did all the necessary attributes */
+ LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);
+
+ /*
+ * If this is the first attribute, slot->tts_nvalid was 0. Therefore
+ * reset the offset to 0; it may be left over from a previous execution.
+ */
+ if (attnum == 0)
+ {
+ LLVMBuildStore(b, l_sizet_const(0), v_offp);
+ }
+
+ /*
+ * Build check whether column is available (i.e. whether the tuple has
+ * that many columns stored). We can avoid the branch if we know
+ * there's a subsequent NOT NULL column.
+ */
+ if (attnum <= guaranteed_column_number)
+ {
+ LLVMBuildBr(b, attstartblocks[attnum]);
+ }
+ else
+ {
+ LLVMValueRef v_islast;
+
+ v_islast = LLVMBuildICmp(b, LLVMIntEQ,
+ l_attno,
+ v_maxatt,
+ "heap_natts");
+ LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]);
+ }
+ LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);
+
+ /* check for nulls if necessary */
+ if (!att->attnotnull)
+ {
+ LLVMBasicBlockRef b_ifnotnull;
+ LLVMBasicBlockRef b_ifnull;
+ LLVMBasicBlockRef b_next;
+ LLVMValueRef v_attisnull;
+ LLVMValueRef v_nullbyteno;
+ LLVMValueRef v_nullbytemask;
+ LLVMValueRef v_nullbyte;
+ LLVMValueRef v_nullbit;
+
+ b_ifnotnull = attcheckalignblocks[attnum];
+ b_ifnull = attisnullblocks[attnum];
+
+ if (attnum + 1 == natts)
+ b_next = b_out;
+ else
+ b_next = attcheckattnoblocks[attnum + 1];
+
+ v_nullbyteno = l_int32_const(attnum >> 3);
+ v_nullbytemask = l_int8_const(1 << ((attnum) & 0x07));
+ v_nullbyte = l_load_gep1(b, v_bits, v_nullbyteno, "attnullbyte");
+
+ v_nullbit = LLVMBuildICmp(b,
+ LLVMIntEQ,
+ LLVMBuildAnd(b, v_nullbyte, v_nullbytemask, ""),
+ l_int8_const(0),
+ "attisnull");
+
+ v_attisnull = LLVMBuildAnd(b, v_hasnulls, v_nullbit, "");
+
+ LLVMBuildCondBr(b, v_attisnull, b_ifnull, b_ifnotnull);
+
+ LLVMPositionBuilderAtEnd(b, b_ifnull);
+
+ /* store null-byte */
+ LLVMBuildStore(b,
+ l_int8_const(1),
+ LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, ""));
+ /* store zero datum */
+ LLVMBuildStore(b,
+ l_sizet_const(0),
+ LLVMBuildGEP(b, v_tts_values, &l_attno, 1, ""));
+
+ LLVMBuildBr(b, b_next);
+ attguaranteedalign = false;
+ }
+ else
+ {
+ /* nothing to do */
+ LLVMBuildBr(b, attcheckalignblocks[attnum]);
+ LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]);
+ LLVMBuildBr(b, attcheckalignblocks[attnum]);
+ }
+ LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]);
+
+ /* determine required alignment */
+ if (att->attalign == 'i')
+ alignto = ALIGNOF_INT;
+ else if (att->attalign == 'c')
+ alignto = 1;
+ else if (att->attalign == 'd')
+ alignto = ALIGNOF_DOUBLE;
+ else if (att->attalign == 's')
+ alignto = ALIGNOF_SHORT;
+ else
+ {
+ elog(ERROR, "unknown alignment");
+ alignto = 0;
+ }
+
+ /* ------
+ * Even if alignment is required, we can skip doing it if provably
+ * unnecessary:
+ * - the first column is guaranteed to be aligned
+ * - columns following a NOT NULL fixed-width datum have a known
+ * alignment; the alignment computation can be skipped if that known
+ * alignment is compatible with the current column.
+ * ------
+ */
+ if (alignto > 1 &&
+ (known_alignment < 0 || known_alignment != TYPEALIGN(alignto, known_alignment)))
+ {
+ /*
+ * When accessing a varlena field we have to "peek" to see if we
+ * are looking at a pad byte or the first byte of a 1-byte-header
+ * datum. A zero byte must be either a pad byte, or the first
+ * byte of a correctly aligned 4-byte length word; in either case
+ * we can align safely. A non-zero byte must be either a 1-byte
+ * length word, or the first byte of a correctly aligned 4-byte
+ * length word; in either case we need not align.
+ */
+ if (att->attlen == -1)
+ {
+ LLVMValueRef v_possible_padbyte;
+ LLVMValueRef v_ispad;
+ LLVMValueRef v_off;
+
+ /* don't know if short varlena or not */
+ attguaranteedalign = false;
+
+ v_off = LLVMBuildLoad(b, v_offp, "");
+
+ v_possible_padbyte =
+ l_load_gep1(b, v_tupdata_base, v_off, "padbyte");
+ v_ispad =
+ LLVMBuildICmp(b, LLVMIntEQ,
+ v_possible_padbyte, l_int8_const(0),
+ "ispadbyte");
+ LLVMBuildCondBr(b, v_ispad,
+ attalignblocks[attnum],
+ attstoreblocks[attnum]);
+ }
+ else
+ {
+ LLVMBuildBr(b, attalignblocks[attnum]);
+ }
+
+ LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]);
+
+ /* translation of alignment code (cf TYPEALIGN()) */
+ {
+ LLVMValueRef v_off_aligned;
+ LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, "");
+
+ /* ((ALIGNVAL) - 1) */
+ LLVMValueRef v_alignval = l_sizet_const(alignto - 1);
+
+ /* ((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) */
+ LLVMValueRef v_lh = LLVMBuildAdd(b, v_off, v_alignval, "");
+
+ /* ~((uintptr_t) ((ALIGNVAL) - 1)) */
+ LLVMValueRef v_rh = l_sizet_const(~(alignto - 1));
+
+ v_off_aligned = LLVMBuildAnd(b, v_lh, v_rh, "aligned_offset");
+
+ LLVMBuildStore(b, v_off_aligned, v_offp);
+ }
+
+ /*
+ * As alignment either was unnecessary or has been performed, we
+ * now know the current alignment. This is only safe because this
+ * value isn't used for varlena and nullable columns.
+ */
+ if (known_alignment >= 0)
+ {
+ Assert(known_alignment != 0);
+ known_alignment = TYPEALIGN(alignto, known_alignment);
+ }
+
+ LLVMBuildBr(b, attstoreblocks[attnum]);
+ LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]);
+ }
+ else
+ {
+ LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]);
+ LLVMBuildBr(b, attalignblocks[attnum]);
+ LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]);
+ LLVMBuildBr(b, attstoreblocks[attnum]);
+ }
+ LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]);
+
+ /*
+ * Store the current offset if known to be constant. That allows LLVM
+ * to generate better code. Without that LLVM can't figure out that
+ * the offset might be constant due to the jumps for previously
+ * decoded columns.
+ */
+ if (attguaranteedalign)
+ {
+ Assert(known_alignment >= 0);
+ LLVMBuildStore(b, l_sizet_const(known_alignment), v_offp);
+ }
+
+ /* compute what following columns are aligned to */
+ if (att->attlen < 0)
+ {
+ /* can't guarantee any alignment after variable length field */
+ known_alignment = -1;
+ attguaranteedalign = false;
+ }
+ else if (att->attnotnull && attguaranteedalign && known_alignment >= 0)
+ {
+ /*
+ * If the offset to the column was previously known, a NOT NULL &
+ * fixed-width column guarantees that the alignment is just the
+ * previous alignment plus the column width.
+ */
+ Assert(att->attlen > 0);
+ known_alignment += att->attlen;
+ }
+ else if (att->attnotnull && (att->attlen % alignto) == 0)
+ {
+ /*
+ * After a NOT NULL fixed-width column with a length that is a
+ * multiple of its alignment requirement, we know the following
+ * column is aligned to at least the current column's alignment.
+ */
+ Assert(att->attlen > 0);
+ known_alignment = alignto;
+ Assert(known_alignment > 0);
+ attguaranteedalign = false;
+ }
+ else
+ {
+ known_alignment = -1;
+ attguaranteedalign = false;
+ }
+
+
+ /* compute address to load data from */
+ {
+ LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, "");
+
+ v_attdatap =
+ LLVMBuildGEP(b, v_tupdata_base, &v_off, 1, "");
+ }
+
+ /* compute address to store value at */
+ v_resultp = LLVMBuildGEP(b, v_tts_values, &l_attno, 1, "");
+
+ /* store null-byte (false) */
+ LLVMBuildStore(b, l_int8_const(0),
+ LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, ""));
+
+ /*
+ * Store datum. For byval datums copy the value, extend to Datum's
+ * width, and store. For byref types, store pointer to data.
+ */
+ if (att->attbyval)
+ {
+ LLVMValueRef v_tmp_loaddata;
+ LLVMTypeRef vartypep =
+ LLVMPointerType(LLVMIntType(att->attlen * 8), 0);
+
+ v_tmp_loaddata =
+ LLVMBuildPointerCast(b, v_attdatap, vartypep, "");
+ v_tmp_loaddata = LLVMBuildLoad(b, v_tmp_loaddata, "attr_byval");
+ v_tmp_loaddata = LLVMBuildZExt(b, v_tmp_loaddata, TypeSizeT, "");
+
+ LLVMBuildStore(b, v_tmp_loaddata, v_resultp);
+ }
+ else
+ {
+ LLVMValueRef v_tmp_loaddata;
+
+ /* store pointer */
+ v_tmp_loaddata =
+ LLVMBuildPtrToInt(b,
+ v_attdatap,
+ TypeSizeT,
+ "attr_ptr");
+ LLVMBuildStore(b, v_tmp_loaddata, v_resultp);
+ }
+
+ /* increment data pointer */
+ if (att->attlen > 0)
+ {
+ v_incby = l_sizet_const(att->attlen);
+ }
+ else if (att->attlen == -1)
+ {
+ v_incby = LLVMBuildCall(b,
+ llvm_get_decl(mod, FuncVarsizeAny),
+ &v_attdatap, 1,
+ "varsize_any");
+ l_callsite_ro(v_incby);
+ l_callsite_alwaysinline(v_incby);
+ }
+ else if (att->attlen == -2)
+ {
+ v_incby = LLVMBuildCall(b,
+ llvm_get_decl(mod, FuncStrlen),
+ &v_attdatap, 1, "strlen");
+
+ l_callsite_ro(v_incby);
+
+ /* add 1 for NUL byte */
+ v_incby = LLVMBuildAdd(b, v_incby, l_sizet_const(1), "");
+ }
+ else
+ {
+ Assert(false);
+ v_incby = NULL; /* silence compiler */
+ }
+
+ if (attguaranteedalign)
+ {
+ Assert(known_alignment >= 0);
+ LLVMBuildStore(b, l_sizet_const(known_alignment), v_offp);
+ }
+ else
+ {
+ LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, "");
+
+ v_off = LLVMBuildAdd(b, v_off, v_incby, "increment_offset");
+ LLVMBuildStore(b, v_off, v_offp);
+ }
+
+ /*
+ * Jump to the next block, unless this is the last possible column or
+ * all desired (available) attributes have been fetched.
+ */
+ if (attnum + 1 == natts)
+ {
+ /* jump out */
+ LLVMBuildBr(b, b_out);
+ }
+ else
+ {
+ LLVMBuildBr(b, attcheckattnoblocks[attnum + 1]);
+ }
+ }
+
+
+ /* build block that returns */
+ LLVMPositionBuilderAtEnd(b, b_out);
+
+ {
+ LLVMValueRef v_off = LLVMBuildLoad(b, v_offp, "");
+
+ LLVMBuildStore(b, l_int32_const(natts), v_nvalidp);
+ v_off = LLVMBuildTrunc(b, v_off, LLVMInt32Type(), "");
+ LLVMBuildStore(b, v_off, v_slotoffp);
+ LLVMBuildStore(b, l_int8_const(1), v_slowp);
+ LLVMBuildRetVoid(b);
+ }
+
+ LLVMDisposeBuilder(b);
+
+ return v_deform_fn;
+}
+
+static LLVMValueRef
+get_memset(LLVMModuleRef mod)
+{
+ LLVMTypeRef sig;
+ LLVMValueRef v_fn;
+ LLVMTypeRef param_types[5];
+ const char *nm = "llvm.memset.p0i8.i32";
+
+ v_fn = LLVMGetNamedFunction(mod, nm);
+ if (v_fn)
+ return v_fn;
+
+ param_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* addr */
+ param_types[1] = LLVMInt8Type(); /* val */
+ param_types[2] = LLVMInt32Type(); /* len */
+ param_types[3] = LLVMInt32Type(); /* align */
+ param_types[4] = LLVMInt1Type(); /* volatile */
+
+ sig = LLVMFunctionType(LLVMVoidType(), param_types, lengthof(param_types), 0);
+ v_fn = LLVMAddFunction(mod, nm, sig);
+
+ LLVMSetFunctionCallConv(v_fn, LLVMCCallConv);
+
+ Assert(LLVMGetIntrinsicID(v_fn));
+
+ return v_fn;
+}
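
To make the generated control flow easier to follow, here is a hedged sketch of the specialized C that the emitted IR approximates, for a hypothetical two-column descriptor (a NOT NULL int2 followed by a nullable text) with natts = 2. The offset of the leading fixed-width NOT NULL column folds to a constant, which is exactly what the known_alignment/attguaranteedalign bookkeeping buys; the tts_nvalid switch for resuming partial deforms and the checks against the tuple's natts are omitted. Illustration only, not emitted verbatim by slot_compile_deform():

    #include "postgres.h"
    #include "access/htup_details.h"
    #include "executor/tuptable.h"

    static void
    deform_int2_text(TupleTableSlot *slot)      /* hypothetical */
    {
        HeapTupleHeader tup = slot->tts_tuple->t_data;
        bool        hasnulls = (tup->t_infomask & HEAP_HASNULL) != 0;
        char       *data = (char *) tup + tup->t_hoff;
        Size        off = 0;

        /* column 0: int2, NOT NULL -> constant offset, no null check */
        slot->tts_isnull[0] = false;
        slot->tts_values[0] = Int16GetDatum(*(int16 *) (data + off));
        off = 2;

        /* column 1: text, nullable -> null-bitmap check, then pad-byte peek */
        if (hasnulls && !(tup->t_bits[0] & (1 << 1)))
        {
            slot->tts_isnull[1] = true;
            slot->tts_values[1] = (Datum) 0;
        }
        else
        {
            if (data[off] == 0)                 /* pad byte: align to int */
                off = TYPEALIGN(ALIGNOF_INT, off);
            slot->tts_isnull[1] = false;
            slot->tts_values[1] = PointerGetDatum(data + off);
            off += varsize_any(data + off);
        }

        slot->tts_nvalid = 2;
        slot->tts_off = off;
        slot->tts_slow = true;
    }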
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 667fb01d3be..2074b067bab 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -152,7 +152,7 @@ llvm_compile_expr(ExprState *state)
param_types[0] = l_ptr(StructExprState); /* state */
param_types[1] = l_ptr(StructExprContext); /* econtext */
- param_types[2] = l_ptr(TypeParamBool); /* isnull */
+ param_types[2] = l_ptr(TypeParamBool); /* isnull */
eval_sig = LLVMFunctionType(TypeSizeT,
param_types, lengthof(param_types),
@@ -272,6 +272,7 @@ llvm_compile_expr(ExprState *state)
case EEOP_OUTER_FETCHSOME:
case EEOP_SCAN_FETCHSOME:
{
+ TupleDesc desc = NULL;
LLVMValueRef v_slot;
LLVMBasicBlockRef b_fetch;
LLVMValueRef v_nvalid;
@@ -279,17 +280,38 @@ llvm_compile_expr(ExprState *state)
b_fetch = l_bb_before_v(opblocks[i + 1],
"op.%d.fetch", i);
+ if (op->d.fetch.known_desc)
+ desc = op->d.fetch.known_desc;
+
if (opcode == EEOP_INNER_FETCHSOME)
{
+ PlanState *is = innerPlanState(parent);
+
v_slot = v_innerslot;
+
+ if (!desc &&
+ is &&
+ is->ps_ResultTupleSlot &&
+ is->ps_ResultTupleSlot->tts_fixedTupleDescriptor)
+ desc = is->ps_ResultTupleSlot->tts_tupleDescriptor;
}
else if (opcode == EEOP_OUTER_FETCHSOME)
{
+ PlanState *os = outerPlanState(parent);
+
v_slot = v_outerslot;
+
+ if (!desc &&
+ os &&
+ os->ps_ResultTupleSlot &&
+ os->ps_ResultTupleSlot->tts_fixedTupleDescriptor)
+ desc = os->ps_ResultTupleSlot->tts_tupleDescriptor;
}
else
{
v_slot = v_scanslot;
+ if (!desc && parent)
+ desc = parent->scandesc;
}
/*
@@ -308,6 +330,27 @@ llvm_compile_expr(ExprState *state)
LLVMPositionBuilderAtEnd(b, b_fetch);
+ /*
+ * If the tupledesc of the to-be-deformed tuple is known,
+ * and JITing of deforming is enabled, build a deform
+ * function specific to the tupledesc and the exact number
+ * of to-be-extracted attributes.
+ */
+ if (desc && (context->base.flags & PGJIT_DEFORM))
+ {
+ LLVMValueRef params[1];
+ LLVMValueRef l_jit_deform;
+
+ l_jit_deform =
+ slot_compile_deform(context, desc,
+ op->d.fetch.last_var);
+ params[0] = v_slot;
+
+ LLVMBuildCall(b, l_jit_deform,
+ params, lengthof(params), "");
+
+ }
+ else
{
LLVMValueRef params[2];
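
In short, the descriptor used for specialization is resolved in a fixed order, and only when one is found and PGJIT_DEFORM is set does the code call slot_compile_deform(); otherwise it falls back to the slot_getsomeattrs() call. A hedged condensation of the logic above (fetch_known_desc is a hypothetical helper; "child" stands for innerPlanState(parent) or outerPlanState(parent) as appropriate):

    #include "postgres.h"
    #include "executor/execExpr.h"
    #include "nodes/execnodes.h"

    static TupleDesc
    fetch_known_desc(ExprEvalStep *op, PlanState *parent, PlanState *child)
    {
        TupleDesc   desc = op->d.fetch.known_desc;  /* 1: baked in at build time */

        if (!desc && child &&
            child->ps_ResultTupleSlot &&
            child->ps_ResultTupleSlot->tts_fixedTupleDescriptor)
            desc = child->ps_ResultTupleSlot->tts_tupleDescriptor; /* 2 */

        if (!desc && parent)
            desc = parent->scandesc;    /* 3: EEOP_SCAN_FETCHSOME only */

        return desc;                    /* NULL -> use slot_getsomeattrs() */
    }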
diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c
index 84bc1407373..ad29bafa8f6 100644
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@@ -96,6 +96,7 @@ FunctionReturningBool(void)
void *referenced_functions[] =
{
strlen,
+ varsize_any,
slot_getsomeattrs,
heap_getsysattr,
MakeExpandedObjectReadOnlyInternal,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 50f858e420f..52c21e68705 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -550,6 +550,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
*/
if (jit_expressions)
result->jitFlags |= PGJIT_EXPR;
+ if (jit_tuple_deforming)
+ result->jitFlags |= PGJIT_DEFORM;
}
return result;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e6d79873dd2..d075cb139a3 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1788,6 +1788,17 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"jit_tuple_deforming", PGC_USERSET, DEVELOPER_OPTIONS,
+ gettext_noop("Allow JIT compilation of tuple deforming."),
+ NULL,
+ GUC_NOT_IN_SAMPLE
+ },
+ &jit_tuple_deforming,
+ true,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 3616a17b6fa..67342ef63dc 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -829,5 +829,6 @@ extern void heap_free_minimal_tuple(MinimalTuple mtup);
extern MinimalTuple heap_copy_minimal_tuple(MinimalTuple mtup);
extern HeapTuple heap_tuple_from_minimal_tuple(MinimalTuple mtup);
extern MinimalTuple minimal_tuple_from_heap_tuple(HeapTuple htup);
+extern size_t varsize_any(void *p);
#endif /* HTUP_DETAILS_H */
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 6fc4ed640b2..f4617a28fa2 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -262,6 +262,7 @@ typedef struct ExprEvalStep
{
/* attribute number up to which to fetch (inclusive) */
int last_var;
+ TupleDesc known_desc;
} fetch;
/* for EEOP_INNER/OUTER/SCAN_[SYS]VAR[_FIRST] */
diff --git a/src/include/jit/jit.h b/src/include/jit/jit.h
index 703c5011dae..efcd6a52cfd 100644
--- a/src/include/jit/jit.h
+++ b/src/include/jit/jit.h
@@ -21,6 +21,7 @@
#define PGJIT_OPT3 1 << 1
/* reserved for PGJIT_INLINE */
#define PGJIT_EXPR 1 << 3
+#define PGJIT_DEFORM 1 << 4
typedef struct JitContext
@@ -67,6 +68,7 @@ extern bool jit_debugging_support;
extern bool jit_dump_bitcode;
extern bool jit_expressions;
extern bool jit_profiling_support;
+extern bool jit_tuple_deforming;
extern double jit_above_cost;
extern double jit_optimize_above_cost;
diff --git a/src/include/jit/llvmjit.h b/src/include/jit/llvmjit.h
index cc908477e8e..9443a568d85 100644
--- a/src/include/jit/llvmjit.h
+++ b/src/include/jit/llvmjit.h
@@ -32,6 +32,7 @@ extern "C"
#include "fmgr.h"
#include "jit/jit.h"
#include "nodes/pg_list.h"
+#include "access/tupdesc.h"
typedef struct LLVMJitContext
@@ -75,6 +76,7 @@ extern LLVMTypeRef StructAggStatePerGroupData;
extern LLVMValueRef AttributeTemplate;
extern LLVMValueRef FuncStrlen;
+extern LLVMValueRef FuncVarsizeAny;
extern LLVMValueRef FuncSlotGetsomeattrs;
extern LLVMValueRef FuncHeapGetsysattr;
extern LLVMValueRef FuncMakeExpandedObjectReadOnlyInternal;
@@ -107,6 +109,7 @@ extern LLVMValueRef llvm_function_reference(LLVMJitContext *context,
****************************************************************************
*/
extern bool llvm_compile_expr(struct ExprState *state);
+extern LLVMValueRef slot_compile_deform(struct LLVMJitContext *context, TupleDesc desc, int natts);
/*
****************************************************************************
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2c2d2823c05..6070a42b6fe 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -920,6 +920,7 @@ typedef struct PlanState
ExprState *qual; /* boolean qual condition */
struct PlanState *lefttree; /* input plan tree(s) */
struct PlanState *righttree;
+
List *initPlan; /* Init SubPlanState nodes (un-correlated expr
* subselects) */
List *subPlan; /* SubPlanState nodes in my expressions */
@@ -935,6 +936,13 @@ typedef struct PlanState
TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
ExprContext *ps_ExprContext; /* node's expression-evaluation context */
ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */
+
+ /*
+ * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
+ * it's hard for expression compilation to optimize based on the
+ * descriptor, without encoding knowledge about all executor nodes.
+ */
+ TupleDesc scandesc;
} PlanState;
/* ----------------