aboutsummaryrefslogtreecommitdiff
path: root/src/backend/lib/hyperloglog.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2016-01-19 17:40:15 -0300
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2016-01-19 17:40:15 -0300
commit948c97958bf37adb2a9c2d6d92c255abfc7499ba (patch)
treee32489c56c51f7f580b36d161e07d3e4313af537 /src/backend/lib/hyperloglog.c
parent9ff60273e35cad6e9d3a4adf59d5c2455afe9d9e (diff)
downloadpostgresql-948c97958bf37adb2a9c2d6d92c255abfc7499ba.tar.gz
postgresql-948c97958bf37adb2a9c2d6d92c255abfc7499ba.zip
Add two HyperLogLog functions
New functions initHyperLogLogError() and freeHyperLogLog() simplify using this module from elsewhere. Author: Tomáš Vondra Review: Peter Geoghegan
Diffstat (limited to 'src/backend/lib/hyperloglog.c')
-rw-r--r--src/backend/lib/hyperloglog.c48
1 files changed, 47 insertions, 1 deletions
diff --git a/src/backend/lib/hyperloglog.c b/src/backend/lib/hyperloglog.c
index 094bc09a44c..fa7f05a2411 100644
--- a/src/backend/lib/hyperloglog.c
+++ b/src/backend/lib/hyperloglog.c
@@ -56,7 +56,7 @@
static inline uint8 rho(uint32 x, uint8 b);
/*
- * Initialize HyperLogLog track state
+ * Initialize HyperLogLog track state, by bit width
*
* bwidth is bit width (so register size will be 2 to the power of bwidth).
* Must be between 4 and 16 inclusive.
@@ -108,6 +108,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
}
/*
+ * Initialize HyperLogLog track state, by error rate
+ *
+ * Instead of specifying bwidth (number of bits used for addressing the
+ * register), this method allows sizing the counter for particular error
+ * rate using a simple formula from the paper:
+ *
+ * e = 1.04 / sqrt(m)
+ *
+ * where 'm' is the number of registers, i.e. (2^bwidth). The method
+ * finds the lowest bwidth with 'e' below the requested error rate, and
+ * then uses it to initialize the counter.
+ *
+ * As bwidth has to be between 4 and 16, the worst possible error rate
+ * is between ~25% (bwidth=4) and 0.4% (bwidth=16).
+ */
+void
+initHyperLogLogError(hyperLogLogState *cState, double error)
+{
+ uint8 bwidth = 4;
+
+ while (bwidth < 16)
+ {
+ double m = (Size) 1 << bwidth;
+
+ if (1.04 / sqrt(m) < error)
+ break;
+ bwidth++;
+ }
+
+ initHyperLogLog(cState, bwidth);
+}
+
+/*
+ * Free HyperLogLog track state
+ *
+ * Releases allocated resources, but not the state itself (in case it's not
+ * allocated by palloc).
+ */
+void
+freeHyperLogLog(hyperLogLogState *cState)
+{
+ Assert(cState->hashesArr != NULL);
+ pfree(cState->hashesArr);
+}
+
+/*
* Adds element to the estimator, from caller-supplied hash.
*
* It is critical that the hash value passed be an actual hash value, typically