aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Schurhammer <julian.schurhammer@gmail.com>2022-07-23 09:33:25 +1200
committerLouis Pilfold <louis@lpil.uk>2023-03-13 10:48:26 +0000
commit1b305abbecbcd9004439ed9418077b367ff6b0b0 (patch)
treeba61bae239de89ca54e4e5af4f889c5ec43aa18d
parent589a74661749626a4a7b8a5e27b536430ed680af (diff)
downloadgleam_stdlib-1b305abbecbcd9004439ed9418077b367ff6b0b0.tar.gz
gleam_stdlib-1b305abbecbcd9004439ed9418077b367ff6b0b0.zip
initial implementation of 'hash array mapped trie' structure for persistent maps
-rw-r--r--src/gleam_stdlib.mjs97
-rw-r--r--src/persistent-hash-map.mjs527
2 files changed, 543 insertions, 81 deletions
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 60ab625..9dca761 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -15,10 +15,10 @@ import {
} from "./gleam/regex.mjs";
import { DecodeError } from "./gleam/dynamic.mjs";
import { Some, None } from "./gleam/option.mjs";
-
-const HASHCODE_CACHE = new WeakMap();
+import * as pmap from "./persistent-hash-map.mjs";
const Nil = undefined;
+const NOT_FOUND = {};
export function identity(x) {
return x;
@@ -395,71 +395,8 @@ export function regex_scan(regex, string) {
return List.fromArray(matches);
}
-class Map {
- static #hashcode_cache = new WeakMap();
-
- static hash(value) {
- let existing = this.#hashcode_cache.get(value);
- if (existing) {
- return existing;
- } else if (value instanceof Object) {
- let hashcode = inspect(value);
- HASHCODE_CACHE.set(value, hashcode);
- return hashcode;
- } else {
- return value.toString();
- }
- }
-
- constructor() {
- this.entries = new globalThis.Map();
- }
-
- get size() {
- return this.entries.size;
- }
-
- inspect() {
- let entries = [...this.entries.values()]
- .map((pair) => inspect(pair))
- .join(", ");
- return `map.from_list([${entries}])`;
- }
-
- copy() {
- let map = new Map();
- map.entries = new globalThis.Map(this.entries);
- return map;
- }
-
- toList() {
- return List.fromArray([...this.entries.values()]);
- }
-
- insert(k, v) {
- let map = this.copy();
- map.entries.set(Map.hash(k), [k, v]);
- return map;
- }
-
- delete(k) {
- let map = this.copy();
- map.entries.delete(Map.hash(k));
- return map;
- }
-
- get(key) {
- let code = Map.hash(key);
- if (this.entries.has(code)) {
- return new Ok(this.entries.get(code)[1]);
- } else {
- return new Error(Nil);
- }
- }
-}
-
export function new_map() {
- return new Map();
+ return pmap.create();
}
export function map_size(map) {
@@ -467,19 +404,23 @@ export function map_size(map) {
}
export function map_to_list(map) {
- return map.toList();
+ return List.fromArray(pmap.entries(map));
}
-export function map_remove(k, map) {
- return map.delete(k);
+export function map_remove(key, map) {
+ return pmap.remove(map, key);
}
export function map_get(map, key) {
- return map.get(key);
+ const value = pmap.getWithDefault(map, key, NOT_FOUND);
+ if (value === NOT_FOUND) {
+ return new Error(Nil);
+ }
+ return new Ok(value);
}
export function map_insert(key, value, map) {
- return map.insert(key, value);
+ return pmap.set(map, key, value);
}
function unsafe_percent_decode(string) {
@@ -610,7 +551,7 @@ export function classify_dynamic(data) {
return `Tuple of ${data.length} elements`;
} else if (BitString.isBitString(data)) {
return "BitString";
- } else if (data instanceof Map) {
+ } else if (data instanceof pmap.Map) {
return "Map";
} else if (typeof data === "number") {
return "Float";
@@ -680,13 +621,7 @@ export function decode_result(data) {
}
export function decode_map(data) {
- if (data instanceof Map) {
- return new Ok(data)
- }
- if (typeof data === 'object' && data !== null && Object.getPrototypeOf(data) == Object.getPrototypeOf({})) {
- return new Ok(new Map(Object.entries(data)))
- }
- return decoder_error("Map", data);
+ return data instanceof pmap.Map ? new Ok(data) : decoder_error("Map", data);
}
export function decode_option(data, decoder) {
@@ -703,8 +638,8 @@ export function decode_option(data, decoder) {
export function decode_field(value, name) {
let error = () => decoder_error_no_classify("field", "nothing");
- if (value instanceof Map) {
- let entry = value.get(name);
+ if (value instanceof pmap.Map) {
+ let entry = map_get(value, name);
return entry.isOk() ? entry : error();
}
try {
diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs
new file mode 100644
index 0000000..b65621d
--- /dev/null
+++ b/src/persistent-hash-map.mjs
@@ -0,0 +1,527 @@
+import {
+ inspect,
+ isEqual
+} from "./gleam.mjs";
+function getHash(s) {
+ if (typeof s === "number") return s;
+ if (typeof s !== "string") s = inspect(s);
+ let hash = 0;
+ const len = s.length;
+ for (let i = 0; i < len; i++) {
+ hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0;
+ }
+ return hash;
+}
+const SHIFT = 5; // number of bits you need to shift by to get the next bucket
+const BUCKET_SIZE = Math.pow(2, SHIFT);
+const MASK = BUCKET_SIZE - 1; // used to zero out all bits not in the bucket
+const MAX_INDEX_NODE = BUCKET_SIZE / 2; // when does index node grow into array node
+const MIN_ARRAY_NODE = BUCKET_SIZE / 4; // when does array node shrink to index node
+const ENTRY = 0;
+const ARRAY_NODE = 1;
+const INDEX_NODE = 2;
+const COLLISION_NODE = 3;
+const EMPTY = {
+ type: INDEX_NODE,
+ bitmap: 0,
+ array: [],
+};
+/** Mask the hash to get only the bucket corresponding to shift */
+function mask(hash, shift) {
+ return (hash >>> shift) & MASK;
+}
+/** Set only the Nth bit where N is the masked hash */
+function bitpos(hash, shift) {
+ return 1 << mask(hash, shift);
+}
+/** Count the number of 1 bits in a number */
+function bitcount(x) {
+ x -= (x >> 1) & 0x55555555;
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0f0f0f0f;
+ x += x >> 8;
+ x += x >> 16;
+ return x & 0x7f;
+}
+/** Calculate the array index of an item in a bitmap index node */
+function index(bitmap, bit) {
+ return bitcount(bitmap & (bit - 1));
+}
+/** Efficiently copy an array and set one value at an index */
+function cloneAndSet(arr, at, val) {
+ const len = arr.length;
+ const out = new Array(len);
+ for (let i = 0; i < len; ++i) {
+ out[i] = arr[i];
+ }
+ out[at] = val;
+ return out;
+}
+/** Efficiently copy an array and insert one value at an index */
+function spliceIn(arr, at, val) {
+ const len = arr.length;
+ const out = new Array(len + 1);
+ let i = 0;
+ let g = 0;
+ while (i < at) {
+ out[g++] = arr[i++];
+ }
+ out[g++] = val;
+ while (i < len) {
+ out[g++] = arr[i++];
+ }
+ return out;
+}
+/** Efficiently copy an array and remove one value at an index */
+function spliceOut(arr, at) {
+ const len = arr.length;
+ const out = new Array(len - 1);
+ let i = 0;
+ let g = 0;
+ while (i < at) {
+ out[g++] = arr[i++];
+ }
+ ++i;
+ while (i < len) {
+ out[g++] = arr[i++];
+ }
+ return out;
+}
+/** Create a new node containing two entries */
+function createNode(shift, key1, val1, key2hash, key2, val2) {
+ const key1hash = getHash(key1);
+ if (key1hash === key2hash) {
+ return {
+ type: COLLISION_NODE,
+ hash: key1hash,
+ array: [
+ { type: ENTRY, k: key1, v: val1 },
+ { type: ENTRY, k: key2, v: val2 },
+ ],
+ };
+ }
+ const addedLeaf = { val: false };
+ return assoc(assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), shift, key2hash, key2, val2, addedLeaf);
+}
+/** Associate a node with a new entry, creating a new node. */
+function assoc(root, shift, hash, key, val, addedLeaf) {
+ switch (root.type) {
+ case ARRAY_NODE:
+ return assocArray(root, shift, hash, key, val, addedLeaf);
+ case INDEX_NODE:
+ return assocIndex(root, shift, hash, key, val, addedLeaf);
+ case COLLISION_NODE:
+ return assocCollision(root, shift, hash, key, val, addedLeaf);
+ }
+}
+function assocArray(root, shift, hash, key, val, addedLeaf) {
+ const idx = mask(hash, shift);
+ const node = root.array[idx];
+ // if the corresponding index is empty set the index to a newly created node
+ if (node === undefined) {
+ return {
+ type: ARRAY_NODE,
+ size: root.size + 1,
+ array: cloneAndSet(root.array, idx, assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf)),
+ };
+ }
+ // otherwise call assoc on the child node
+ const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf);
+ // if the child node hasn't changed just return the old root
+ if (n === node) {
+ return root;
+ }
+ // otherwise set the index to the new node
+ return {
+ type: ARRAY_NODE,
+ size: root.size,
+ array: cloneAndSet(root.array, idx, n),
+ };
+}
+function assocIndex(root, shift, hash, key, val, addedLeaf) {
+ const bit = bitpos(hash, shift);
+ const idx = index(root.bitmap, bit);
+ // if there is already a item at this hash index..
+ if ((root.bitmap & bit) !== 0) {
+ // if there is a node at the index (not an entry), call assoc on the child node
+ const node = root.array[idx];
+ if (node.type !== ENTRY) {
+ const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf);
+ if (n === node) {
+ return root;
+ }
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap,
+ array: cloneAndSet(root.array, idx, n),
+ };
+ }
+ // otherwise there is an entry at the index
+ // if the keys are equal replace the entry with the updated value
+ const keyOrNull = node.k;
+ if (isEqual(key, keyOrNull)) {
+ if (val === node.v) {
+ return root;
+ }
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap,
+ array: cloneAndSet(root.array, idx, { type: ENTRY, k: keyOrNull, v: val }),
+ };
+ }
+ // if the keys are not equal, replace the entry with a new child node
+ addedLeaf.val = true;
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap,
+ array: cloneAndSet(root.array, idx, createNode(shift + SHIFT, keyOrNull, node.v, hash, key, val)),
+ };
+ }
+ else {
+ // else there is currently no item at the hash index
+ const n = root.array.length;
+ // if the number of nodes is at the maximum, expand this node into an array node
+ if (n >= MAX_INDEX_NODE) {
+ // create a 32 length array for the new array node (one for each bit in the hash)
+ const nodes = new Array(32);
+ // create and insert a node for the new entry
+ const jdx = mask(hash, shift);
+ nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf);
+ let j = 0;
+ let bitmap = root.bitmap;
+ // place each item in the index node into the correct spot in the array node
+ // loop through all 32 bits / array positions
+ for (let i = 0; i < 32; i++) {
+ if ((bitmap & 1) !== 0) {
+ const node = root.array[j++];
+ // turn any entries into index nodes
+ // since array nodes should only contain other nodes, not entries
+ if (node.type !== ENTRY) {
+ nodes[i] = node;
+ }
+ else {
+ nodes[i] = assocIndex(EMPTY, shift + SHIFT, getHash(node.k), node.k, node.v, addedLeaf);
+ }
+ }
+ // shift the bitmap to process the next bit
+ bitmap >>>= 1;
+ }
+ return {
+ type: ARRAY_NODE,
+ size: n + 1,
+ array: nodes,
+ };
+ }
+ else {
+ // else there is still space in this index node
+ // simply insert a new entry at the hash index
+ const newArray = spliceIn(root.array, idx, { type: ENTRY, k: key, v: val });
+ addedLeaf.val = true;
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap | bit,
+ array: newArray,
+ };
+ }
+ }
+}
+function assocCollision(root, shift, hash, key, val, addedLeaf) {
+ // if there is a hash collision
+ if (hash === root.hash) {
+ const idx = collisionIndexOf(root, key);
+ // if this key already exists replace the entry with the new value
+ if (idx !== -1) {
+ const entry = root.array[idx];
+ if (entry.v === val) {
+ return root;
+ }
+ return {
+ type: COLLISION_NODE,
+ hash: hash,
+ array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }),
+ };
+ }
+ // otherwise insert the entry at the end of the array
+ const size = root.array.length;
+ addedLeaf.val = true;
+ return {
+ type: COLLISION_NODE,
+ hash: hash,
+ array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }),
+ };
+ }
+ // if there is no hash collision, upgrade to an index node
+ return assoc({
+ type: INDEX_NODE,
+ bitmap: bitpos(root.hash, shift),
+ array: [root],
+ }, shift, hash, key, val, addedLeaf);
+}
+/** Find the index of a key in the collision node's array */
+function collisionIndexOf(root, key) {
+ const size = root.array.length;
+ for (let i = 0; i < size; i++) {
+ if (isEqual(key, root.array[i].k)) {
+ return i;
+ }
+ }
+ return -1;
+}
+/** Return the found entry or undefined if not present in the root */
+function find(root, shift, hash, key) {
+ switch (root.type) {
+ case ARRAY_NODE:
+ return findArray(root, shift, hash, key);
+ case INDEX_NODE:
+ return findIndex(root, shift, hash, key);
+ case COLLISION_NODE:
+ return findCollision(root, shift, hash, key);
+ }
+}
+function findArray(root, shift, hash, key) {
+ const idx = mask(hash, shift);
+ const node = root.array[idx];
+ if (node === undefined) {
+ return undefined;
+ }
+ return find(node, shift + SHIFT, hash, key);
+}
+function findIndex(root, shift, hash, key) {
+ const bit = bitpos(hash, shift);
+ if ((root.bitmap & bit) === 0) {
+ return undefined;
+ }
+ const idx = index(root.bitmap, bit);
+ const node = root.array[idx];
+ if (node.type !== ENTRY) {
+ return find(node, shift + SHIFT, hash, key);
+ }
+ if (isEqual(key, node.k)) {
+ return node;
+ }
+ return undefined;
+}
+function findCollision(root, _shift, _hash, key) {
+ const idx = collisionIndexOf(root, key);
+ if (idx < 0) {
+ return undefined;
+ }
+ return root.array[idx];
+}
+/**
+* Remove an entry from the root, returning the updated root.
+* Returns undefined if the node should be removed from the parent.
+* */
+function without(root, shift, hash, key) {
+ switch (root.type) {
+ case ARRAY_NODE:
+ return withoutArray(root, shift, hash, key);
+ case INDEX_NODE:
+ return withoutIndex(root, shift, hash, key);
+ case COLLISION_NODE:
+ return withoutCollision(root, shift, hash, key);
+ }
+}
+function withoutArray(root, shift, hash, key) {
+ const idx = mask(hash, shift);
+ const node = root.array[idx];
+ if (node === undefined) {
+ return root; // already empty
+ }
+ const n = without(node, shift + SHIFT, hash, key);
+ if (n === node) {
+ return root; // no changes
+ }
+ // if the recursive call returned undefined the node should be removed
+ if (n === undefined) {
+ // if the number of child nodes is at the minimum, pack into an index node
+ if (root.size <= MIN_ARRAY_NODE) {
+ const arr = root.array;
+ const out = new Array(root.size - 1);
+ let i = 0;
+ let j = 0;
+ let bitmap = 0;
+ while (i < idx) {
+ const nv = arr[i];
+ if (nv !== undefined) {
+ out[j] = nv;
+ bitmap |= 1 << i;
+ ++j;
+ }
+ ++i;
+ }
+ ++i; // skip copying the removed node
+ while (i < arr.length) {
+ const nv = arr[i];
+ if (nv !== undefined) {
+ out[j] = nv;
+ bitmap |= 1 << i;
+ ++j;
+ }
+ ++i;
+ }
+ return {
+ type: INDEX_NODE,
+ bitmap: bitmap,
+ array: out,
+ };
+ }
+ return {
+ type: ARRAY_NODE,
+ size: root.size - 1,
+ array: cloneAndSet(root.array, idx, n),
+ };
+ }
+ return {
+ type: ARRAY_NODE,
+ size: root.size,
+ array: cloneAndSet(root.array, idx, n),
+ };
+}
+function withoutIndex(root, shift, hash, key) {
+ const bit = bitpos(hash, shift);
+ if ((root.bitmap & bit) === 0) {
+ return root; // already empty
+ }
+ const idx = index(root.bitmap, bit);
+ const node = root.array[idx];
+ // if the item is not an entry
+ if (node.type !== ENTRY) {
+ const n = without(node, shift + SHIFT, hash, key);
+ if (n === node) {
+ return root; // no changes
+ }
+ // if not undefined, the child node still has items, so update it
+ if (n !== undefined) {
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap,
+ array: cloneAndSet(root.array, idx, n),
+ };
+ }
+ // otherwise the child node should be removed
+ // if it was the only child node, remove this node from the parent
+ if (root.bitmap === bit) {
+ return undefined;
+ }
+ // otherwise just remove the child node
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap ^ bit,
+ array: spliceOut(root.array, idx),
+ };
+ }
+ // otherwise the item is an entry, remove it if the key matches
+ if (isEqual(key, node.k)) {
+ if (root.bitmap === bit) {
+ return undefined;
+ }
+ return {
+ type: INDEX_NODE,
+ bitmap: root.bitmap ^ bit,
+ array: spliceOut(root.array, idx),
+ };
+ }
+ return root;
+}
+function withoutCollision(root, _shift, _hash, key) {
+ const idx = collisionIndexOf(root, key);
+ // if the key not found, no changes
+ if (idx === -1) {
+ return root;
+ }
+ // otherwise the entry was found, remove it
+ // if it was the only entry in this node, remove the whole node
+ if (root.array.length === 1) {
+ return undefined;
+ }
+ // otherwise just remove the entry
+ return {
+ type: COLLISION_NODE,
+ hash: root.hash,
+ array: spliceOut(root.array, idx),
+ };
+}
+function toArray(root, result) {
+ if (root === undefined) {
+ return;
+ }
+ const items = root.array;
+ const size = items.length;
+ for (let i = 0; i < size; i++) {
+ const item = items[i];
+ if (item === undefined) {
+ continue;
+ }
+ if (item.type === ENTRY) {
+ result.push([item.k, item.v]);
+ continue;
+ }
+ toArray(item, result);
+ }
+}
+export class Map {
+ constructor(root, size) {
+ this.root = root;
+ this.size = size;
+ }
+}
+/** Extra wrapper to keep track of map size */
+export function create() {
+ return new Map(undefined, 0);
+}
+export function get(map, key) {
+ if (map.root === undefined) {
+ return undefined;
+ }
+ return find(map.root, 0, getHash(key), key)?.v;
+}
+export function getWithDefault(map, key, notFound) {
+ if (map.root === undefined) {
+ return notFound;
+ }
+ const found = find(map.root, 0, getHash(key), key);
+ if (found === undefined) {
+ return notFound;
+ }
+ return found.v;
+}
+export function set(map, key, val) {
+ const addedLeaf = { val: false };
+ const root = map.root === undefined ? EMPTY : map.root;
+ const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf);
+ if (newRoot === map.root) {
+ return map;
+ }
+ return new Map(newRoot, addedLeaf.val ? map.size + 1 : map.size);
+}
+export function remove(map, key) {
+ if (map.root === undefined) {
+ return map;
+ }
+ const newRoot = without(map.root, 0, getHash(key), key);
+ if (newRoot === map.root) {
+ return map;
+ }
+ if (newRoot === undefined) {
+ return create();
+ }
+ return new Map(newRoot, map.size - 1);
+}
+export function has(map, key) {
+ if (map.root === undefined) {
+ return false;
+ }
+ return find(map.root, 0, getHash(key), key) !== undefined;
+}
+export function entries(map) {
+ if (map.root === undefined) {
+ return [];
+ }
+ const result = [];
+ toArray(map.root, result);
+ return result;
+}
+export function __include_me() {
+ // blank
+}