diff options
author | Julian Schurhammer <julian.schurhammer@gmail.com> | 2022-07-23 09:33:25 +1200 |
---|---|---|
committer | Louis Pilfold <louis@lpil.uk> | 2023-03-13 10:48:26 +0000 |
commit | 1b305abbecbcd9004439ed9418077b367ff6b0b0 (patch) | |
tree | ba61bae239de89ca54e4e5af4f889c5ec43aa18d | |
parent | 589a74661749626a4a7b8a5e27b536430ed680af (diff) | |
download | gleam_stdlib-1b305abbecbcd9004439ed9418077b367ff6b0b0.tar.gz gleam_stdlib-1b305abbecbcd9004439ed9418077b367ff6b0b0.zip |
initial implementation of 'hash array mapped trie' structure for persistent maps
-rw-r--r-- | src/gleam_stdlib.mjs | 97 | ||||
-rw-r--r-- | src/persistent-hash-map.mjs | 527 |
2 files changed, 543 insertions, 81 deletions
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index 60ab625..9dca761 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -15,10 +15,10 @@ import { } from "./gleam/regex.mjs"; import { DecodeError } from "./gleam/dynamic.mjs"; import { Some, None } from "./gleam/option.mjs"; - -const HASHCODE_CACHE = new WeakMap(); +import * as pmap from "./persistent-hash-map.mjs"; const Nil = undefined; +const NOT_FOUND = {}; export function identity(x) { return x; @@ -395,71 +395,8 @@ export function regex_scan(regex, string) { return List.fromArray(matches); } -class Map { - static #hashcode_cache = new WeakMap(); - - static hash(value) { - let existing = this.#hashcode_cache.get(value); - if (existing) { - return existing; - } else if (value instanceof Object) { - let hashcode = inspect(value); - HASHCODE_CACHE.set(value, hashcode); - return hashcode; - } else { - return value.toString(); - } - } - - constructor() { - this.entries = new globalThis.Map(); - } - - get size() { - return this.entries.size; - } - - inspect() { - let entries = [...this.entries.values()] - .map((pair) => inspect(pair)) - .join(", "); - return `map.from_list([${entries}])`; - } - - copy() { - let map = new Map(); - map.entries = new globalThis.Map(this.entries); - return map; - } - - toList() { - return List.fromArray([...this.entries.values()]); - } - - insert(k, v) { - let map = this.copy(); - map.entries.set(Map.hash(k), [k, v]); - return map; - } - - delete(k) { - let map = this.copy(); - map.entries.delete(Map.hash(k)); - return map; - } - - get(key) { - let code = Map.hash(key); - if (this.entries.has(code)) { - return new Ok(this.entries.get(code)[1]); - } else { - return new Error(Nil); - } - } -} - export function new_map() { - return new Map(); + return pmap.create(); } export function map_size(map) { @@ -467,19 +404,23 @@ export function map_size(map) { } export function map_to_list(map) { - return map.toList(); + return List.fromArray(pmap.entries(map)); } -export function map_remove(k, map) { - return map.delete(k); +export function map_remove(key, map) { + return pmap.remove(map, key); } export function map_get(map, key) { - return map.get(key); + const value = pmap.getWithDefault(map, key, NOT_FOUND); + if (value === NOT_FOUND) { + return new Error(Nil); + } + return new Ok(value); } export function map_insert(key, value, map) { - return map.insert(key, value); + return pmap.set(map, key, value); } function unsafe_percent_decode(string) { @@ -610,7 +551,7 @@ export function classify_dynamic(data) { return `Tuple of ${data.length} elements`; } else if (BitString.isBitString(data)) { return "BitString"; - } else if (data instanceof Map) { + } else if (data instanceof pmap.Map) { return "Map"; } else if (typeof data === "number") { return "Float"; @@ -680,13 +621,7 @@ export function decode_result(data) { } export function decode_map(data) { - if (data instanceof Map) { - return new Ok(data) - } - if (typeof data === 'object' && data !== null && Object.getPrototypeOf(data) == Object.getPrototypeOf({})) { - return new Ok(new Map(Object.entries(data))) - } - return decoder_error("Map", data); + return data instanceof pmap.Map ? new Ok(data) : decoder_error("Map", data); } export function decode_option(data, decoder) { @@ -703,8 +638,8 @@ export function decode_option(data, decoder) { export function decode_field(value, name) { let error = () => decoder_error_no_classify("field", "nothing"); - if (value instanceof Map) { - let entry = value.get(name); + if (value instanceof pmap.Map) { + let entry = map_get(value, name); return entry.isOk() ? entry : error(); } try { diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs new file mode 100644 index 0000000..b65621d --- /dev/null +++ b/src/persistent-hash-map.mjs @@ -0,0 +1,527 @@ +import { + inspect, + isEqual +} from "./gleam.mjs"; +function getHash(s) { + if (typeof s === "number") return s; + if (typeof s !== "string") s = inspect(s); + let hash = 0; + const len = s.length; + for (let i = 0; i < len; i++) { + hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0; + } + return hash; +} +const SHIFT = 5; // number of bits you need to shift by to get the next bucket +const BUCKET_SIZE = Math.pow(2, SHIFT); +const MASK = BUCKET_SIZE - 1; // used to zero out all bits not in the bucket +const MAX_INDEX_NODE = BUCKET_SIZE / 2; // when does index node grow into array node +const MIN_ARRAY_NODE = BUCKET_SIZE / 4; // when does array node shrink to index node +const ENTRY = 0; +const ARRAY_NODE = 1; +const INDEX_NODE = 2; +const COLLISION_NODE = 3; +const EMPTY = { + type: INDEX_NODE, + bitmap: 0, + array: [], +}; +/** Mask the hash to get only the bucket corresponding to shift */ +function mask(hash, shift) { + return (hash >>> shift) & MASK; +} +/** Set only the Nth bit where N is the masked hash */ +function bitpos(hash, shift) { + return 1 << mask(hash, shift); +} +/** Count the number of 1 bits in a number */ +function bitcount(x) { + x -= (x >> 1) & 0x55555555; + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0f0f0f0f; + x += x >> 8; + x += x >> 16; + return x & 0x7f; +} +/** Calculate the array index of an item in a bitmap index node */ +function index(bitmap, bit) { + return bitcount(bitmap & (bit - 1)); +} +/** Efficiently copy an array and set one value at an index */ +function cloneAndSet(arr, at, val) { + const len = arr.length; + const out = new Array(len); + for (let i = 0; i < len; ++i) { + out[i] = arr[i]; + } + out[at] = val; + return out; +} +/** Efficiently copy an array and insert one value at an index */ +function spliceIn(arr, at, val) { + const len = arr.length; + const out = new Array(len + 1); + let i = 0; + let g = 0; + while (i < at) { + out[g++] = arr[i++]; + } + out[g++] = val; + while (i < len) { + out[g++] = arr[i++]; + } + return out; +} +/** Efficiently copy an array and remove one value at an index */ +function spliceOut(arr, at) { + const len = arr.length; + const out = new Array(len - 1); + let i = 0; + let g = 0; + while (i < at) { + out[g++] = arr[i++]; + } + ++i; + while (i < len) { + out[g++] = arr[i++]; + } + return out; +} +/** Create a new node containing two entries */ +function createNode(shift, key1, val1, key2hash, key2, val2) { + const key1hash = getHash(key1); + if (key1hash === key2hash) { + return { + type: COLLISION_NODE, + hash: key1hash, + array: [ + { type: ENTRY, k: key1, v: val1 }, + { type: ENTRY, k: key2, v: val2 }, + ], + }; + } + const addedLeaf = { val: false }; + return assoc(assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), shift, key2hash, key2, val2, addedLeaf); +} +/** Associate a node with a new entry, creating a new node. */ +function assoc(root, shift, hash, key, val, addedLeaf) { + switch (root.type) { + case ARRAY_NODE: + return assocArray(root, shift, hash, key, val, addedLeaf); + case INDEX_NODE: + return assocIndex(root, shift, hash, key, val, addedLeaf); + case COLLISION_NODE: + return assocCollision(root, shift, hash, key, val, addedLeaf); + } +} +function assocArray(root, shift, hash, key, val, addedLeaf) { + const idx = mask(hash, shift); + const node = root.array[idx]; + // if the corresponding index is empty set the index to a newly created node + if (node === undefined) { + return { + type: ARRAY_NODE, + size: root.size + 1, + array: cloneAndSet(root.array, idx, assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf)), + }; + } + // otherwise call assoc on the child node + const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); + // if the child node hasn't changed just return the old root + if (n === node) { + return root; + } + // otherwise set the index to the new node + return { + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, n), + }; +} +function assocIndex(root, shift, hash, key, val, addedLeaf) { + const bit = bitpos(hash, shift); + const idx = index(root.bitmap, bit); + // if there is already a item at this hash index.. + if ((root.bitmap & bit) !== 0) { + // if there is a node at the index (not an entry), call assoc on the child node + const node = root.array[idx]; + if (node.type !== ENTRY) { + const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); + if (n === node) { + return root; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, n), + }; + } + // otherwise there is an entry at the index + // if the keys are equal replace the entry with the updated value + const keyOrNull = node.k; + if (isEqual(key, keyOrNull)) { + if (val === node.v) { + return root; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, { type: ENTRY, k: keyOrNull, v: val }), + }; + } + // if the keys are not equal, replace the entry with a new child node + addedLeaf.val = true; + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, createNode(shift + SHIFT, keyOrNull, node.v, hash, key, val)), + }; + } + else { + // else there is currently no item at the hash index + const n = root.array.length; + // if the number of nodes is at the maximum, expand this node into an array node + if (n >= MAX_INDEX_NODE) { + // create a 32 length array for the new array node (one for each bit in the hash) + const nodes = new Array(32); + // create and insert a node for the new entry + const jdx = mask(hash, shift); + nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf); + let j = 0; + let bitmap = root.bitmap; + // place each item in the index node into the correct spot in the array node + // loop through all 32 bits / array positions + for (let i = 0; i < 32; i++) { + if ((bitmap & 1) !== 0) { + const node = root.array[j++]; + // turn any entries into index nodes + // since array nodes should only contain other nodes, not entries + if (node.type !== ENTRY) { + nodes[i] = node; + } + else { + nodes[i] = assocIndex(EMPTY, shift + SHIFT, getHash(node.k), node.k, node.v, addedLeaf); + } + } + // shift the bitmap to process the next bit + bitmap >>>= 1; + } + return { + type: ARRAY_NODE, + size: n + 1, + array: nodes, + }; + } + else { + // else there is still space in this index node + // simply insert a new entry at the hash index + const newArray = spliceIn(root.array, idx, { type: ENTRY, k: key, v: val }); + addedLeaf.val = true; + return { + type: INDEX_NODE, + bitmap: root.bitmap | bit, + array: newArray, + }; + } + } +} +function assocCollision(root, shift, hash, key, val, addedLeaf) { + // if there is a hash collision + if (hash === root.hash) { + const idx = collisionIndexOf(root, key); + // if this key already exists replace the entry with the new value + if (idx !== -1) { + const entry = root.array[idx]; + if (entry.v === val) { + return root; + } + return { + type: COLLISION_NODE, + hash: hash, + array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), + }; + } + // otherwise insert the entry at the end of the array + const size = root.array.length; + addedLeaf.val = true; + return { + type: COLLISION_NODE, + hash: hash, + array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }), + }; + } + // if there is no hash collision, upgrade to an index node + return assoc({ + type: INDEX_NODE, + bitmap: bitpos(root.hash, shift), + array: [root], + }, shift, hash, key, val, addedLeaf); +} +/** Find the index of a key in the collision node's array */ +function collisionIndexOf(root, key) { + const size = root.array.length; + for (let i = 0; i < size; i++) { + if (isEqual(key, root.array[i].k)) { + return i; + } + } + return -1; +} +/** Return the found entry or undefined if not present in the root */ +function find(root, shift, hash, key) { + switch (root.type) { + case ARRAY_NODE: + return findArray(root, shift, hash, key); + case INDEX_NODE: + return findIndex(root, shift, hash, key); + case COLLISION_NODE: + return findCollision(root, shift, hash, key); + } +} +function findArray(root, shift, hash, key) { + const idx = mask(hash, shift); + const node = root.array[idx]; + if (node === undefined) { + return undefined; + } + return find(node, shift + SHIFT, hash, key); +} +function findIndex(root, shift, hash, key) { + const bit = bitpos(hash, shift); + if ((root.bitmap & bit) === 0) { + return undefined; + } + const idx = index(root.bitmap, bit); + const node = root.array[idx]; + if (node.type !== ENTRY) { + return find(node, shift + SHIFT, hash, key); + } + if (isEqual(key, node.k)) { + return node; + } + return undefined; +} +function findCollision(root, _shift, _hash, key) { + const idx = collisionIndexOf(root, key); + if (idx < 0) { + return undefined; + } + return root.array[idx]; +} +/** +* Remove an entry from the root, returning the updated root. +* Returns undefined if the node should be removed from the parent. +* */ +function without(root, shift, hash, key) { + switch (root.type) { + case ARRAY_NODE: + return withoutArray(root, shift, hash, key); + case INDEX_NODE: + return withoutIndex(root, shift, hash, key); + case COLLISION_NODE: + return withoutCollision(root, shift, hash, key); + } +} +function withoutArray(root, shift, hash, key) { + const idx = mask(hash, shift); + const node = root.array[idx]; + if (node === undefined) { + return root; // already empty + } + const n = without(node, shift + SHIFT, hash, key); + if (n === node) { + return root; // no changes + } + // if the recursive call returned undefined the node should be removed + if (n === undefined) { + // if the number of child nodes is at the minimum, pack into an index node + if (root.size <= MIN_ARRAY_NODE) { + const arr = root.array; + const out = new Array(root.size - 1); + let i = 0; + let j = 0; + let bitmap = 0; + while (i < idx) { + const nv = arr[i]; + if (nv !== undefined) { + out[j] = nv; + bitmap |= 1 << i; + ++j; + } + ++i; + } + ++i; // skip copying the removed node + while (i < arr.length) { + const nv = arr[i]; + if (nv !== undefined) { + out[j] = nv; + bitmap |= 1 << i; + ++j; + } + ++i; + } + return { + type: INDEX_NODE, + bitmap: bitmap, + array: out, + }; + } + return { + type: ARRAY_NODE, + size: root.size - 1, + array: cloneAndSet(root.array, idx, n), + }; + } + return { + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, n), + }; +} +function withoutIndex(root, shift, hash, key) { + const bit = bitpos(hash, shift); + if ((root.bitmap & bit) === 0) { + return root; // already empty + } + const idx = index(root.bitmap, bit); + const node = root.array[idx]; + // if the item is not an entry + if (node.type !== ENTRY) { + const n = without(node, shift + SHIFT, hash, key); + if (n === node) { + return root; // no changes + } + // if not undefined, the child node still has items, so update it + if (n !== undefined) { + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, n), + }; + } + // otherwise the child node should be removed + // if it was the only child node, remove this node from the parent + if (root.bitmap === bit) { + return undefined; + } + // otherwise just remove the child node + return { + type: INDEX_NODE, + bitmap: root.bitmap ^ bit, + array: spliceOut(root.array, idx), + }; + } + // otherwise the item is an entry, remove it if the key matches + if (isEqual(key, node.k)) { + if (root.bitmap === bit) { + return undefined; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap ^ bit, + array: spliceOut(root.array, idx), + }; + } + return root; +} +function withoutCollision(root, _shift, _hash, key) { + const idx = collisionIndexOf(root, key); + // if the key not found, no changes + if (idx === -1) { + return root; + } + // otherwise the entry was found, remove it + // if it was the only entry in this node, remove the whole node + if (root.array.length === 1) { + return undefined; + } + // otherwise just remove the entry + return { + type: COLLISION_NODE, + hash: root.hash, + array: spliceOut(root.array, idx), + }; +} +function toArray(root, result) { + if (root === undefined) { + return; + } + const items = root.array; + const size = items.length; + for (let i = 0; i < size; i++) { + const item = items[i]; + if (item === undefined) { + continue; + } + if (item.type === ENTRY) { + result.push([item.k, item.v]); + continue; + } + toArray(item, result); + } +} +export class Map { + constructor(root, size) { + this.root = root; + this.size = size; + } +} +/** Extra wrapper to keep track of map size */ +export function create() { + return new Map(undefined, 0); +} +export function get(map, key) { + if (map.root === undefined) { + return undefined; + } + return find(map.root, 0, getHash(key), key)?.v; +} +export function getWithDefault(map, key, notFound) { + if (map.root === undefined) { + return notFound; + } + const found = find(map.root, 0, getHash(key), key); + if (found === undefined) { + return notFound; + } + return found.v; +} +export function set(map, key, val) { + const addedLeaf = { val: false }; + const root = map.root === undefined ? EMPTY : map.root; + const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); + if (newRoot === map.root) { + return map; + } + return new Map(newRoot, addedLeaf.val ? map.size + 1 : map.size); +} +export function remove(map, key) { + if (map.root === undefined) { + return map; + } + const newRoot = without(map.root, 0, getHash(key), key); + if (newRoot === map.root) { + return map; + } + if (newRoot === undefined) { + return create(); + } + return new Map(newRoot, map.size - 1); +} +export function has(map, key) { + if (map.root === undefined) { + return false; + } + return find(map.root, 0, getHash(key), key) !== undefined; +} +export function entries(map) { + if (map.root === undefined) { + return []; + } + const result = []; + toArray(map.root, result); + return result; +} +export function __include_me() { + // blank +} |