1 // Copyright © Tavian Barnes <tavianator@tavianator.com>
2 // SPDX-License-Identifier: 0BSD
5 * This is an implementation of a "qp trie," as documented at
6 * https://dotat.at/prog/qp/README.html
8 * An uncompressed trie over the dataset {AAAA, AADD, ABCD, DDAA, DDDD} would
12 * ●───→●───→●───→●───→○
18 * └───→●───→●───→●───→○
22 * A compressed (PATRICIA) trie collapses internal nodes that have only a single
36 * The nodes can be compressed further by dropping the actual compressed
37 * sequences from the nodes, storing it only in the leaves. This is the
38 * technique applied in QP tries, and the crit-bit trees that inspired them
39 * (https://cr.yp.to/critbit.html). Only the index to test, and the values to
40 * branch on, need to be stored in each node.
53 * Nodes are represented very compactly. Rather than a dense array of children,
54 * a sparse array of only the non-NULL children directly follows the node in
55 * memory. A bitmap is used to track which children exist.
58 * │ [4] [3] [2][1][0] ←─ children
60 * │ 14 10 6 3 0 ←─ sparse index
62 * │ 0100010001001001 ←─ bitmap
64 * │ To convert a sparse index to a dense index, mask off the bits above it, and
65 * │ count the remaining bits.
67 * │ 10 ←─ sparse index
69 * │ 0000001111111111 ←─ mask
70 * │ & 0100010001001001 ←─ bitmap
72 * │ = 0000000001001001
74 * │ [3] ←─ dense index
75 * └───────────────────┘
77 * This implementation tests a whole nibble (half byte/hex digit) at every
78 * branch, so the bitmap takes up 16 bits. The remainder of a machine word is
79 * used to hold the offset, which severely constrains its range on 32-bit
80 * platforms. As a workaround, we store relative instead of absolute offsets,
81 * and insert intermediate singleton "jump" nodes when necessary.
// The nibble math below (two nibbles per byte, shift by 4) requires 8-bit bytes.
95 bfs_static_assert(CHAR_WIDTH == 8);
// On x86, emit two clones of popcount-heavy functions — one using the POPCNT
// instruction, one generic — resolved at load time.
// NOTE(review): the matching #else/#endif lines are not visible in this view.
97 #if BFS_USE_TARGET_CLONES && (__i386__ || __x86_64__)
98 # define TARGET_CLONES_POPCNT __attribute__((target_clones("popcnt", "default")))
100 # define TARGET_CLONES_POPCNT
103 /** Number of bits for the sparse array bitmap, aka the range of a nibble. */
104 #define BITMAP_WIDTH 16
105 /** The number of remaining bits in a word, to hold the offset. */
106 #define OFFSET_WIDTH (SIZE_WIDTH - BITMAP_WIDTH)
107 /** The highest representable offset (only 64k on a 32-bit architecture). */
108 #define OFFSET_MAX (((size_t)1 << OFFSET_WIDTH) - 1)
111 * An internal node of the trie.
// NOTE(review): the `struct trie_node {` opening line is not visible in this view.
115 * A bitmap that holds which indices exist in the sparse children array.
116 * Bit i will be set if a child exists at logical index i, and its index
117 * into the array will be popcount(bitmap & ((1 << i) - 1)).
// 16 bits: one per possible nibble value (see BITMAP_WIDTH).
119 size_t bitmap : BITMAP_WIDTH;
122 * The offset into the key in nibbles. This is relative to the parent
123 * node, to support offsets larger than OFFSET_MAX.
// Packs into the same word as the bitmap, so a node header is one size_t.
125 size_t offset : OFFSET_WIDTH;
128 * Flexible array of children. Each pointer uses the lowest bit as a
129 * tag to distinguish internal nodes from leaves. This is safe as long
130 * as all dynamic allocations are aligned to more than a single byte.
132 uintptr_t children[];
135 /** Check if an encoded pointer is to a leaf. */
// Leaves are tagged by setting the low pointer bit (see children[] above).
// NOTE(review): function body not visible in this view — presumably `return ptr & 1;`.
136 static bool trie_is_leaf(uintptr_t ptr) {
140 /** Decode a pointer to a leaf. */
141 static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
142 bfs_assert(trie_is_leaf(ptr));
// XOR clears the tag bit set by trie_encode_leaf().
143 return (struct trie_leaf *)(ptr ^ 1);
146 /** Encode a pointer to a leaf. */
147 static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
148 uintptr_t ptr = (uintptr_t)leaf ^ 1;
// The assert doubles as an alignment check: if bit 0 was already set in the
// raw pointer, the XOR would clear it and this would fire.
149 bfs_assert(trie_is_leaf(ptr));
153 /** Decode a pointer to an internal node. */
// Internal nodes carry no tag bit, so decoding is a plain cast.
154 static struct trie_node *trie_decode_node(uintptr_t ptr) {
155 bfs_assert(!trie_is_leaf(ptr));
156 return (struct trie_node *)ptr;
159 /** Encode a pointer to an internal node. */
160 static uintptr_t trie_encode_node(const struct trie_node *node) {
161 uintptr_t ptr = (uintptr_t)node;
// Alignment check: node allocations must never have bit 0 set, or they
// would be mistaken for leaves.
162 bfs_assert(!trie_is_leaf(ptr));
// Initialize an empty trie: set up the two arenas that back all allocations.
// Nodes are sized by their flexible `children` array, leaves by `key`.
// NOTE(review): initialization of trie->root and the leaf list is not visible
// in this view — confirm against the full source.
166 void trie_init(struct trie *trie) {
169 VARENA_INIT(&trie->nodes, struct trie_node, children);
170 VARENA_INIT(&trie->leaves, struct trie_leaf, key);
173 /** Extract the nibble at a certain offset from a byte sequence. */
174 static unsigned char trie_key_nibble(const void *key, size_t offset) {
175 const unsigned char *bytes = key;
// Two nibbles per byte: byte index is offset / 2.
176 size_t byte = offset >> 1;
178 // A branchless version of
180 // return bytes[byte] >> 4;
182 // return bytes[byte] & 0xF;
// Even offsets select the low nibble (shift 0), odd the high (shift 4).
184 unsigned int shift = (offset & 1) << 2;
185 return (bytes[byte] >> shift) & 0xF;
189 * Finds a leaf in the trie that matches the key at every branch. If the key
190 * exists in the trie, the representative will match the searched key. But
191 * since only branch points are tested, it can be different from the key. In
192 * that case, the first mismatch between the key and the representative will be
193 * the depth at which to make a new branch to insert the key.
196 static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
197 uintptr_t ptr = trie->root;
// NOTE(review): the declaration of `offset` (the absolute nibble offset) is
// not visible in this view; node offsets accumulated below are relative.
203 while (!trie_is_leaf(ptr)) {
204 struct trie_node *node = trie_decode_node(ptr);
205 offset += node->offset;
// Default to child 0 when the key is exhausted or has no matching child:
// any leaf below this node serves as a representative.
207 unsigned int index = 0;
208 if ((offset >> 1) < length) {
209 unsigned char nibble = trie_key_nibble(key, offset);
210 unsigned int bit = 1U << nibble;
211 if (node->bitmap & bit) {
// Dense index = count of set bitmap bits below this nibble's bit.
212 index = count_ones(node->bitmap & (bit - 1));
215 ptr = node->children[index];
218 return trie_decode_leaf(ptr);
// Find an exact string key. The terminating NUL is included in the stored
// length (strlen + 1), so a string key can never collide with its prefixes.
221 struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) {
222 return trie_find_mem(trie, key, strlen(key) + 1);
// Find an exact binary key. The representative only matches the key at branch
// points, so the full memcmp() here confirms a genuine match.
// NOTE(review): the return statements are not visible in this view —
// presumably `return rep;` on match and NULL otherwise.
225 struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) {
226 struct trie_leaf *rep = trie_representative(trie, key, length);
227 if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) {
// Find a leaf whose key starts with the given string. Searching with
// length + 1 (including the NUL) steers the descent toward a leaf that shares
// the whole prefix; the memcmp() of just `length` bytes then verifies it.
234 struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
235 size_t length = strlen(key);
236 struct trie_leaf *rep = trie_representative(trie, key, length + 1);
237 if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) {
245 * Find a leaf that may end at the current node.
247 static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) {
248 // Finding a terminating NUL byte may take two nibbles
// A NUL byte is two 0x0 nibbles, so we may need to descend through two
// nodes that each branch on nibble 0 (bitmap bit 0, always child [0]).
249 for (int i = 0; i < 2; ++i) {
// Bit 0 set means a child exists for nibble 0x0.
// NOTE(review): the else/return-NULL path is not visible in this view.
250 if (!(node->bitmap & 1)) {
254 uintptr_t ptr = node->children[0];
255 if (trie_is_leaf(ptr)) {
256 return trie_decode_leaf(ptr);
258 node = trie_decode_node(ptr);
265 /** Check if a leaf is a prefix of a search key. */
// `skip` counts bytes already known to match from earlier in the descent, so
// they need not be re-compared.
266 static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) {
267 if (leaf && leaf->length <= length) {
// The `- 1` excludes the leaf's final byte — presumably its terminating
// NUL, which the search key need not contain at that position. TODO confirm.
268 return memcmp(key + skip, leaf->key + skip, leaf->length - skip - 1) == 0;
// Find the longest leaf that is a prefix of the given string key, tracking the
// best candidate seen so far while descending along the key's nibbles.
// NOTE(review): declarations of `offset`/`skip` and the `best = leaf` /
// loop-exit paths are not visible in this view.
275 static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) {
276 uintptr_t ptr = trie->root;
// Best (longest) prefix leaf found so far.
281 struct trie_leaf *best = NULL;
// Include the terminating NUL, matching how string keys are stored.
283 size_t length = strlen(key) + 1;
286 while (!trie_is_leaf(ptr)) {
287 struct trie_node *node = trie_decode_node(ptr);
288 offset += node->offset;
// Key exhausted: no deeper leaf can be a prefix.
289 if ((offset >> 1) >= length) {
// A leaf ending exactly at this node is a candidate prefix.
293 struct trie_leaf *leaf = trie_terminal_leaf(node);
294 if (trie_check_prefix(leaf, skip, key, length)) {
299 unsigned char nibble = trie_key_nibble(key, offset);
300 unsigned int bit = 1U << nibble;
301 if (node->bitmap & bit) {
302 unsigned int index = count_ones(node->bitmap & (bit - 1));
303 ptr = node->children[index];
// Final leaf reached by the descent may itself be a prefix.
309 struct trie_leaf *leaf = trie_decode_leaf(ptr);
310 if (trie_check_prefix(leaf, skip, key, length)) {
// Public wrapper for the prefix search above.
317 struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) {
318 return trie_find_prefix_impl(trie, key);
321 /** Create a new leaf, holding a copy of the given key. */
322 static struct trie_leaf *trie_leaf_alloc(struct trie *trie, const void *key, size_t length) {
// The arena sizes the allocation by the flexible `key` array length.
// NOTE(review): the NULL check on this allocation is not visible in this view.
323 struct trie_leaf *leaf = varena_alloc(&trie->leaves, length);
// Keep all leaves on the trie's linked list for iteration.
328 LIST_APPEND(trie, leaf);
331 leaf->length = length;
332 memcpy(leaf->key, key, length);
// Free a leaf: unlink it from the trie's leaf list, then return its
// variable-size allocation to the leaf arena.
338 static void trie_leaf_free(struct trie *trie, struct trie_leaf *leaf) {
339 LIST_REMOVE(trie, leaf);
340 varena_free(&trie->leaves, leaf, leaf->length);
343 /** Create a new node. */
// `size` is the child capacity; capacities are always powers of two (nodes
// double when full, see trie_node_insert()).
344 static struct trie_node *trie_node_alloc(struct trie *trie, size_t size) {
345 bfs_assert(has_single_bit(size));
346 return varena_alloc(&trie->nodes, size);
349 /** Reallocate a trie node. */
// Both capacities must be powers of two, matching the doubling/halving
// strategy used on insert and remove.
350 static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *node, size_t old_size, size_t new_size) {
351 bfs_assert(has_single_bit(old_size));
352 bfs_assert(has_single_bit(new_size));
353 return varena_realloc(&trie->nodes, node, old_size, new_size);
// Free a node. The caller passes the node's capacity; the assert checks it
// matches the current child count (so only exactly-full nodes are freed
// this way).
357 static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) {
358 bfs_assert(size == (size_t)count_ones(node->bitmap));
359 varena_free(&trie->nodes, node, size);
// Normalize a word loaded from a key so that its least-significant bits
// correspond to the earliest bytes in memory. On little-endian that is
// already true; on big-endian we byte-swap. Lets trie_mismatch() use
// trailing_zeros() to locate the first differing byte/nibble.
362 #if ENDIAN_NATIVE == ENDIAN_LITTLE
363 # define TRIE_BSWAP(n) (n)
364 #elif ENDIAN_NATIVE == ENDIAN_BIG
365 # define TRIE_BSWAP(n) bswap(n)
368 /** Find the offset of the first nibble that differs between two keys. */
// NOTE(review): the declaration of `i` and several branch lines are not
// visible in this view.
369 static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) {
// Only compare up to the shorter of the two keys.
374 if (rep->length < length) {
375 length = rep->length;
378 const char *rep_bytes = rep->key;
379 const char *key_bytes = key;
// Fast path: compare a word at a time.
382 for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) {
383 size_t rep_chunk, key_chunk;
// memcpy avoids unaligned loads and strict-aliasing violations.
384 memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk));
385 memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk));
387 if (rep_chunk != key_chunk) {
// After TRIE_BSWAP, low-order bits are the earliest bytes in memory.
389 size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk);
// trailing_zeros(diff) / 4 is the first differing nibble within the
// chunk (4 bits per nibble). NOTE(review): the conversion of `i` from
// bytes to nibbles is presumably on an elided line — confirm.
391 i += trailing_zeros(diff) / 4;
// Tail: byte-at-a-time for the remainder.
399 for (; i < length; ++i) {
400 unsigned char diff = rep_bytes[i] ^ key_bytes[i];
// Nibble offset: low nibble of byte i is 2*i; if the low nibbles agree
// (diff & 0xF == 0), the mismatch is the high nibble at 2*i + 1.
402 return 2 * i + !(diff & 0xF);
410 * Insert a leaf into a node. The node must not have a child in that position
411 * already. Effectively takes a subtrie like this:
420 * and transforms it to:
432 static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) {
433 struct trie_node *node = trie_decode_node(*ptr);
434 unsigned int size = count_ones(node->bitmap);
436 // Double the capacity every power of two
437 if (has_single_bit(size)) {
438 node = trie_node_realloc(trie, node, size, 2 * size);
// Presumably the OOM path: free the new leaf if realloc failed.
// NOTE(review): the surrounding if-check is not visible in this view.
440 trie_leaf_free(trie, leaf);
// The node may have moved; update the parent's pointer.
443 *ptr = trie_encode_node(node);
446 unsigned int bit = 1U << nibble;
448 // The child must not already be present
449 bfs_assert(!(node->bitmap & bit));
// Dense position for the new child, per the bitmap/popcount scheme.
// NOTE(review): the `node->bitmap |= bit;` update is not visible in this view.
452 unsigned int target = count_ones(node->bitmap & (bit - 1));
// Shift later children up one slot to make room.
453 for (size_t i = size; i > target; --i) {
454 node->children[i] = node->children[i - 1];
456 node->children[target] = trie_encode_leaf(leaf);
461 * When the current offset exceeds OFFSET_MAX, insert "jump" nodes that bridge
462 * the gap. This function takes a subtrie like this:
476 * so that a new key can be inserted like:
// Returns a pointer to the new node's single child slot, so the caller can
// keep descending (or jump again). NOTE(review): the NULL check on the node
// allocation is not visible in this view.
485 static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) {
486 // We only ever need to jump to leaf nodes, since internal nodes are
487 // guaranteed to be within OFFSET_MAX anyway
488 bfs_assert(trie_is_leaf(*ptr));
// A singleton node: capacity 1, exactly one child.
490 struct trie_node *node = trie_node_alloc(trie, 1);
// Advance by the largest representable relative offset.
495 *offset += OFFSET_MAX;
496 node->offset = OFFSET_MAX;
// The single child is keyed by the nibble at the new offset.
498 unsigned char nibble = trie_key_nibble(key, *offset);
499 node->bitmap = 1 << nibble;
// Splice the new node between the parent slot and the old leaf.
501 node->children[0] = *ptr;
502 *ptr = trie_encode_node(node);
503 return node->children;
507 * Split a node in the trie. Changes a subtrie like this:
// `offset` is the absolute nibble offset of *ptr's position; `mismatch` is
// the absolute nibble offset where `leaf` and `rep` first differ.
523 static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) {
524 unsigned char key_nibble = trie_key_nibble(leaf->key, mismatch);
525 unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch);
526 bfs_assert(key_nibble != rep_nibble);
// New two-child branch node at the mismatch point.
528 struct trie_node *node = trie_node_alloc(trie, 2);
// Presumably the OOM path: free the new leaf if allocation failed.
// NOTE(review): the surrounding if-check is not visible in this view.
530 trie_leaf_free(trie, leaf);
534 node->bitmap = (1 << key_nibble) | (1 << rep_nibble);
// The new node's offset is relative to its parent; an existing child node
// below must have its own relative offset reduced by the same amount.
536 size_t delta = mismatch - offset;
537 if (!trie_is_leaf(*ptr)) {
538 struct trie_node *child = trie_decode_node(*ptr);
539 child->offset -= delta;
541 node->offset = delta;
// Children are stored in nibble order: the greater nibble goes at index 1.
543 unsigned int key_index = key_nibble > rep_nibble;
544 node->children[key_index] = trie_encode_leaf(leaf);
545 node->children[key_index ^ 1] = *ptr;
546 *ptr = trie_encode_node(node);
// Insert a string key, including its terminating NUL (matching trie_find_str).
550 struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) {
551 return trie_insert_mem(trie, key, strlen(key) + 1);
// Insert a binary key, returning its (possibly pre-existing) leaf.
// NOTE(review): several lines are not visible in this view, including the
// empty-trie/NULL-rep handling around the root assignment, the declaration of
// `offset`, and loop-exit braces.
555 static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) {
556 struct trie_leaf *rep = trie_representative(trie, key, length);
557 size_t mismatch = trie_mismatch(rep, key, length);
// No differing nibble within the key's 2*length nibbles: key already present.
558 if (mismatch >= (length << 1)) {
562 struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length);
// Empty-trie path: the new leaf becomes the root.
568 trie->root = trie_encode_leaf(leaf);
// Descend to the deepest node at or before the mismatch offset.
573 uintptr_t *ptr = &trie->root;
574 while (!trie_is_leaf(*ptr)) {
575 struct trie_node *node = trie_decode_node(*ptr);
// This node branches past the mismatch: split above it instead.
576 if (offset + node->offset > mismatch) {
579 offset += node->offset;
581 unsigned char nibble = trie_key_nibble(key, offset);
582 unsigned int bit = 1U << nibble;
583 if (node->bitmap & bit) {
584 bfs_assert(offset < mismatch);
585 unsigned int index = count_ones(node->bitmap & (bit - 1));
586 ptr = &node->children[index];
// Node branches exactly at the mismatch but lacks this nibble:
// just insert the leaf as a new child.
588 bfs_assert(offset == mismatch);
589 return trie_node_insert(trie, ptr, leaf, nibble);
// Bridge offsets too large for one node with singleton jump nodes.
593 while (mismatch - offset > OFFSET_MAX) {
594 ptr = trie_jump(trie, ptr, key, &offset);
// Presumably the jump-failure path: free the new leaf on OOM.
// NOTE(review): the surrounding if-check is not visible in this view.
596 trie_leaf_free(trie, leaf);
601 return trie_split(trie, ptr, leaf, rep, offset, mismatch);
// Public wrapper for binary-key insertion.
604 struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) {
605 return trie_insert_mem_impl(trie, key, length);
608 /** Free a chain of singleton nodes. */
// Walks down a chain where every node has exactly one child (e.g. jump
// nodes), freeing each node and finally the leaf at the bottom.
609 static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
610 while (!trie_is_leaf(ptr)) {
611 struct trie_node *node = trie_decode_node(ptr);
613 // Make sure the bitmap is a power of two, i.e. it has just one child
614 bfs_assert(has_single_bit(node->bitmap));
// Grab the child before freeing the node that holds it.
616 ptr = node->children[0];
617 trie_node_free(trie, node, 1);
620 trie_leaf_free(trie, trie_decode_leaf(ptr));
624 * Try to collapse a two-child node like:
629 * *----->*----->*----->leaf
// Returns 0 on success. Collapsing merges the parent's offset into the
// surviving child's relative offset, which only works if the sum still fits
// in OFFSET_WIDTH bits. NOTE(review): the failure-return and pointer-rewiring
// lines are not visible in this view.
640 static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) {
// The sibling of the child being removed — the sole survivor.
641 uintptr_t other = parent_node->children[child_index ^ 1];
642 if (!trie_is_leaf(other)) {
643 struct trie_node *other_node = trie_decode_node(other);
// Merge offsets only if the combined relative offset is representable.
644 if (other_node->offset + parent_node->offset <= OFFSET_MAX) {
645 other_node->offset += parent_node->offset;
// The parent is a singleton at this point (one remaining child).
652 trie_node_free(trie, parent_node, 1);
// Remove a leaf from the trie, pruning any singleton chain above it and
// shrinking or collapsing its parent node as needed.
// NOTE(review): several lines are not visible in this view, including the
// declaration of `offset`, the parent-tracking assignments inside the loop,
// and the early return after trie_free_singletons for a chain rooted at the
// trie root.
657 static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
658 uintptr_t *child = &trie->root;
// Deepest ancestor with more than one child; everything below it is a
// singleton chain that can be freed wholesale.
659 uintptr_t *parent = NULL;
660 unsigned int child_bit = 0, child_index = 0;
662 while (!trie_is_leaf(*child)) {
663 struct trie_node *node = trie_decode_node(*child);
664 offset += node->offset;
// The leaf's own key guides the descent, so every nibble must branch.
665 bfs_assert((offset >> 1) < leaf->length);
667 unsigned char nibble = trie_key_nibble(leaf->key, offset);
668 unsigned int bit = 1U << nibble;
669 unsigned int bitmap = node->bitmap;
670 bfs_assert(bitmap & bit);
671 unsigned int index = count_ones(bitmap & (bit - 1));
673 // Advance the parent pointer, unless this node had only one child
674 if (!has_single_bit(bitmap)) {
680 child = &node->children[index];
683 bfs_assert(trie_decode_leaf(*child) == leaf);
// No multi-child ancestor: the whole trie is one singleton chain.
686 trie_free_singletons(trie, trie->root);
691 struct trie_node *node = trie_decode_node(*parent);
692 child = node->children + child_index;
// Free the singleton chain hanging off the doomed child slot.
693 trie_free_singletons(trie, *child);
// Clear the removed child's bit.
695 node->bitmap ^= child_bit;
696 unsigned int parent_size = count_ones(node->bitmap);
697 bfs_assert(parent_size > 0);
// Down to one child: try to merge this node into it entirely.
698 if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) {
// Close the gap left in the dense children array.
702 if (child_index < parent_size) {
703 memmove(child, child + 1, (parent_size - child_index) * sizeof(*child));
// Shrink capacity when the count falls back to a power of two.
706 if (has_single_bit(parent_size)) {
707 node = trie_node_realloc(trie, node, 2 * parent_size, parent_size);
// NOTE(review): the realloc NULL check is presumably on an elided line.
709 *parent = trie_encode_node(node);
// Public wrapper for leaf removal. The leaf must belong to this trie.
714 void trie_remove(struct trie *trie, struct trie_leaf *leaf) {
715 trie_remove_impl(trie, leaf);
// Remove all keys at once by resetting both arenas, rather than removing
// leaves one by one. NOTE(review): the reset of trie->root (and the leaf
// list) is not visible in this view — confirm against the full source.
718 void trie_clear(struct trie *trie) {
722 varena_clear(&trie->leaves);
723 varena_clear(&trie->nodes);
// Release all memory owned by the trie. The trie must not be used afterward
// without re-initializing via trie_init().
726 void trie_destroy(struct trie *trie) {
727 varena_destroy(&trie->leaves);
728 varena_destroy(&trie->nodes);