2 * simple program which outputs a hash-table of `icons_ext` with low collusion.
3 * the hash function is case-insensitive, it also doesn't hash beyond the
4 * length of the longest extension.
11 #define GOLDEN_RATIO_32 UINT32_C(2654442313) /* golden ratio for 32bits: (2^32) / 1.61803 */
12 #define GOLDEN_RATIO_64 UINT64_C(0x9E3793492EEDC3F7)
13 #define ICONS_TABLE_SIZE 8 /* size in bits. 8 = 256 */
16 #define TOUPPER(ch) (((ch) >= 'a' && (ch) <= 'z') ? ((ch) - 'a' + 'A') : (ch))
19 /* all of this is just for the static hash-table generation. only the hash
20 * function gets included in `nnn` binary.
31 #error "The hash-table generator relies on assert() to verify correctness."
34 #define ASSERT(X) assert(X)
35 #define ARRLEN(X) (sizeof(X) / sizeof((X)[0]))
36 #define MAX(A, B) ((A) > (B) ? (A) : (B))
37 #define HGEN_ITERARATION (1ul << 13)
38 #define ICONS_PROBE_MAX_ALLOWED 6
39 #define ICONS_MATCH_MAX (512)
41 #if 0 /* for logging some interesting info to stderr */
42 #define log(...) fprintf(stderr, "[INFO]: " __VA_ARGS__)
44 #define log(...) ((void)0)
47 static uint32_t icon_ext_hash(const char *s);
49 /* change ICONS_TABLE_SIZE to increase the size of the table */
50 static struct icon_pair table[1u << ICONS_TABLE_SIZE];
51 static uint8_t seen[ARRLEN(table)];
52 /* arbitrarily picked starting position. change if needed.
53 * but ensure they're above 1 and prefer prime numbers.
55 static uint32_t hash_start = 7;
56 static uint32_t hash_mul = 251; /* unused as of now */
59 * use robin-hood insertion to reduce the max probe length
62 rh_insert(const struct icon_pair item, uint32_t idx, uint32_t n)
65 for (uint32_t tries = 0; tries < ARRLEN(table); ++tries, ++n) {
67 struct icon_pair tmp_item = table[idx];
68 uint32_t tmp_n = seen[idx];
70 assert(n < (uint8_t)-1);
74 if (tmp_n != 0) /* the slot we inserted to wasn't empty */
75 rh_insert(tmp_item, idx, tmp_n);
78 idx = (idx + 1) % ARRLEN(table);
80 assert(0); /* unreachable */
83 enum { PROBE_MAX, PROBE_TOTAL, PROBE_CNT };
85 table_populate(unsigned int p[static PROBE_CNT])
87 memset(seen, 0x0, sizeof seen);
88 memset(table, 0x0, sizeof table);
89 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
90 if (icons_ext[i].icon[0] == '\0') /* skip empty entries */
92 uint32_t h = icon_ext_hash(icons_ext[i].match);
93 rh_insert(icons_ext[i], h, 1);
96 p[PROBE_MAX] = p[PROBE_TOTAL] = 0;
97 for (size_t i = 0; i < ARRLEN(seen); ++i) {
98 p[PROBE_MAX] = MAX(p[PROBE_MAX], seen[i]);
99 p[PROBE_TOTAL] += seen[i];
104 /* permuted congruential generator */
108 uint64_t oldstate = *state;
109 *state *= GOLDEN_RATIO_64;
110 uint32_t r = (oldstate >> 59);
111 uint32_t v = (oldstate ^ (oldstate >> 18)) >> 27;
112 return (v >> (-r & 31)) | (v << r);
118 assert(ARRLEN(icons_ext) <= ARRLEN(table));
119 assert(ICONS_TABLE_SIZE < 16);
120 assert(1u << ICONS_TABLE_SIZE == ARRLEN(table));
121 assert((GOLDEN_RATIO_32 & 1) == 1); /* must be odd */
122 assert((GOLDEN_RATIO_64 & 1) == 1); /* must be odd */
123 assert(hash_start > 1);
124 assert(hash_mul > 1);
125 /* ensure power of 2 hashtable size which allows compiler to optimize
126 * away mod (`%`) operations
128 assert((ARRLEN(table) & (ARRLEN(table) - 1)) == 0);
130 unsigned int max_probe = (unsigned)-1;
131 uint32_t best_hash_start = 0, best_hash_mul = 0, best_total_probe = 9999;
132 uint64_t hash_start_rng = hash_start, hash_mul_rng = hash_mul;
134 for (size_t i = 0; i < HGEN_ITERARATION; ++i) {
135 unsigned *p = table_populate((unsigned [PROBE_CNT]){0});
136 if (p[PROBE_MAX] < max_probe ||
137 (p[PROBE_MAX] == max_probe && p[PROBE_TOTAL] < best_total_probe))
139 max_probe = p[PROBE_MAX];
140 best_total_probe = p[PROBE_TOTAL];
141 best_hash_start = hash_start;
142 best_hash_mul = hash_mul;
144 hash_start = pcg(&hash_start_rng);
145 hash_mul = pcg(&hash_mul_rng);
147 assert(max_probe < ICONS_PROBE_MAX_ALLOWED);
148 hash_start = best_hash_start;
149 hash_mul = best_hash_mul;
151 unsigned *p = table_populate((unsigned [PROBE_CNT]){0});
152 assert(p[PROBE_MAX] == max_probe);
153 assert(p[PROBE_TOTAL] == best_total_probe);
158 unsigned int total_probe = 0;
159 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
160 if (icons_ext[i].icon[0] == 0)
162 uint32_t found = 0, h = icon_ext_hash(icons_ext[i].match);
163 for (uint32_t k = 0; k < max_probe; ++k) {
164 uint32_t z = (h + k) % ARRLEN(table);
166 if (table[z].match && strcasecmp(icons_ext[i].match, table[z].match) == 0) {
174 assert(total_probe == best_total_probe);
176 size_t match_max = 0, icon_max = 0;
177 for (size_t i = 0; i < ARRLEN(icons_name); ++i) {
178 match_max = MAX(match_max, strlen(icons_name[i].match) + 1);
179 icon_max = MAX(icon_max, strlen(icons_name[i].icon) + 1);
181 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
182 match_max = MAX(match_max, strlen(icons_ext[i].match) + 1);
183 icon_max = MAX(icon_max, strlen(icons_ext[i].icon) + 1);
185 icon_max = MAX(icon_max, strlen(dir_icon.icon) + 1);
186 icon_max = MAX(icon_max, strlen(exec_icon.icon) + 1);
187 icon_max = MAX(icon_max, strlen(file_icon.icon) + 1);
188 assert(icon_max < ICONS_MATCH_MAX);
190 const char *uniq[ARRLEN(icons_ext)] = {0};
191 size_t uniq_head = 0;
192 for (size_t i = 0; i < ARRLEN(icons_ext); ++i) {
193 if (icons_ext[i].icon[0] == 0)
196 for (size_t k = 0; k < uniq_head; ++k) {
197 if (strcasecmp(uniq[k], icons_ext[i].icon) == 0) {
203 assert(uniq_head < ARRLEN(uniq));
204 uniq[uniq_head++] = icons_ext[i].icon;
207 assert(uniq_head < (unsigned char)-1);
209 log("load-factor: %.2f (%u/%zu)\n", (nitems * 100.0) / (double)ARRLEN(table),
210 (unsigned int)nitems, ARRLEN(table));
211 log("max_probe : %6u\n", max_probe);
212 log("total_probe: %6u\n", total_probe);
213 log("uniq icons : %6zu\n", uniq_head);
214 log("no-compact : %6zu bytes\n", ARRLEN(table) * icon_max);
215 log("compaction : %6zu bytes\n", uniq_head * icon_max + ARRLEN(table));
216 log("hash_start : %6" PRIu32 "\n", hash_start);
217 log("hash_mul : %6" PRIu32 "\n", hash_mul);
219 printf("#ifndef INCLUDE_ICONS_GENERATED\n");
220 printf("#define INCLUDE_ICONS_GENERATED\n\n");
222 printf("/*\n * NOTE: This file is automatically generated.\n");
223 printf(" * DO NOT EDIT THIS FILE DIRECTLY.\n");
224 printf(" * Use `icons.h` to customize icons\n */\n\n");
226 printf("#define hash_start UINT32_C(%" PRIu32 ")\n", hash_start);
227 printf("#define hash_mul UINT32_C(%" PRIu32 ")\n\n", hash_mul);
228 printf("#define ICONS_PROBE_MAX %uu\n", max_probe);
229 printf("#define ICONS_MATCH_MAX %zuu\n\n", match_max);
230 printf("#define ICONS_STR_MAX %zuu\n\n", icon_max);
232 printf("struct icon_pair { const char match[ICONS_MATCH_MAX]; "
233 "const char icon[ICONS_STR_MAX]; unsigned char color; };\n\n");
235 printf("static const char icons_ext_uniq[%zu][ICONS_STR_MAX] = {\n", uniq_head);
236 for (size_t i = 0; i < uniq_head; ++i)
237 printf("\t\"%s\",\n", uniq[i]);
240 printf("static const struct {\n\tconst char match[ICONS_MATCH_MAX];\n"
241 "\tunsigned char idx;\n\tunsigned char color;\n} icons_ext[%zu] = {\n",
243 for (size_t i = 0; i < ARRLEN(table); ++i) {
244 if (table[i].icon == NULL || table[i].icon[0] == '\0') /* skip empty entries */
247 for (k = 0; k < uniq_head; ++k) {
248 if (strcasecmp(table[i].icon, uniq[k]) == 0)
251 assert(k < uniq_head);
252 printf("\t[%3zu] = {\"%s\", %zu, %hhu },\n",
253 i, table[i].match, k, table[i].color);
257 printf("#endif /* INCLUDE_ICONS_GENERATED */\n");
261 #define ASSERT(X) ((void)0)
262 #endif /* ICONS_GENERATE */
264 #if defined(ICONS_GENERATE) || defined(ICONS_ENABLED)
266 icon_ext_hash(const char *str)
268 uint32_t i, hash = hash_start;
269 enum { wsz = sizeof hash * CHAR_BIT, z = wsz - ICONS_TABLE_SIZE, r = 5 };
271 /* just an xor-rotate hash. in general, this is a horrible hash
272 * function but for our specific input it works fine while being
273 * computationally cheap.
275 for (i = 0; i < ICONS_MATCH_MAX && str[i] != '\0'; ++i) {
276 hash ^= TOUPPER((unsigned char)str[i]);
277 hash = (hash >> (wsz - r)) | (hash << r);
280 /* finalizer: https://probablydance.com/2018/06/16 */
282 hash *= GOLDEN_RATIO_32;
285 ASSERT(hash < ARRLEN(table));