--- /dev/null
+#!/usr/bin/env perl
+
+# Expand the code point ranges of a Unicode data file (for example
+# emoji-data.txt) into C array initializer entries.
+#
+# Input (files named on the command line, or STDIN): one "XXXX" or
+# "XXXX..YYYY" hexadecimal field per line, optionally followed by
+# "; property" columns and a "# comment".  Lines that are blank or start
+# with '#' are ignored.
+#
+# Output (STDOUT): one "\t0xXXXXXX,\n" line per code point, in ascending
+# order and without duplicates.  The ordering matters: the generated table
+# is searched with bsearch(3), which requires sorted input, while the data
+# file repeats the same code points once per property section.
+#
+# NOTE(review): every property section of the input is merged.  In
+# emoji-data.txt the Emoji property also covers '#', '*' and the ASCII
+# digits -- confirm that consumers of the table really want those.
+
+use strict;
+use warnings;
+
+# expand_code_points(@lines)
+#
+# Parses the given data-file lines and returns the list of distinct code
+# points they cover, as integers sorted in ascending numeric order.
+# Lines whose first field is not a hex code point or range are reported
+# on STDERR and skipped.
+sub expand_code_points {
+    my @lines = @_;
+    my %seen;
+
+    LINE:
+    for my $line (@lines) {
+        # Keep only the first field: drop the trailing comment and every
+        # ';'-separated property column, then all remaining whitespace.
+        (my $field = $line) =~ s/[#;].*//s;
+        $field =~ s/\s+//g;
+        next LINE if $field eq '';
+
+        my ($first, $last) =
+            $field =~ /\A([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\z/;
+        if (!defined $first) {
+            warn "skipping unparsable line: $line";
+            next LINE;
+        }
+
+        # A single code point is treated as a one-element range.
+        my $start = hex $first;
+        my $end   = defined $last ? hex $last : $start;
+        $seen{$_} = 1 for $start .. $end;
+    }
+
+    return sort { $a <=> $b } keys %seen;
+}
+
+# Only run the filter when executed as a script, so the file can also be
+# loaded (e.g. by a test) without consuming STDIN.
+unless (caller) {
+    printf "\t0x%06x,\n", $_ for expand_code_points(<>);
+}
#include <fcntl.h>
#include <limits.h>
#include <pwd.h>
+#include <signal.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <signal.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <sys/types.h>
return utfbyte[i] | (u & ~utfmask[i]);
}
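+// emojis.c.in is generated with:
+//   ./list-emojis <libgrapheme/data/emoji-data.txt >emojis.c.in
+// The generator emits only bare "0x......," entries, so they must end up
+// inside the initializer of the emojis array.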
+#include "emojis.c.in"
+
+/*
+ * Three-way comparator for uint32_t values, usable with bsearch(3) and
+ * qsort(3).
+ *
+ * a, b: pointers to the uint32_t values to compare.
+ * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
+ *
+ * The values are compared explicitly rather than subtracted: an unsigned
+ * difference wraps around, and converting it to int yields the wrong sign
+ * once the operands are more than INT_MAX apart.
+ */
+static int
+uint32comparator(const void *a, const void *b)
+{
+	const uint32_t lhs = *(const uint32_t *)a;
+	const uint32_t rhs = *(const uint32_t *)b;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Report whether the code point u is present in the emojis table.
+ *
+ * Returns 1 when u is found and 0 otherwise.  The lookup uses bsearch(3)
+ * with uint32comparator, so the table has to be sorted in ascending order.
+ * emojis is expected to be an array of uint32_t visible in this
+ * translation unit (its element count is derived with sizeof).
+ */
+int
+isEmoji(const Rune u)
+{
+	const uint32_t key = (uint32_t)u;
+	const size_t count = sizeof(emojis) / sizeof(uint32_t);
+	const void *hit;
+
+	hit = bsearch(&key, emojis, count, sizeof(uint32_t), uint32comparator);
+
+	return hit != NULL;
+}
+
size_t
utf8validate(Rune *u, size_t i)
{
- if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
+ if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF) || isEmoji(*u))
*u = UTF_INVALID;
for (i = 1; *u > utfmax[i]; ++i)
;