lib/PublicInbox/Filter/RubyLang.pm
lib/PublicInbox/Filter/SubjectTag.pm
lib/PublicInbox/Filter/Vger.pm
+lib/PublicInbox/Gcf2.pm
lib/PublicInbox/GetlineBody.pm
lib/PublicInbox/Git.pm
lib/PublicInbox/GitAsyncCat.pm
lib/PublicInbox/WwwStream.pm
lib/PublicInbox/WwwText.pm
lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/gcf2_libgit2.h
sa_config/Makefile
sa_config/README
sa_config/root/etc/spamassassin/public-inbox.pre
t/filter_rubylang.t
t/filter_subjecttag.t
t/filter_vger.t
+t/gcf2.t
t/git-http-backend.psgi
t/git.fast-import-data
t/git.t
--- /dev/null
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# git-cat-file based on libgit2
+package PublicInbox::Gcf2;
+use strict;
+use PublicInbox::Spawn qw(which popen_rd);
+use Fcntl qw(LOCK_EX);
+my (%CFG, $c_src, $lockfh);
+# Compile-time setup for Inline::C: open the shared lock file, ask
+# pkg-config for the libgit2 link/compile flags, and slurp our C source
+# so the `use Inline' statements below can consume %CFG and $c_src.
+# Dies if PERL_INLINE_DIRECTORY is unset, the lock file cannot be
+# opened or locked, pkg-config cannot be located, libgit2 is unknown
+# to pkg-config, or the C source file is unreadable.
+BEGIN {
+	# PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
+	# to ~/.cache/public-inbox/inline-c if it exists
+	my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
+		die 'PERL_INLINE_DIRECTORY not defined';
+	my $f = "$inline_dir/.public-inbox.lock";
+	open $lockfh, '>', $f or die "failed to open $f: $!\n";
+
+	# Fail with an accurate message when pkg-config itself is absent,
+	# instead of letting it surface as "libgit2 not installed" below.
+	# NOTE(review): assumes which() yields undef when nothing is
+	# found -- confirm against PublicInbox::Spawn
+	my $pc_name = $ENV{PKG_CONFIG} // 'pkg-config';
+	my $pc = which($pc_name) // die "E: $pc_name not found\n";
+
+	# directory containing this .pm file; the C source lives beside it
+	my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
+
+	# silence pkg-config's stderr
+	my $rdr = {};
+	open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+	for my $x (qw(libgit2)) {
+		# NOTE(review): each handle variable is deliberately
+		# overwritten by its own output; presumably dropping the
+		# last reference to the pipe reaps pkg-config and sets $?
+		# before it is tested -- confirm against popen_rd
+		my $libs = popen_rd([$pc, '--libs', $x], undef, $rdr);
+		$libs = do { local $/; <$libs> };
+		next if $?;
+		my $cflags = popen_rd([$pc, '--cflags', $x], undef, $rdr);
+		$cflags = do { local $/; <$cflags> };
+		next if $?;
+
+		# note: we name C source files .h to prevent
+		# ExtUtils::MakeMaker from automatically trying to
+		# build them.
+		my $src_path = "$dir/gcf2_$x.h";
+		open(my $fh, '<', $src_path) or die "E: $src_path: $!\n";
+		chomp($libs, $cflags);
+		$c_src = do { local $/; <$fh> };
+		$CFG{LIBS} = $libs;
+		$CFG{CCFLAGSEX} = $cflags;
+		last;
+	}
+	die "E: libgit2 not installed\n" unless $c_src;
+
+	# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+	flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
+}
+
+# we use Capitalized and ALLCAPS for compatibility with old Inline::C
+# %CFG carries the LIBS and CCFLAGSEX values gathered from pkg-config
+# in the BEGIN block above.  BOOT supplies C code for the generated
+# module's boot section (see the Inline::C documentation); here it
+# initializes libgit2.
+use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();';
+# hand the C source slurped in the BEGIN block to Inline::C
+use Inline C => $c_src;
+# the build inputs are not needed once the `use' lines have run
+undef $c_src;
+undef %CFG;
+# drop our reference to the handle that was flock()ed in BEGIN
+undef $lockfh;
+1;
--- /dev/null
+/*
+ * Copyright (C) 2020 all contributors <meta@public-inbox.org>
+ * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+ *
+ * libgit2 for Inline::C
+ * Avoiding Git::Raw since it doesn't guarantee a stable API,
+ * while libgit2 itself seems reasonably stable.
+ */
+#include <git2.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <poll.h>
+
+/*
+ * Does nothing when rc is GIT_OK.  Otherwise croaks with the numeric
+ * code, the caller-supplied label, and the message from giterr_last()
+ * ("unknown" when libgit2 has no error recorded).
+ */
+static void croak_if_err(int rc, const char *msg)
+{
+	const git_error *e;
+
+	if (rc == GIT_OK)
+		return;
+
+	e = giterr_last();
+	croak("%d %s (%s)", rc, msg, e ? e->message : "unknown");
+}
+
+/*
+ * Constructor: creates an empty git_odb and returns a reference
+ * blessed into PublicInbox::Gcf2 whose referent holds the git_odb
+ * pointer as an integer.  Croaks if git_odb_new() fails.
+ */
+SV *new()
+{
+	git_odb *odb;
+	SV *inner, *obj;
+
+	croak_if_err(git_odb_new(&odb), "git_odb_new");
+
+	inner = newSViv((IV)odb);
+	obj = newRV_noinc(inner); /* obj takes over inner's refcount */
+	sv_bless(obj, gv_stashpv("PublicInbox::Gcf2", GV_ADD));
+
+	/* the stored pointer is marked read-only only after blessing */
+	SvREADONLY_on(inner);
+
+	return obj;
+}
+
+/* recover the git_odb pointer that new() stored inside the object */
+static git_odb *odb_ptr(SV *self)
+{
+	SV *inner = SvRV(self);
+
+	return (git_odb *)SvIV(inner);
+}
+
+/* destructor: hands the git_odb held by this object to git_odb_free() */
+void DESTROY(SV *self)
+{
+	git_odb *odb = odb_ptr(self);
+
+	git_odb_free(odb);
+}
+
+/*
+ * Adds an on-disk object directory as an alternate of our git_odb,
+ * croaking on failure.
+ * needs "$GIT_DIR/objects", not $GIT_DIR
+ */
+void add_alternate(SV *self, const char *objects_path)
+{
+	git_odb *odb = odb_ptr(self);
+
+	croak_if_err(git_odb_add_disk_alternate(odb, objects_path),
+			"git_odb_add_disk_alternate");
+}
+
+/*
+ * Write one object to `fd' in the format of `git cat-file --batch':
+ *	"$OID $TYPE $SIZE\n" $CONTENTS "\n"
+ * If the OID cannot be parsed or the object cannot be read, write
+ * "$OID missing\n" instead, echoing the OID string as it was given.
+ *
+ * self  - object returned by new()
+ * fd    - descriptor to write to
+ * oidsv - this requires an unabbreviated git OID
+ *
+ * Short writes are resumed where they stopped.  EINTR is retried
+ * immediately; EAGAIN is retried after poll() reports the descriptor
+ * writable.  Any other writev failure, or a zero-byte write (reported
+ * as ENOSPC), croaks with "writev error: ...".
+ */
+#define CAPA(v) (sizeof(v) / sizeof((v)[0]))
+void cat_oid(SV *self, int fd, SV *oidsv)
+{
+	/*
+	 * adjust when libgit2 gets SHA-256 support, we return the
+	 * same header as git-cat-file --batch "$OID $TYPE $SIZE\n"
+	 *
+	 * The literal includes the trailing "\n" so that the longest
+	 * header (a 20-digit size) still fits together with snprintf's
+	 * terminating NUL; without it the newline would be truncated
+	 * and a NUL byte sent in its place.
+	 */
+	char hdr[GIT_OID_HEXSZ + sizeof(" commit 18446744073709551615\n")];
+	struct iovec vec[3];
+	size_t nvec = CAPA(vec); /* number of trailing vecs left to write */
+	git_oid oid;
+	git_odb_object *object = NULL;
+	int rc, err = 0;
+	STRLEN oidlen;
+	char *oidptr = SvPV(oidsv, oidlen);
+
+	/* same trailer as git-cat-file --batch */
+	vec[2].iov_len = 1;
+	vec[2].iov_base = "\n";
+
+	rc = git_oid_fromstrn(&oid, oidptr, oidlen);
+	if (rc == GIT_OK)
+		rc = git_odb_read(&object, odb_ptr(self), &oid);
+	if (rc == GIT_OK) {
+		vec[0].iov_base = hdr;
+		vec[1].iov_base = (void *)git_odb_object_data(object);
+		vec[1].iov_len = git_odb_object_size(object);
+
+		/* hex OID first, then " $TYPE $SIZE\n" right after it */
+		git_oid_nfmt(hdr, GIT_OID_HEXSZ, git_odb_object_id(object));
+		vec[0].iov_len = GIT_OID_HEXSZ +
+			snprintf(hdr + GIT_OID_HEXSZ,
+				sizeof(hdr) - GIT_OID_HEXSZ,
+				" %s %zu\n",
+				git_object_type2string(
+					git_odb_object_type(object)),
+				vec[1].iov_len);
+	} else {
+		vec[0].iov_base = oidptr;
+		vec[0].iov_len = oidlen;
+		vec[1].iov_base = " missing";
+		vec[1].iov_len = strlen(vec[1].iov_base);
+	}
+	while (nvec && !err) {
+		/* always resume at the first not-yet-finished vec */
+		ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec);
+
+		if (w > 0) {
+			size_t done = 0;
+			size_t i;
+
+			for (i = CAPA(vec) - nvec; i < CAPA(vec); i++) {
+				/* w > 0 was checked above or w is a remainder >= 0 */
+				if ((size_t)w >= vec[i].iov_len) {
+					/* fully written vec */
+					w -= vec[i].iov_len;
+					done++;
+				} else { /* partially written vec */
+					char *p = vec[i].iov_base;
+					vec[i].iov_base = p + w;
+					vec[i].iov_len -= w;
+					break;
+				}
+			}
+			nvec -= done;
+		} else if (w < 0) {
+			err = errno;
+			switch (err) {
+			case EAGAIN: {
+				/* non-blocking fd: wait until writable */
+				struct pollfd pfd;
+				pfd.events = POLLOUT;
+				pfd.fd = fd;
+				poll(&pfd, 1, -1);
+			}
+				/* fall-through */
+			case EINTR:
+				err = 0; /* retry the writev */
+			}
+		} else { /* w == 0 */
+			err = ENOSPC;
+		}
+	}
+	/* release the object before croak() unwinds out of here */
+	if (object)
+		git_odb_object_free(object);
+	if (err)
+		croak("writev error: %s", strerror(err));
+}
--- /dev/null
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use PublicInbox::TestCommon;
+use Test::More;
+use Fcntl qw(:seek);
+use IO::Handle ();
+use POSIX qw(_exit);
+# Skip the whole test unless Gcf2 (Inline::C + libgit2) can be loaded.
+require_mods('PublicInbox::Gcf2');
+use_ok 'PublicInbox::Gcf2';
+my $gcf2 = PublicInbox::Gcf2::new();
+is(ref($gcf2), 'PublicInbox::Gcf2', '::new works');
+
+# Locate the object directory of the worktree the test runs in.
+chomp(my $objdir = xqx([qw(git rev-parse --git-path objects)]));
+if ($objdir =~ /\A--git-path\n/) { # git <2.5
+	chomp($objdir = xqx([qw(git rev-parse --git-dir)]));
+	$objdir .= '/objects';
+}
+# Outside of a worktree (e.g. an unpacked tarball) newer git prints
+# nothing on stdout, leaving $objdir as '' rather than undef.  Apply
+# the directory check to every git version so the "not in git
+# worktree" skip below actually fires.
+$objdir = undef unless -d $objdir;
+
+# OID of the COPYING blob, and the file contents to compare against
+my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3';
+open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!";
+$agpl = do { local $/; <$agpl> };
+
+# Exercise cat_oid against a regular file, then (Linux only) against
+# blocking and non-blocking pipes, including a pipe with no reader.
+SKIP: {
+	# 2 "missing" checks + 3 regular-file checks + 2 * 6 pipe checks
+	skip 'not in git worktree', 17 unless defined($objdir);
+	$gcf2->add_alternate($objdir);
+	open my $fh, '+>', undef or BAIL_OUT "open: $!";
+	my $fd = fileno($fh);
+	$fh->autoflush(1);
+
+	# an unparseable OID is echoed back with " missing"
+	$gcf2->cat_oid($fd, 'invalid');
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	is(do { local $/; <$fh> }, "invalid missing\n", 'got missing message');
+
+	# a well-formed OID that does not exist is reported the same way
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	$gcf2->cat_oid($fd, '0'x40);
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	is(do { local $/; <$fh> }, ('0'x40)." missing\n",
+		'got missing message for 0x40');
+
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	$gcf2->cat_oid($fd, $COPYING);
+	my $buf;
+	# verify header, trailing newline and body of whatever is in $fh
+	my $ck_copying = sub {
+		my ($desc) = @_;
+		seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+		is(<$fh>, "$COPYING blob 34520\n", 'got expected header');
+		$buf = do { local $/; <$fh> };
+		is(chop($buf), "\n", 'got trailing \\n');
+		is($buf, $agpl, "AGPL matches ($desc)");
+	};
+	$ck_copying->('regular file');
+
+	$^O eq 'linux' or skip('pipe tests are Linux-only', 12);
+	for my $blk (1, 0) {
+		my ($r, $w);
+		pipe($r, $w) or BAIL_OUT $!;
+		# 1031 is F_SETPIPE_SZ; a 4096-byte pipe is smaller than
+		# the blob, so cat_oid cannot finish in a single writev
+		fcntl($w, 1031, 4096) or
+			skip('Linux too old for F_SETPIPE_SZ', 12);
+		$w->blocking($blk);
+		seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+		truncate($fh, 0) or BAIL_OUT "truncate: $!";
+		defined(my $pid = fork) or BAIL_OUT "fork: $!";
+		if ($pid == 0) {
+			# child: drain the pipe into $fh for the parent
+			close $w;
+			tick; # wait for parent to block on writev
+			$buf = do { local $/; <$r> };
+			print $fh $buf or _exit(1);
+			_exit(0);
+		}
+		$gcf2->cat_oid(fileno($w), $COPYING);
+		close $w or BAIL_OUT "close: $!";
+		is(waitpid($pid, 0), $pid, 'child exited');
+		is($?, 0, 'no error in child');
+		$ck_copying->("pipe blocking($blk)");
+
+		# with the read end closed, cat_oid must croak
+		pipe($r, $w) or BAIL_OUT $!;
+		fcntl($w, 1031, 4096) or BAIL_OUT $!;
+		$w->blocking($blk);
+		close $r;
+		local $SIG{PIPE} = 'IGNORE';
+		eval { $gcf2->cat_oid(fileno($w), $COPYING) };
+		like($@, qr/writev error:/, 'got writev error');
+	}
+}
+
+# Optional leak check, enabled by setting TEST_LEAK_NR.  It creates
+# that many Gcf2 objects and, when an object directory is available,
+# runs TEST_LEAK_CAT (default 10) rounds of successful, failing and
+# "missing" cat_oid calls on each.  No assertions are made here.
+if (my $nr = $ENV{TEST_LEAK_NR}) {
+	open my $null, '>', '/dev/null' or BAIL_OUT "open /dev/null: $!";
+	my $fd = fileno($null);
+	my $cat = $ENV{TEST_LEAK_CAT} // 10;
+	diag "checking for leaks... (TEST_LEAK_NR=$nr TEST_LEAK_CAT=$cat)";
+	local $SIG{PIPE} = 'IGNORE';
+
+	# a pipe without a reader, to drive cat_oid down its error path
+	my ($r, $w);
+	pipe($r, $w) or BAIL_OUT "pipe: $!";
+	close $r;
+	my $broken = fileno($w);
+	for (1..$nr) {
+		my $obj = PublicInbox::Gcf2::new();
+		if (defined($objdir)) {
+			$obj->add_alternate($objdir);
+			for (1..$cat) {
+				$obj->cat_oid($fd, $COPYING);
+				eval { $obj->cat_oid($broken, $COPYING) };
+				$obj->cat_oid($fd, '0'x40);
+				$obj->cat_oid($fd, 'invalid');
+			}
+		}
+	}
+}
+done_testing;