From: Eric Wong Date: Tue, 22 Oct 2019 07:45:38 +0000 (+0000) Subject: Merge branch 'regen' X-Git-Tag: v1.2.0~17 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=f9fc5cf2e8cefda4a56d937ed217e47689fd7e49;hp=1d6e0dbc3bad19acbfd90a9841a334dcaa0e4641 Merge branch 'regen' * regen: v2writable: use msgmap as multi_mid queue v2writable: move git->cleanup to the correct place v2writable: reindex handles 3-headered monsters v2writable: improve "num_for" API and disambiguate v2writable: set unindexed article number --- diff --git a/Documentation/RelNotes/v1.2.0.wip b/Documentation/RelNotes/v1.2.0.wip index 41236a05..8df3e4f9 100644 --- a/Documentation/RelNotes/v1.2.0.wip +++ b/Documentation/RelNotes/v1.2.0.wip @@ -4,37 +4,68 @@ Subject: [WIP] public-inbox 1.2.0 * first non-pre/rc release with v2 format support for scalability. See public-inbox-v2-format(5) manpage for more details. -* new admin tools for v2 repos: +* new admin tools for v2 inboxes: - public-inbox-convert - converts v1 to v2 repo formats - public-inbox-compact - v2 convenience wrapper for xapian-compact(1) - public-inbox-purge - purges entire messages out of v2 history - public-inbox-edit - edits sensitive data out messages from v2 history - public-inbox-xcpdb - copydatabase(1) wrapper to upgrade Xapian formats (e.g. from "chert" to "glass") and resharding - of v2 repos + of v2 inboxes * SQLite3 support decoupled from Xapian support, and Xapian DBs may be configured without phrase support to save space. See "indexlevel" in public-inbox-config(5) manpage for more info. +* codebase now uses Perl 5.10.1+ features (e.g. "//") + * public-inbox-nntpd - support STARTTLS and NNTPS - support COMPRESS extension - fix several RFC3977 compliance bugs - improved interopability with picky clients such as leafnode + and Alpine * public-inbox-watch - support multiple spam training directories - support mapping multiple inboxes per Maildir + - List-ID header support (see "listid" in public-inbox-config(5)) + +* public-inbox-mda + - List-ID header support (see above) * PublicInbox::WWW - grokmirror-compatible manifest.js.gz endpoint generation - user-configurable color support in $INBOX_URL/_/text/color/ - BOFHs may set default colors via "publicinbox.css" (see public-inbox-config(5)) + - ability to map git code repositories and run cgit + (see "coderepo" and "cgitrc" in public-inbox-config(5)) + - able to recreate blobs with coderepo associations and Xapian + - search results may be reversed + - reduce memory usage when rendering large threads + - syntax highlighting for patches and blobs + +* public-inbox-httpd / public-inbox-nntpd: + - lower memory usage in C10K scenarios + - buffers slow clients to filesystem (TMPDIR) instead of RAM + - improved FreeBSD support + - Danga::Socket is no longer a runtime dependency + +* many documentation updates, new manpages for: + - PublicInbox::SaPlugin::ListMirror + - public-inbox-init + +* workaround memory leaks on Perl 5.16.3 (on CentOS/RHEL 7.x) -* Danga::Socket is no longer a runtime dependency of daemons. +Thanks to Ali Alnubani, Alyssa Ross, Amitai Schleier, Dave Taht, +Dmitry Alexandrov, Eric W. Biederman, Jan Kiszka, Jonathan Corbet, +Kyle Meyer, Leah Neukirchen, Mateusz Łoskot, Nicolás Ojeda Bär, +SZEDER Gábor, Urs Janßen, Wang Kang, and edef for all their help, +bug reports, patches and suggestions. -* improved FreeBSD support +Special thanks to Konstantin Ryabitsev and The Linux Foundation +for their sponsorship and support over the past two years. See archives at https://public-inbox.org/meta/ for all history. +See https://public-inbox.org/TODO for what the future holds. diff --git a/Documentation/include.mk b/Documentation/include.mk index 1460604d..d2357ffc 100644 --- a/Documentation/include.mk +++ b/Documentation/include.mk @@ -188,7 +188,8 @@ rsync-doc: NEWS.atom.gz clean-doc: $(RM) $(man1) $(man5) $(man7) $(man8) $(gz_docs) $(docs_html) $(mantxt) - $(RM) $(gz_xdocs) $(xdocs_html) $(xdocs) NEWS NEWS.atom NEWS.html + $(RM) $(gz_xdocs) $(xdocs_html) $(xdocs) + $(RM) NEWS NEWS.atom NEWS.html Documentation/standards.txt clean :: clean-doc diff --git a/TODO b/TODO index 61c44a84..f9122a5d 100644 --- a/TODO +++ b/TODO @@ -21,6 +21,9 @@ all need to be considered for everything we introduce) yet storing large amounts of data on computers without a public IP behind a home Internet connection. +* DHT (distributed hash table) for mapping Message-IDs to various + archive locations to avoid SPOF. + * optional Cache::FastMmap support so production deployments won't need Varnish (Varnish doesn't protect NNTP, either) diff --git a/examples/grok-pull.post_update_hook.sh b/examples/grok-pull.post_update_hook.sh index ab4e54e7..d003448e 100755 --- a/examples/grok-pull.post_update_hook.sh +++ b/examples/grok-pull.post_update_hook.sh @@ -26,7 +26,13 @@ else fi # run public-inbox-init iff unconfigured -cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".dir) +cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".inboxdir) + +# check legacy name for "inboxdir" +case $cfg_dir in +'') cfg_dir=$(git config -f "$PI_CONFIG" publicinbox."$inbox_name".mainrepo) ;; +esac + case $cfg_dir in '') remote_git_url=$(git --git-dir="$full_git_dir" config remote.origin.url) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index ff3838b3..218846f3 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -261,17 +261,6 @@ sub local_nick ($) { wantarray ? ($ret) : $ret; } -# show the blob URL for cgit/gitweb/whatever -sub src_blob_url { - my ($self, $oid) = @_; - # blob_url_format = "https://example.com/foo.git/blob/%s" - if (my $bfu = $self->{blob_url_format}) { - return map { sprintf($_, $oid) } @$bfu if wantarray; - return sprintf($bfu->[0], $oid); - } - local_nick($self); -} - sub host_prefix_url ($$) { my ($env, $url) = @_; return $url if index($url, '//') >= 0; diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 0e290601..c134e297 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -32,14 +32,6 @@ sub new_msgid { $class->new($msgid, mid_escape($msgid)); } -sub new_oneline { - my ($class, $raw) = @_; - $raw = '' unless defined $raw; - $raw =~ tr/\t\n / /s; # squeeze spaces - $raw =~ tr/\r//d; # kill CR - $class->new($raw); -} - # some of these overrides are standard C escapes so they're # easy-to-understand when rendered. my %escape_sequence = ( diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 174e4245..ab7b0ed5 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -134,7 +134,7 @@ sub import_maildir { opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; while (defined(my $fn = readdir($dh))) { next unless is_maildir_basename($fn); - my $mime = maildir_file_load("$dir/$fn") or next; + my $mime = maildir_path_load("$dir/$fn") or next; if (my $filter = $self->filter($im)) { my $ret = $filter->scrub($mime) or return; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 64277342..01ca6f11 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -320,12 +320,6 @@ INSERT INTO id2num (id, num) VALUES (?,?) } } -sub delete_articles { - my ($self, $nums) = @_; - my $dbh = $self->connect; - $self->delete_by_num($_) foreach @$nums; -} - # returns number of removed messages # $oid may be undef to match only on $mid sub remove_oid { diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index d7e15c72..da8a6c86 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -21,7 +21,7 @@ use vars qw(@ISA @EXPORT_OK %EXPORT_TAGS $VERSION); $VERSION = "0.25"; @ISA = qw(Exporter); -@EXPORT_OK = qw(sendfile epoll_ctl epoll_create epoll_wait +@EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait EPOLLIN EPOLLOUT EPOLLET EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD EPOLLONESHOT EPOLLEXCLUSIVE); @@ -29,7 +29,6 @@ $VERSION = "0.25"; EPOLLIN EPOLLOUT EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD EPOLLONESHOT EPOLLEXCLUSIVE)], - sendfile => [qw(sendfile)], ); use constant EPOLLIN => 1; @@ -64,7 +63,6 @@ our ( $SYS_epoll_create, $SYS_epoll_ctl, $SYS_epoll_wait, - $SYS_sendfile, ); our $no_deprecated = 0; @@ -90,45 +88,37 @@ if ($^O eq "linux") { $SYS_epoll_create = 254; $SYS_epoll_ctl = 255; $SYS_epoll_wait = 256; - $SYS_sendfile = 187; # or 64: 239 } elsif ($machine eq "x86_64") { $SYS_epoll_create = 213; $SYS_epoll_ctl = 233; $SYS_epoll_wait = 232; - $SYS_sendfile = 40; } elsif ($machine =~ m/^parisc/) { $SYS_epoll_create = 224; $SYS_epoll_ctl = 225; $SYS_epoll_wait = 226; - $SYS_sendfile = 122; # sys_sendfile64=209 $u64_mod_8 = 1; } elsif ($machine =~ m/^ppc64/) { $SYS_epoll_create = 236; $SYS_epoll_ctl = 237; $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # (sys32_sendfile). sys32_sendfile64=226 (64 bit processes: sys_sendfile64=186) $u64_mod_8 = 1; } elsif ($machine eq "ppc") { $SYS_epoll_create = 236; $SYS_epoll_ctl = 237; $SYS_epoll_wait = 238; - $SYS_sendfile = 186; # sys_sendfile64=226 $u64_mod_8 = 1; } elsif ($machine =~ m/^s390/) { $SYS_epoll_create = 249; $SYS_epoll_ctl = 250; $SYS_epoll_wait = 251; - $SYS_sendfile = 187; # sys_sendfile64=223 $u64_mod_8 = 1; } elsif ($machine eq "ia64") { $SYS_epoll_create = 1243; $SYS_epoll_ctl = 1244; $SYS_epoll_wait = 1245; - $SYS_sendfile = 1187; $u64_mod_8 = 1; } elsif ($machine eq "alpha") { # natural alignment, ints are 32-bits - $SYS_sendfile = 370; # (sys_sendfile64) $SYS_epoll_create = 407; $SYS_epoll_ctl = 408; $SYS_epoll_wait = 409; @@ -137,7 +127,6 @@ if ($^O eq "linux") { $SYS_epoll_create = 20; # (sys_epoll_create1) $SYS_epoll_ctl = 21; $SYS_epoll_wait = 22; # (sys_epoll_pwait) - $SYS_sendfile = 71; # (sys_sendfile64) $u64_mod_8 = 1; $no_deprecated = 1; } elsif ($machine =~ m/arm(v\d+)?.*l/) { @@ -145,16 +134,13 @@ if ($^O eq "linux") { $SYS_epoll_create = 250; $SYS_epoll_ctl = 251; $SYS_epoll_wait = 252; - $SYS_sendfile = 187; $u64_mod_8 = 1; } elsif ($machine =~ m/^mips64/) { - $SYS_sendfile = 5039; $SYS_epoll_create = 5207; $SYS_epoll_ctl = 5208; $SYS_epoll_wait = 5209; $u64_mod_8 = 1; } elsif ($machine =~ m/^mips/) { - $SYS_sendfile = 4207; $SYS_epoll_create = 4248; $SYS_epoll_ctl = 4249; $SYS_epoll_wait = 4250; @@ -180,68 +166,9 @@ if ($^O eq "linux") { elsif ($^O eq "freebsd") { if ($ENV{FREEBSD_SENDFILE}) { # this is still buggy and in development - $SYS_sendfile = 393; # old is 336 } } -############################################################################ -# sendfile functions -############################################################################ - -unless ($SYS_sendfile) { - _load_syscall(); - $SYS_sendfile = eval { &SYS_sendfile; } || 0; -} - -sub sendfile_defined { return $SYS_sendfile ? 1 : 0; } - -if ($^O eq "linux" && $SYS_sendfile) { - *sendfile = \&sendfile_linux; -} elsif ($^O eq "freebsd" && $SYS_sendfile) { - *sendfile = \&sendfile_freebsd; -} else { - *sendfile = \&sendfile_noimpl; -} - -sub sendfile_noimpl { - $! = ENOSYS; - return -1; -} - -# C: ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count) -# Perl: sendfile($write_fd, $read_fd, $max_count) --> $actually_sent -sub sendfile_linux { - return syscall( - $SYS_sendfile, - $_[0] + 0, # fd - $_[1] + 0, # fd - 0, # don't keep track of offset. callers can lseek and keep track. - $_[2] + 0 # count - ); -} - -sub sendfile_freebsd { - my $offset = POSIX::lseek($_[1]+0, 0, SEEK_CUR) + 0; - my $ct = $_[2] + 0; - my $sbytes_buf = "\0" x 8; - my $rv = syscall( - $SYS_sendfile, - $_[1] + 0, # fd (from) - $_[0] + 0, # socket (to) - $offset, - $ct, - 0, # struct sf_hdtr *hdtr - $sbytes_buf, # off_t *sbytes - 0); # flags - return $rv if $rv < 0; - - - my $set = unpack("L", $sbytes_buf); - POSIX::lseek($_[1]+0, SEEK_CUR, $set); - return $set; -} - - ############################################################################ # epoll functions ############################################################################ diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index d0b9f387..4b7177c1 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -300,13 +300,6 @@ sub get_vcs_object ($$$;$) { PublicInbox::ViewVCS::show($ctx, $oid, $filename); } -sub ctx_get { - my ($ctx, $key) = @_; - my $val = $ctx->{$key}; - (defined $val && $val ne '') or die "BUG: bad ctx, $key unusable"; - $val; -} - sub need { my ($ctx, $extra) = @_; my $msg = <{config}->each_inbox(sub { my ($ibx) = @_; eval { @@ -166,7 +167,7 @@ sub _try_path { $warn_cb->(@_); }; foreach my $ibx (@$inboxes) { - my $mime = _path_to_mime($path) or next; + my $mime = maildir_path_load($path) or next; my $im = _importer_for($self, $ibx); # any header match means it's eligible for the inbox: @@ -259,21 +260,6 @@ sub scan { trigger_scan($self, 'cont') if keys %$opendirs; } -sub _path_to_mime { - my ($path) = @_; - if (open my $fh, '<', $path) { - local $/; - my $str = <$fh>; - $str or return; - return PublicInbox::MIME->new(\$str); - } elsif ($!{ENOENT}) { - return; - } else { - warn "failed to open $path: $!\n"; - return; - } -} - sub _importer_for { my ($self, $ibx) = @_; my $importers = $self->{importers};