From: Eric Wong (Contractor, The Linux Foundation) Date: Wed, 18 Apr 2018 20:58:35 +0000 (+0000) Subject: Merge remote-tracking branch 'origin/master' into v2 X-Git-Tag: v1.1.0-pre1~23 X-Git-Url: http://www.git.stargrave.org/?p=public-inbox.git;a=commitdiff_plain;h=cfb8d16578e7f2f2e300f9f436205e4a8fc7f322;hp=-c Merge remote-tracking branch 'origin/master' into v2 * origin/master: nntp: allow and ignore empty commands mbox: do not barf on queries which return no results nntp: fix NEWNEWS command searchview: fix non-numeric comparison Allow specification of the number of search results to return githttpbackend: avoid infinite loop on generic PSGI servers http: fix modification of read-only value extmsg: use news.gmane.org for Message-ID lookups extmsg: rework partial MID matching to favor current inbox Update the installation instructions with Fedora package names nntp: do not drain rbuf if there is a command pending nntp: improve fairness during XOVER and similar commands searchidx: do not modify Xapian DB while iterating Don't use LIMIT in UPDATE statements --- cfb8d16578e7f2f2e300f9f436205e4a8fc7f322 diff --combined INSTALL index 11d844cf,d57696e7..87aa6961 --- a/INSTALL +++ b/INSTALL @@@ -8,60 -8,98 +8,117 @@@ if they want to import mail into their TODO: this still needs to be documented better, also see the scripts/ and sa_config/ directories in the source tree - It should also be possible to use public-inbox with only IMAP - (or even POP(!)) access to a mailbox. - - standard MakeMaker installation (Perl) - -------------------------------------- - - perl Makefile.PL - make - make test - make install # root permissions may be needed - Requirements ------------ - * git - * Perl and several modules: (Debian package name) - - Date::Parse libtimedate-perl - - Email::MIME libemail-mime-perl - - Email::MIME::ContentType libemail-mime-contenttype-perl - - Encode::MIME::Header perl + public-inbox requires a number of other packages to access its full + functionality. The core tools are, of course: - Optional components: + * Git + * Perl + * SQLite (needed for Xapian use) + + To accept incoming mail into a public inbox, you'll likely want: * MTA - postfix is recommended (for public-inbox-mda) * SpamAssassin (spamc/spamd) (for public-inbox-watch/public-inbox-mda) - Optional Perl modules: - - - Plack[1] libplack-perl - - URI::Escape[1] liburi-perl - - Search::Xapian[2][3] libsearch-xapian-perl - - IO::Compress::Gzip[3] perl-modules (or libio-compress-perl) - - DBI[3] libdbi-perl - - DBD::SQLite[2][3] libdbd-sqlite3-perl - - Danga::Socket[4] libdanga-socket-perl - - Net::Server[5] libnet-server-perl - - Filesys::Notify::Simple[6] libfilesys-notify-simple-perl - - Inline::C[7] libinline-c-perl - - Plack::Middleware::ReverseProxy[8] libplack-middleware-reverseproxy-perl - - Plack::Middleware::Deflater[8] libplack-middleware-deflater-perl - - [1] - Optional, needed for serving/generating Atom and HTML pages - [2] - Optional, only required for NNTP server - [3] - Optional, needed for gzipped mbox support over HTTP - [4] - Optional, needed for bundled HTTP and NNTP servers - [5] - Optional, needed for standalone daemonization of HTTP+NNTP servers - [6] - Optional, needed for public-inbox-watch Maildir watcher - [7] - Optional, allows speeds up spawning on Linux (see public-inbox-daemon(8)) - [8] - Optional, recommended for PSGI interface + Beyond that, there is a long list of Perl modules required, starting with: + + * Date::Parse deb: libdatetime-perl + rpm: perl-Time-ParseDate + + * Email::MIME deb: libemail-mime-perl + rpm: perl-Email-MIME + + * Email::MIME::ContentType deb: libemail-mime-contenttype-perl + rpm: perl-Email-MIME-ContentType + + * Encode::MIME::Header deb: libencode-perl + rpm: perl-Encode + + Where "deb" indicates package names for Debian-derived distributions and + "rpm" is for RPM-based distributions (only known to work on Fedora). + + Numerous optional modules are likely to be useful as well: + + - Plack deb: libplack-perl + rpm: perl-Plack, perl-Plack-Test, - perl-Plack-Middleware-ReverseProxy, - perl-Plack-Middleware-Deflater + (for HTML/Atom generation) + + - URI::Escape deb: liburi-perl + rpm: perl-URI + (for HTML/Atom generation) + + - Search::Xapian deb: libsearch-xapian-perl + rpm: perl-Search-Xapian + (for NNTP service or gzipped mbox over HTTP) + + - IO::Compress::Gzip deb: perl-modules (or libio-compress-perl) + rpm: perl-PerlIO-gzip + (for gzipped mbox over HTTP) + + - DBI deb: libdbi-perl + rpm: perl-DBI + (for gzipped mbox over HTTP) + + - DBD::SQLite deb: libdbd-sqlite3-perl + rpm: perl-DBD-SQLite + (for NNTP service or gzipped mbox over HTTP) + + - Danga::Socket deb: libdanga-socket-perl + rpm: perl-Danga-Socket + (for bundled HTTP and NNTP servers) + + - Net::Server deb: libnet-server-perl + rpm: perl-Net-Server + (for HTTP/NNTP servers as standalone daemons) + + - Filesys::Notify::Simple deb: libfilesys-notify-simple-perl + rpm: perl-Filesys-Notify-Simple + (for public-inbox-watch) + ++ - Inline::C[7] deb: libinline-c-perl ++ (speeds up spawning on Linux ++ (see public-inbox-daemon(8)) ++ ++ - Plack::Middleware::ReverseProxy ++ ++ deb: libplack-middleware-reverseproxy-perl ++ rpm: perl-Plack-Middleware-ReverseProxy ++ (ensures redirects are correct when running ++ behind nginx or Varnish) ++ ++ - Plack::Middleware::Deflater ++ ++ deb: libplack-middleware-deflater-perl ++ rpm: perl-Plack-Middleware-Deflater ++ (saves bandwidth on responses) ++ + + On Fedora systems, you'll probably also end up wanting + perl-Test-HTTP-Server-Simple, perl-Devel-Peek, and perl-IPC-Run to run the -test suite. ++test suite. On Debian systems, libxml-feed-perl and libipc-run-perl(*) ++will aid in running the test suite (XML::Feed and IPC::Run respectively, ++on CPAN). ++ ++(*) we hope to drop this dependency someday + + standard MakeMaker installation (Perl) + -------------------------------------- + + Once the dependencies are installed, you should be able to build and + install the system (into /usr/local) with: + + perl Makefile.PL + make + make test + make install # root permissions may be needed When installing Search::Xapian, make sure the underlying Xapian installation is not affected by an index corruption bug: - https://bugs.debian.org/808610 + https://bugs.debian.org/808610 For Debian 8.x (jessie), this means using Debian 8.5 or later. @@@ -69,13 -107,8 +126,8 @@@ public-inbox will never store unregener or any other search database we might use; Xapian corruption will not destroy critical data. - Optional Perl modules (for developers): - - - XML::Feed[9] libxml-feed-perl - - IPC::Run[10] libipc-run-perl - - [9] - Optional, for testing Atom feeds - [10] - Optional, for some tests (we hope to drop this dependency someday) + See the public-inbox-overview(7) man page for the next steps once the + installation is complete. Copyright --------- diff --combined lib/PublicInbox/ExtMsg.pm index a6f516df,760614df..04cb4062 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@@ -17,7 -17,7 +17,7 @@@ our @EXT_URL = # leading "//" denotes protocol-relative (http:// or https://) '//marc.info/?i=%s', '//www.mail-archive.com/search?l=mid&q=%s', - 'http://mid.gmane.org/%s', + 'nntp://news.gmane.org/%s', 'https://lists.debian.org/msgid-search/%s', '//docs.FreeBSD.org/cgi/mid.cgi?db=mid&id=%s', 'https://www.w3.org/mid/%s', @@@ -31,19 -31,30 +31,19 @@@ sub ext_msg my $cur = $ctx->{-inbox}; my $mid = $ctx->{mid}; - eval { require PublicInbox::Search }; - my $have_xap = $@ ? 0 : 1; - my (@nox, @ibx, @found); + eval { require PublicInbox::Msgmap }; + my $have_mm = $@ ? 0 : 1; + my (@ibx, @found); $ctx->{www}->{pi_config}->each_inbox(sub { my ($other) = @_; return if $other->{name} eq $cur->{name} || !$other->base_url; - my $s = $other->search; - if (!$s) { - push @nox, $other; - return; - } - - # try to find the URL with Xapian to avoid forking - my $doc_id = eval { $s->find_unique_doc_id('mid', $mid) }; - if ($@) { - # xapian not configured properly for this repo - push @nox, $other; - return; - } + my $mm = $other->mm or return; - # maybe we found it! - if (defined $doc_id) { + # try to find the URL with Msgmap to avoid forking + my $num = $mm->num_for($mid); + if (defined $num) { push @found, $other; } else { # no point in trying the fork fallback if we @@@ -55,6 -66,20 +55,6 @@@ return exact($ctx, \@found, $mid) if @found; - # Xapian not installed or configured for some repos, - # do a full MID check (this is expensive...): - if (@nox) { - my $path = mid2path($mid); - foreach my $other (@nox) { - my (undef, $type, undef) = $other->path_check($path); - - if ($type && $type eq 'blob') { - push @found, $other; - } - } - } - return exact($ctx, \@found, $mid) if @found; - # fall back to partial MID matching my $n_partial = 0; my @partial; diff --combined lib/PublicInbox/Msgmap.pm index 3237a5ed,6b6d1c6e..ec3d4f9d --- a/lib/PublicInbox/Msgmap.pm +++ b/lib/PublicInbox/Msgmap.pm @@@ -12,7 -12,6 +12,7 @@@ use strict use warnings; use DBI; use DBD::SQLite; +use File::Temp qw(tempfile); sub new { my ($class, $git_dir, $writable) = @_; @@@ -24,8 -23,9 +24,8 @@@ new_file($class, "$d/msgmap.sqlite3", $writable); } -sub new_file { - my ($class, $f, $writable) = @_; - +sub dbh_new { + my ($f, $writable) = @_; my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', { AutoCommit => 1, RaiseError => 1, @@@ -34,14 -34,6 +34,14 @@@ sqlite_use_immediate_transaction => 1, }); $dbh->do('PRAGMA case_sensitive_like = ON'); + $dbh; +} + +sub new_file { + my ($class, $f, $writable) = @_; + return if !$writable && !-r $f; + + my $dbh = dbh_new($f, $writable); my $self = bless { dbh => $dbh }, $class; if ($writable) { @@@ -53,19 -45,6 +53,19 @@@ $self; } +# used to keep track of used numeric mappings for v2 reindex +sub tmp_clone { + my ($self) = @_; + my ($fh, $fn) = tempfile('msgmap-XXXXXXXX', EXLOCK => 0, TMPDIR => 1); + $self->{dbh}->sqlite_backup_to_file($fn); + my $tmp = ref($self)->new_file($fn, 1); + $tmp->{dbh}->do('PRAGMA synchronous = OFF'); + $tmp->{tmp_name} = $fn; # SQLite won't work if unlinked, apparently + $tmp->{pid} = $$; + close $fh or die "failed to close $fn: $!"; + $tmp; +} + # n.b. invoked directly by scripts/xhdr-num2mid sub meta_accessor { my ($self, $key, $value) = @_; @@@ -78,7 -57,7 +78,7 @@@ $prev = $dbh->selectrow_array($sql, undef, $key); if (defined $prev) { - $sql = 'UPDATE meta SET val = ? WHERE key = ? LIMIT 1'; + $sql = 'UPDATE meta SET val = ? WHERE key = ?'; $dbh->do($sql, undef, $value, $key); } else { $sql = 'INSERT INTO meta (key,val) VALUES (?,?)'; @@@ -92,14 -71,6 +92,14 @@@ sub last_commit $self->meta_accessor('last_commit', $commit); } +# v2 uses this to keep track of how up-to-date Xapian is +# old versions may be automatically GC'ed away in the future, +# but it's a trivial amount of storage. +sub last_commit_xap { + my ($self, $version, $i, $commit) = @_; + $self->meta_accessor("last_xap$version-$i", $commit); +} + sub created_at { my ($self, $second) = @_; $self->meta_accessor('created_at', $second); @@@ -108,10 -79,10 +108,10 @@@ sub mid_insert { my ($self, $mid) = @_; my $dbh = $self->{dbh}; - my $sql = 'INSERT OR IGNORE INTO msgmap (mid) VALUES (?)'; - my $sth = $self->{mid_insert} ||= $dbh->prepare($sql); - $sth->bind_param(1, $mid); - return if $sth->execute == 0; + my $sth = $dbh->prepare_cached(<<''); +INSERT OR IGNORE INTO msgmap (mid) VALUES (?) + + return if $sth->execute($mid) == 0; $dbh->last_insert_id(undef, undef, 'msgmap', 'num'); } @@@ -138,14 -109,10 +138,14 @@@ sub num_for sub minmax { my ($self) = @_; my $dbh = $self->{dbh}; - my $sth = $self->{num_minmax} ||= - $dbh->prepare('SELECT MIN(num),MAX(num) FROM msgmap'); + # breaking MIN and MAX into separate queries speeds up from 250ms + # to around 700us with 2.7million messages. + my $sth = $dbh->prepare_cached('SELECT MIN(num) FROM msgmap', undef, 1); $sth->execute; - $sth->fetchrow_array; + my $min = $sth->fetchrow_array; + $sth = $dbh->prepare_cached('SELECT MAX(num) FROM msgmap', undef, 1); + $sth->execute; + ($min, $sth->fetchrow_array); } sub mid_prefixes { @@@ -173,14 -140,6 +173,14 @@@ sub mid_delete $sth->execute; } +sub num_delete { + my ($self, $num) = @_; + my $dbh = $self->{dbh}; + my $sth = $dbh->prepare('DELETE FROM msgmap WHERE num = ?'); + $sth->bind_param(1, $num); + $sth->execute; +} + sub create_tables { my ($dbh) = @_; my $e; @@@ -198,26 -157,17 +198,26 @@@ } # used by NNTP.pm -sub id_batch { - my ($self, $num, $cb) = @_; +sub ids_after { + my ($self, $num) = @_; + my $ids = $self->{dbh}->selectcol_arrayref(<<'', undef, $$num); +SELECT num FROM msgmap WHERE num > ? +ORDER BY num ASC LIMIT 1000 + + $$num = $ids->[-1] if @$ids; + $ids; +} + +sub msg_range { + my ($self, $beg, $end) = @_; my $dbh = $self->{dbh}; - my $sth = $dbh->prepare('SELECT num FROM msgmap WHERE num > ? '. - 'ORDER BY num ASC LIMIT 1000'); - $sth->execute($num); - my $ary = $sth->fetchall_arrayref; - @$ary = map { $_->[0] } @$ary; - my $nr = scalar @$ary; - $cb->($ary) if $nr; - $nr; + my $attr = { Columns => [] }; + my $mids = $dbh->selectall_arrayref(<<'', $attr, $$beg, $end); +SELECT num,mid FROM msgmap WHERE num >= ? AND num <= ? +ORDER BY num ASC + + $$beg = $mids->[-1]->[0] + 1 if @$mids; + $mids } # only used for mapping external serial numbers (e.g. articles from gmane) @@@ -231,31 -181,4 +231,31 @@@ sub mid_set $sth->execute($num, $mid); } +sub DESTROY { + my ($self) = @_; + delete $self->{dbh}; + my $f = delete $self->{tmp_name}; + if (defined $f && $self->{pid} == $$) { + unlink $f or warn "failed to unlink $f: $!\n"; + } +} + +sub atfork_parent { + my ($self) = @_; + my $f = $self->{tmp_name} or die "not a temporary clone\n"; + delete $self->{dbh} and die "tmp_clone dbh not prepared for parent"; + my $dbh = $self->{dbh} = dbh_new($f, 1); + $dbh->do('PRAGMA synchronous = OFF'); +} + +sub atfork_prepare { + my ($self) = @_; + my $f = $self->{tmp_name} or die "not a temporary clone\n"; + $self->{pid} == $$ or + die "BUG: atfork_prepare not called from $self->{pid}\n"; + $self->{dbh} or die "temporary clone not open\n"; + # must clobber prepared statements + %$self = (tmp_name => $f, pid => $$); +} + 1; diff --combined lib/PublicInbox/NNTP.pm index ace56e7a,c574c9e6..cdbd8e98 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@@ -34,6 -34,7 +34,6 @@@ my $LIST_HEADERS = join("\r\n", @OVERVI qw(:bytes :lines Xref To Cc)) . "\r\n"; # disable commands with easy DoS potential: -# LISTGROUP could get pretty bad, too... my %DISABLED; # = map { $_ => 1 } qw(xover list_overview_fmt newnews xhdr); my $EXPMAP; # fd -> [ idle_time, $self ] @@@ -115,6 -116,7 +115,7 @@@ sub args_ok ($$) sub process_line ($$) { my ($self, $l) = @_; my ($req, @args) = split(/\s+/, $l); + return unless defined($req); $req = lc($req); $req = eval { no strict 'refs'; @@@ -224,12 -226,15 +225,12 @@@ sub cmd_listgroup ($;$) } $self->{ng} or return '412 no newsgroup selected'; - long_response($self, 0, long_response_limit, sub { - my ($i) = @_; - my $nr = $self->{ng}->mm->id_batch($$i, sub { - my ($ary) = @_; - more($self, join("\r\n", @$ary)); - }); - - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + my $n = 0; + long_response($self, sub { + my $ary = $self->{ng}->mm->ids_after(\$n); + scalar @$ary or return; + more($self, join("\r\n", @$ary)); + 1; }); } @@@ -327,22 -332,24 +328,22 @@@ sub cmd_newnews ($$$$;$$) }; return '.' unless @srch; - $ts .= '..'; - my $opts = { asc => 1, limit => 1000, offset => 0 }; - long_response($self, 0, long_response_limit, sub { - my ($i) = @_; + my $prev = 0; + long_response($self, sub { my $srch = $srch[0]; - my $res = $srch->query_ts($ts, $opts); - my $msgs = $res->{msgs}; - if (my $nr = scalar @$msgs) { + my $msgs = $srch->query_ts($ts, $prev); + if (scalar @$msgs) { more($self, '<' . join(">\r\n<", map { $_->mid } @$msgs ). '>'); - $opts->{offset} += $nr; + $prev = $msgs->[-1]->{num}; } else { shift @srch; if (@srch) { # continue onto next newsgroup - $opts->{offset} = 0; + $prev = 0; + return 1; } else { # break out of the long response. - $$i = long_response_limit; + return; } } }); @@@ -407,30 -414,12 +408,30 @@@ sub header_append ($$$) $hdr->header_set($k, @v, $v); } -sub set_nntp_headers { - my ($hdr, $ng, $n, $mid) = @_; +sub xref ($$$$) { + my ($self, $ng, $n, $mid) = @_; + my $ret = "$ng->{domain} $ng->{newsgroup}:$n"; + + # num_for is pretty cheap and sometimes we'll lookup the existence + # of an article without getting even the OVER info. In other words, + # I'm not sure if its worth optimizing by scanning To:/Cc: and + # PublicInbox::ExtMsg on the PSGI end is just as expensive + foreach my $other (@{$self->{nntpd}->{grouplist}}) { + next if $ng eq $other; + my $num = eval { $other->mm->num_for($mid) } or next; + $ret .= " $other->{newsgroup}:$num"; + } + $ret; +} + +sub set_nntp_headers ($$$$$) { + my ($self, $hdr, $ng, $n, $mid) = @_; # clobber some - $hdr->header_set('Newsgroups', $ng->{newsgroup}); - $hdr->header_set('Xref', xref($ng, $n)); + my $xref = xref($self, $ng, $n, $mid); + $hdr->header_set('Xref', $xref); + $xref =~ s/:\d+//g; + $hdr->header_set('Newsgroups', (split(/ /, $xref, 2))[1]); header_append($hdr, 'List-Post', "{-primary_address}>"); if (my $url = $ng->base_url) { $mid = mid_escape($mid); @@@ -475,16 -464,18 +476,16 @@@ find_mid defined $mid or return $err; } found: - my $bytes; - my $s = eval { $ng->msg_by_mid($mid, \$bytes) } or return $err; - $s = Email::Simple->new($s); - my $lines; + my $smsg = $ng->search->{over_ro}->get_art($n) or return $err; + my $msg = $ng->msg_by_smsg($smsg) or return $err; + my $s = Email::Simple->new($msg); if ($set_headers) { - set_nntp_headers($s->header_obj, $ng, $n, $mid); - $lines = $s->body =~ tr!\n!\n!; + set_nntp_headers($self, $s->header_obj, $ng, $n, $mid); # must be last $s->body_set('') if ($set_headers == 2); } - [ $n, $mid, $s, $bytes, $lines, $ng ]; + [ $n, $mid, $s, $smsg->bytes, $smsg->lines, $ng ]; } sub simple_body_write ($$) { @@@ -582,8 -573,8 +583,8 @@@ sub get_range ($$) [ $beg, $end ]; } -sub long_response ($$$$) { - my ($self, $beg, $end, $cb) = @_; +sub long_response ($$) { + my ($self, $cb) = @_; die "BUG: nested long response" if $self->{long_res}; my $fd = $self->{fd}; @@@ -594,14 -585,24 +595,14 @@@ $self->watch_read(0); my $t0 = now(); $self->{long_res} = sub { - # limit our own running time for fairness with other - # clients and to avoid buffering too much: - my $lim = 100; - - my $err; - do { - eval { $cb->(\$beg, \$lim) }; - } until (($err = $@) || $self->{closed} || - ++$beg > $end || --$lim < 0 || - $self->{write_buf_size}); - - if ($err || $self->{closed}) { + my $more = eval { $cb->() }; + if ($@ || $self->{closed}) { $self->{long_res} = undef; - if ($err) { + if ($@) { err($self, "%s during long response[$fd] - %0.6f", - $err, now() - $t0); + $@, now() - $t0); } if ($self->{closed}) { out($self, " deferred[$fd] aborted - %0.6f", @@@ -610,7 -611,7 +611,7 @@@ update_idle_time($self); $self->watch_read(1); } - } elsif ($lim < 0 || $self->{write_buf_size}) { + } elsif ($more) { # $self->{write_buf_size}: # no recursion, schedule another call ASAP # but only after all pending writes are done update_idle_time($self); @@@ -642,17 -643,19 +643,17 @@@ sub hdr_message_id ($$$) { # optimize X my $mm = $self->{ng}->mm; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - long_response($self, $beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i); - more($self, "$$i <$mid>") if defined $mid; + long_response($self, sub { + my $r = $mm->msg_range(\$beg, $end); + @$r or return; + more($self, join("\r\n", map { + "$_->[0] <$_->[1]>" + } @$r)); + 1; }); } } -sub xref ($$) { - my ($ng, $n) = @_; - "$ng->{domain} $ng->{newsgroup}:$n" -} - sub mid_lookup ($$) { my ($self, $mid) = @_; my $self_ng = $self->{ng}; @@@ -672,11 -675,9 +673,11 @@@ sub hdr_xref ($$$) { # optimize XHDR Xr my ($self, $xhdr, $range) = @_; if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID - my ($ng, $n) = mid_lookup($self, $1); + my $mid = $1; + my ($ng, $n) = mid_lookup($self, $mid); return r430 unless $n; - hdr_mid_response($self, $xhdr, $ng, $n, $range, xref($ng, $n)); + hdr_mid_response($self, $xhdr, $ng, $n, $range, + xref($self, $ng, $n, $mid)); } else { # numeric range $range = $self->{article} unless defined $range; my $r = get_range($self, $range); @@@ -685,31 -686,26 +686,31 @@@ my $mm = $ng->mm; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - long_response($self, $beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i); - more($self, "$$i ".xref($ng, $$i)) if defined $mid; + long_response($self, sub { + my $r = $mm->msg_range(\$beg, $end); + @$r or return; + more($self, join("\r\n", map { + my $num = $_->[0]; + "$num ".xref($self, $ng, $num, $_->[1]); + } @$r)); + 1; }); } } sub search_header_for { - my ($srch, $mid, $field) = @_; - my $smsg = $srch->lookup_mail($mid) or return; - $smsg->$field; + my ($srch, $num, $field) = @_; + my $smsg = $srch->{over_ro}->get_art($num) or return; + return PublicInbox::SearchMsg::date($smsg) if $field eq 'date'; + $smsg->{$field}; } sub hdr_searchmsg ($$$$) { my ($self, $xhdr, $field, $range) = @_; if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID my ($ng, $n) = mid_lookup($self, $1); - return r430 unless $n; - my $v = search_header_for($ng->search, $range, $field); + return r430 unless defined $n; + my $v = search_header_for($ng->search, $n, $field); hdr_mid_response($self, $xhdr, $ng, $n, $range, $v); } else { # numeric range $range = $self->{article} unless defined $range; @@@ -719,17 -715,22 +720,17 @@@ return $r unless ref $r; my ($beg, $end) = @$r; more($self, $xhdr ? r221 : r225); - my $off = 0; - long_response($self, $beg, $end, sub { - my ($i, $lim) = @_; - my $res = $srch->query_xover($beg, $end, $off); - my $msgs = $res->{msgs}; + my $cur = $beg; + long_response($self, sub { + my $msgs = $srch->query_xover($cur, $end); my $nr = scalar @$msgs or return; - $off += $nr; - $$lim -= $nr; my $tmp = ''; foreach my $s (@$msgs) { - $tmp .= $s->num . ' ' . $s->$field . "\r\n"; + $tmp .= $s->{num} . ' ' . $s->$field . "\r\n"; } utf8::encode($tmp); do_more($self, $tmp); - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + $cur = $msgs->[-1]->{num} + 1; }); } } @@@ -803,11 -804,11 +804,11 @@@ sub cmd_xrover ($;$) my $mm = $ng->mm; my $srch = $ng->search; more($self, '224 Overview information follows'); - long_response($self, $beg, $end, sub { - my ($i) = @_; - my $mid = $mm->mid_for($$i) or return; - my $h = search_header_for($srch, $mid, 'references'); - more($self, "$$i $h"); + + long_response($self, sub { + my $h = search_header_for($srch, $beg, 'references'); + more($self, "$beg $h") if defined($h); + $beg++ < $end; }); } @@@ -819,10 -820,10 +820,10 @@@ sub over_line ($$) $smsg->{subject}, $smsg->{from}, PublicInbox::SearchMsg::date($smsg), - '<'.PublicInbox::SearchMsg::mid($smsg).'>', + "<$smsg->{mid}>", $smsg->{references}, - PublicInbox::SearchMsg::bytes($smsg), - PublicInbox::SearchMsg::lines($smsg)); + $smsg->{bytes}, + $smsg->{lines}); utf8::encode($s); $s } @@@ -831,8 -832,8 +832,8 @@@ sub cmd_over ($;$) my ($self, $range) = @_; if ($range && $range =~ /\A<(.+)>\z/) { my ($ng, $n) = mid_lookup($self, $1); - my $smsg = $ng->search->lookup_mail($range) or - return '430 No article with that message-id'; + defined $n or return r430; + my $smsg = $ng->search->{over_ro}->get_art($n) or return r430; more($self, '224 Overview information follows (multi-line)'); # Only set article number column if it's the current group @@@ -853,16 -854,22 +854,16 @@@ sub cmd_xover ($;$) my ($beg, $end) = @$r; more($self, "224 Overview information follows for $beg to $end"); my $srch = $self->{ng}->search; - my $off = 0; - long_response($self, $beg, $end, sub { - my ($i, $lim) = @_; - my $res = $srch->query_xover($beg, $end, $off); - my $msgs = $res->{msgs}; + my $cur = $beg; + long_response($self, sub { + my $msgs = $srch->query_xover($cur, $end); my $nr = scalar @$msgs or return; - $off += $nr; - $$lim -= $nr; # OVERVIEW.FMT more($self, join("\r\n", map { - over_line(PublicInbox::SearchMsg::num($_), $_); + over_line($_->{num}, $_); } @$msgs)); - - # -1 to adjust for implicit increment in long_response - $$i = $nr ? $$i + $nr - 1 : long_response_limit; + $cur = $msgs->[-1]->{num} + 1; }); } @@@ -943,11 -950,13 +944,13 @@@ sub event_write sub event_read { my ($self) = @_; use constant LINE_MAX => 512; # RFC 977 section 2.3 - my $r = 1; - my $buf = $self->read(LINE_MAX) or return $self->close; - $self->{rbuf} .= $$buf; - while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]+)\r?\n//) { + if (index($self->{rbuf}, "\n") < 0) { + my $buf = $self->read(LINE_MAX) or return $self->close; + $self->{rbuf} .= $$buf; + } + my $r = 1; + while ($r > 0 && $self->{rbuf} =~ s/\A\s*([^\r\n]*)\r?\n//) { my $line = $1; return $self->close if $line =~ /[[:cntrl:]]/s; my $t0 = now(); @@@ -967,7 -976,7 +970,7 @@@ sub watch_read { my ($self, $bool) = @_; my $rv = $self->SUPER::watch_read($bool); - if ($bool && $self->{rbuf} ne '') { + if ($bool && index($self->{rbuf}, "\n") >= 0) { # Force another read if there is a pipelined request. # We don't know if the socket has anything for us to read, # and we must double-check again by the time the timer fires diff --combined lib/PublicInbox/SearchView.pm index d038dfca,1c4442e4..5d500c1b --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@@ -10,7 -10,7 +10,7 @@@ use PublicInbox::SearchMsg use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::View; use PublicInbox::WwwAtomStream; -use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape MID_ESC); +use PublicInbox::MID qw(MID_ESC); use PublicInbox::MIME; require PublicInbox::Git; require PublicInbox::SearchThread; @@@ -22,6 -22,7 +22,7 @@@ sub mbox_results my ($ctx) = @_; my $q = PublicInbox::SearchQuery->new($ctx->{qp}); my $x = $q->{x}; + require PublicInbox::Mbox; return PublicInbox::Mbox::mbox_all($ctx, $q->{'q'}) if $x eq 'm'; sres_top_html($ctx); } @@@ -35,7 -36,7 +36,7 @@@ sub sres_top_html my $code = 200; # double the limit for expanded views: my $opts = { - limit => $LIM, + limit => $q->{l}, offset => $q->{o}, mset => 1, relevance => $q->{r}, @@@ -117,11 -118,11 +118,11 @@@ sub mset_summary obfuscate_addrs($obfs_ibx, $s); obfuscate_addrs($obfs_ibx, $f); } - my $ts = PublicInbox::View::fmt_ts($smsg->ts); + my $date = PublicInbox::View::fmt_ts($smsg->ds); my $mid = PublicInbox::Hval->new_msgid($smsg->mid)->{href}; $$res .= qq{$rank. }. $s . "\n"; - $$res .= "$pfx - by $f @ $ts UTC [$pct%]\n\n"; + $$res .= "$pfx - by $f @ $date UTC [$pct%]\n\n"; } $$res .= search_nav_bot($mset, $q); *noop; @@@ -180,8 -181,10 +181,9 @@@ sub search_nav_top sub search_nav_bot { my ($mset, $q) = @_; my $total = $mset->get_matches_estimated; - my $nr = scalar $mset->items; my $o = $q->{o}; + my $l = $q->{l}; - my $end = $o + $nr; + my $end = $o + $mset->size; my $beg = $o + 1; my $rv = '
';
  	if ($beg <= $end) {
@@@ -190,15 -193,15 +192,15 @@@
  	} else {
  		$rv .= "No more results, only $total";
  	}
- 	my $n = $o + $LIM;
+ 	my $n = $o + $l;
  
  	if ($n < $total) {
- 		my $qs = $q->qs_html(o => $n);
+ 		my $qs = $q->qs_html(o => $n, l => $l);
  		$rv .= qq{  next}
  	}
  	if ($o > 0) {
  		$rv .= $n < $total ? '/' : '       ';
- 		my $p = $o - $LIM;
+ 		my $p = $o - $l;
  		my $qs = $q->qs_html(o => ($p > 0 ? $p : 0));
  		$rv .= qq{prev};
  	}
@@@ -226,8 -229,8 +228,8 @@@ sub mset_thread 
  	} ($mset->items) ]});
  	my $r = $q->{r};
  	my $rootset = PublicInbox::SearchThread::thread($msgs,
 -		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ts,
 -		$srch);
 +		$r ? sort_relevance(\%pct) : *PublicInbox::View::sort_ds,
 +		$ctx);
  	my $skel = search_nav_bot($mset, $q). "
";
  	my $inbox = $ctx->{-inbox};
  	$ctx->{-upfx} = '';
@@@ -247,14 -250,15 +249,14 @@@
  		*PublicInbox::View::pre_thread);
  
  	@$msgs = reverse @$msgs if $r;
 -	my $mime;
  	sub {
  		return unless $msgs;
 -		while ($mime = pop @$msgs) {
 -			$mime = $inbox->msg_by_smsg($mime) and last;
 +		my $smsg;
 +		while (my $m = pop @$msgs) {
 +			$smsg = $inbox->smsg_mime($m) and last;
  		}
 -		if ($mime) {
 -			$mime = PublicInbox::MIME->new($mime);
 -			return PublicInbox::View::index_entry($mime, $ctx,
 +		if ($smsg) {
 +			return PublicInbox::View::index_entry($smsg, $ctx,
  				scalar @$msgs);
  		}
  		$msgs = undef;
@@@ -288,7 -292,8 +290,7 @@@ sub adump 
  	PublicInbox::WwwAtomStream->response($ctx, 200, sub {
  		while (my $x = shift @items) {
  			$x = load_doc_retry($srch, $x);
 -			$x = $ibx->msg_by_smsg($x) and
 -					return PublicInbox::MIME->new($x);
 +			$x = $ibx->smsg_mime($x) and return $x;
  		}
  		return undef;
  	});
@@@ -305,10 -310,13 +307,13 @@@ sub new 
  	my ($class, $qp) = @_;
  
  	my $r = $qp->{r};
+ 	my ($l) = (($qp->{l} || '') =~ /(\d+)/);
+ 	$l = $LIM if !$l || $l > $LIM;
  	bless {
  		q => $qp->{'q'},
  		x => $qp->{x} || '',
  		o => (($qp->{o} || '0') =~ /(\d+)/),
+ 		l => $l,
  		r => (defined $r && $r ne '0'),
  	}, $class;
  }
@@@ -331,6 -339,9 +336,9 @@@ sub qs_html 
  	if (my $o = $self->{o}) { # ignore o == 0
  		$qs .= "&o=$o";
  	}
+ 	if (my $l = $self->{l}) {
+ 		$qs .= "&l=$l";
+ 	}
  	if (my $r = $self->{r}) {
  		$qs .= "&r";
  	}
diff --combined t/nntpd.t
index c6e34ed3,20191cb6..3698f98b
--- a/t/nntpd.t
+++ b/t/nntpd.t
@@@ -21,18 -21,14 +21,18 @@@ my $tmpdir = tempdir('pi-nntpd-XXXXXX'
  my $home = "$tmpdir/pi-home";
  my $err = "$tmpdir/stderr.log";
  my $out = "$tmpdir/stdout.log";
 -my $maindir = "$tmpdir/main.git";
 +my $mainrepo = "$tmpdir/main.git";
  my $group = 'test-nntpd';
  my $addr = $group . '@example.com';
  my $nntpd = 'blib/script/public-inbox-nntpd';
  my $init = 'blib/script/public-inbox-init';
  use_ok 'PublicInbox::Import';
 +use_ok 'PublicInbox::Inbox';
  use_ok 'PublicInbox::Git';
 +use_ok 'PublicInbox::V2Writable';
  
 +# XXX FIXME: make it easier to test both versions
 +my $version = int($ENV{PI_VERSION} || 1);
  my %opts = (
  	LocalAddr => '127.0.0.1',
  	ReuseAddr => 1,
@@@ -44,34 -40,14 +44,34 @@@ my $sock = IO::Socket::INET->new(%opts)
  my $pid;
  my $len;
  END { kill 'TERM', $pid if defined $pid };
 +
 +my $ibx = {
 +	mainrepo => $mainrepo,
 +	name => $group,
 +	version => $version,
 +	-primary_address => $addr,
 +};
 +$ibx = PublicInbox::Inbox->new($ibx);
  {
  	local $ENV{HOME} = $home;
 -	system($init, $group, $maindir, 'http://example.com/', $addr);
 +	my @cmd = ($init, $group, $mainrepo, 'http://example.com/', $addr);
 +	push @cmd, "-V$version";
 +	is(system(@cmd), 0, 'init OK');
  	is(system(qw(git config), "--file=$home/.public-inbox/config",
  			"publicinbox.$group.newsgroup", $group),
  		0, 'enabled newsgroup');
  	my $len;
  
 +	my $im;
 +	if ($version == 2) {
 +		$im = PublicInbox::V2Writable->new($ibx);
 +	} elsif ($version == 1) {
 +		my $git = PublicInbox::Git->new($mainrepo);
 +		$im = PublicInbox::Import->new($git, 'test', $addr);
 +	} else {
 +		die "unsupported version: $version";
 +	}
 +
  	# ensure successful message delivery
  	{
  		my $mime = Email::MIME->new(<header_set('List-Id', "<$list_id>");
  		$len = length($mime->as_string);
 -		my $git = PublicInbox::Git->new($maindir);
 -		my $im = PublicInbox::Import->new($git, 'test', $addr);
  		$im->add($mime);
  		$im->done;
 -		my $s = PublicInbox::SearchIdx->new($maindir, 1);
 -		$s->index_sync;
 +		if ($version == 1) {
 +			my $s = PublicInbox::SearchIdx->new($mainrepo, 1);
 +			$s->index_sync;
 +		}
  	}
  
  	ok($sock, 'sock created');
@@@ -123,7 -99,6 +123,7 @@@
  	my $list = $n->list;
  	is_deeply($list, { $group => [ qw(1 1 n) ] }, 'LIST works');
  	is_deeply([$n->group($group)], [ qw(0 1 1), $group ], 'GROUP works');
 +	is_deeply($n->listgroup($group), [1], 'listgroup OK');
  
  	%opts = (
  		PeerAddr => $host_port,
@@@ -147,6 -122,8 +147,8 @@@
  	is($buf, "201 server ready - post via email\r\n", 'got greeting');
  	$s->autoflush(1);
  
+ 	ok(syswrite($s, "   \r\n"), 'wrote spaces');
+ 	ok(syswrite($s, "\r\n"), 'wrote nothing');
  	syswrite($s, "NEWGROUPS\t19990424 000000 \033GMT\007\r\n");
  	is(0, sysread($s, $buf, 4096), 'GOT EOF on cntrl');
  
diff --combined t/psgi_search.t
index 60a44bde,cf5a7e91..2f033016
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@@ -30,7 -30,8 +30,7 @@@ EO
  
  my $num = 0;
  # nb. using internal API, fragile!
 -my $xdb = $rw->_xdb_acquire;
 -$xdb->begin_transaction;
 +$rw->begin_txn_lazy;
  
  foreach (reverse split(/\n\n/, $data)) {
  	$_ .= "\n";
@@@ -41,7 -42,8 +41,7 @@@
  	ok($doc_id, 'message added: '. $mid);
  }
  
 -$xdb->commit_transaction;
 -$rw = undef;
 +$rw->commit_txn_lazy;
  
  my $cfgpfx = "publicinbox.test";
  my $config = PublicInbox::Config->new({
@@@ -62,6 -64,16 +62,16 @@@ test_psgi(sub { $www->call(@_) }, sub 
  	is('%C3%86var', (keys %uniq)[0], 'matches original query');
  	ok(index($html, 'by Ævar Arnfjörð Bjarmason') >= 0,
  		"displayed Ævar's name properly in HTML");
+ 
+ 	my $warn = [];
+ 	local $SIG{__WARN__} = sub { push @$warn, @_ };
+ 	$res = $cb->(GET('/test/?q=s:test&l=5e'));
+ 	is($res->code, 200, 'successful search result');
+ 	is_deeply([], $warn, 'no warnings from non-numeric comparison');
+ 
+ 	$res = $cb->(POST('/test/?q=s:bogus&x=m'));
+ 	is($res->code, 404, 'failed search result gives 404');
+ 	is_deeply([], $warn, 'no warnings');
  });
  
  done_testing();