use warnings;
use base qw(Danga::Socket);
use fields qw(nntpd article rbuf ng long_res);
+use PublicInbox::Search;
use PublicInbox::Msgmap;
use PublicInbox::GitCatFile;
use PublicInbox::MID qw(mid2path);
sub now () { clock_gettime(CLOCK_MONOTONIC) };
-my @OVERVIEW = qw(Subject From Date Message-ID References Bytes Lines);
-my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW) . ":\r\n";
-my $LIST_HEADERS = join("\r\n", qw(Subject From Date Message-ID References
- :bytes :lines Xref To Cc)) . "\r\n";
+my @OVERVIEW = qw(Subject From Date Message-ID References);
+my $OVERVIEW_FMT = join(":\r\n", @OVERVIEW, qw(Bytes Lines)) . ":\r\n";
+my $LIST_HEADERS = join("\r\n", @OVERVIEW,
+ qw(:bytes :lines Xref To Cc)) . "\r\n";
# disable commands with easy DoS potential:
# LISTGROUP could get pretty bad, too...
}
}
-sub header_obj_for {
- my ($srch, $mid) = @_;
- eval {
- my $smsg = $srch->lookup_message($mid);
- $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
- $smsg->mini_mime->header_obj;
- };
-};
+# Look up $mid through $srch->lookup_message, reload the hit via
+# PublicInbox::SearchMsg->load_doc and return whatever the accessor
+# named by $field yields for it.  Returns nothing when the lookup
+# produces a false value.
+sub search_header_for {
+	my ($srch, $mid, $field) = @_;
+	my $found = $srch->lookup_message($mid);
+	return unless $found;
+	my $loaded = PublicInbox::SearchMsg->load_doc($found->{doc});
+	$loaded->$field;
+}
+# Answer a header query for $field out of the search index.
+# $xhdr picks the r221 or r225 status line.  $range is either a
+# <Message-ID> or an article range; when undefined it falls back to
+# $self->{article}.  A header value that cannot be found is reported
+# as an empty string rather than undef.
sub hdr_searchmsg ($$$$) {
- my ($self, $xhdr, $hdr, $range) = @_;
- my $filter;
- if ($hdr eq 'date') {
- $hdr = 'X-PI-TS';
- $filter = sub ($) {
- strftime('%a, %d %b %Y %T %z', gmtime($_[0]));
- };
- }
-
+	my ($self, $xhdr, $field, $range) = @_;
if (defined $range && $range =~ /\A<(.+)>\z/) { # Message-ID
my ($ng, $n) = mid_lookup($self, $1);
return r430 unless $n;
- if (my $srch = $ng->search) {
- my $m = header_obj_for($srch, $range);
- my $v = $m->header($hdr);
- $v = $filter->($v) if defined $v && $filter;
- hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
- } else {
- hdr_slow($self, $xhdr, $hdr, $range);
- }
+	my $v = search_header_for($ng->search, $range, $field);
+	# the lookup or the accessor may come back empty-handed; the
+	# value is interpolated into the reply, so never pass undef on
+	$v = '' unless defined $v;
+	hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
} else { # numeric range
$range = $self->{article} unless defined $range;
- my $srch = $self->{ng}->search or
- return hdr_slow($self, $xhdr, $hdr, $range);
+	my $srch = $self->{ng}->search;
my $mm = $self->{ng}->mm;
my $r = get_range($self, $range);
return $r unless ref $r;
my ($beg, $end) = @$r;
more($self, $xhdr ? r221 : r225);
+	my $off = 0;
$self->long_response($beg, $end, sub {
my ($i) = @_;
- my $mid = $mm->mid_for($$i) or return;
- my $m = header_obj_for($srch, $mid) or return;
- my $v = $m->header($hdr);
- defined $v or return;
- $v = $filter->($v) if $filter;
- more($self, "$$i $v");
+		my $res = $srch->query_xover($beg, $end, $off);
+		my $msgs = $res->{msgs};
+		my $nr = scalar @$msgs or return;
+		$off += $nr;
+		my $tmp = '';
+		foreach my $s (@$msgs) {
+			# an article lacking the field still gets a line,
+			# with empty content after the article number
+			my $val = $s->$field;
+			$val = '' unless defined $val;
+			$tmp .= $s->num . ' ' . $val . "\r\n";
+		}
+		do_more($self, $tmp);
+		# -1 to adjust for implicit increment in long_response
+		# NOTE(review): $nr is always non-zero here because of the
+		# "or return" above, so the long_response_limit arm looks
+		# unreachable -- confirm the intended termination path
+		$$i = $nr ? $$i + $nr - 1 : long_response_limit;
});
}
}
hdr_message_id($self, $xhdr, $range);
} elsif ($sub eq 'xref') {
hdr_xref($self, $xhdr, $range);
- } elsif ($sub =~ /\A(subject|references|date)\z/) {
+ } elsif ($sub =~ /\A(?:subject|references|date|from|to|cc|
+ bytes|lines)\z/x) {
hdr_searchmsg($self, $xhdr, $sub, $range);
+ } elsif ($sub =~ /\A:(bytes|lines)\z/) {
+ hdr_searchmsg($self, $xhdr, $1, $range);
} else {
- hdr_slow($self, $xhdr, $header, $range);
+ $xhdr ? (r221 . "\r\n.") : "503 HDR not permitted on $header";
}
}
my $res = '';
if ($xhdr) {
$res .= r221 . "\r\n";
- $res .= "$mid $v\r\n" if defined $v;
+ $res .= "$mid $v\r\n";
} else {
$res .= r225 . "\r\n";
- if (defined $v) {
- my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid);
- $res .= "$pfx $v\r\n";
- }
+ my $pfx = hdr_mid_prefix($self, $xhdr, $ng, $n, $mid);
+ $res .= "$pfx $v\r\n";
}
res($self, $res .= '.');
undef;
}
-sub hdr_slow ($$$$) {
- my ($self, $xhdr, $header, $range) = @_;
-
- if (defined $range && $range =~ /\A<.+>\z/) { # Message-ID
- my $r = $self->art_lookup($range, 2);
- return $r unless ref $r;
- my ($n, $ng) = ($r->[0], $r->[5]);
- my $v = hdr_val($r, $header);
- hdr_mid_response($self, $xhdr, $ng, $n, $range, $v);
- } else { # numeric range
- $range = $self->{article} unless defined $range;
- my $r = get_range($self, $range);
- return $r unless ref $r;
- my ($beg, $end) = @$r;
- more($self, $xhdr ? r221 : r225);
- $self->long_response($beg, $end, sub {
- my ($i) = @_;
- $r = $self->art_lookup($$i, 2);
- return unless ref $r;
- defined($r = hdr_val($r, $header)) or return;
- more($self, "$$i $r");
- });
- }
-}
-
sub cmd_xrover ($;$) {
my ($self, $range) = @_;
my $ng = $self->{ng} or return '412 no newsgroup selected';
$self->long_response($beg, $end, sub {
my ($i) = @_;
my $mid = $mm->mid_for($$i) or return;
- my $m = header_obj_for($srch, $mid) or return;
- my $h = $m->header('references');
- more($self, "$$i $h") if defined $h;
+ my $h = search_header_for($srch, $mid, 'references');
+ more($self, "$$i $h");
});
}
+# Build one tab-separated overview line for article number $num from
+# the PublicInbox::SearchMsg object $smsg and return it as a string.
+# The line always carries exactly eight columns: undefined values
+# become empty strings and no accessor can drop its column.
sub over_line ($$) {
- my ($self, $r) = @_;
-
- more($self, join("\t", $r->[0], map {
- my $h = hdr_val($r, $_);
- defined $h ? $h : '';
- } @OVERVIEW ));
+	my ($num, $smsg) = @_;
+	# n.b. field access and procedural calls can be
+	# 10%-15% faster than OO method calls:
+	# scalar() matters: these accessors may do a bare "return", which
+	# in list context would vanish from the list and shift every
+	# following column to the left
+	join("\t", map { defined $_ ? $_ : '' } (
+		$num,
+		$smsg->{subject},
+		$smsg->{from},
+		scalar(PublicInbox::SearchMsg::date($smsg)),
+		'<'.PublicInbox::SearchMsg::mid($smsg).'>',
+		$smsg->{references},
+		scalar(PublicInbox::SearchMsg::bytes($smsg)),
+		scalar(PublicInbox::SearchMsg::lines($smsg))));
}
sub cmd_over ($;$) {
my ($self, $range) = @_;
- if ($range && $range =~ /\A<.+>\z/) {
- my $r = $self->art_lookup($range, 2);
- return '430 No article with that message-id' unless ref $r;
+ if ($range && $range =~ /\A<(.+)>\z/) {
+ my ($ng, $n) = mid_lookup($self, $1);
+ my $smsg = $ng->search->lookup_message($range) or
+ return '430 No article with that message-id';
more($self, '224 Overview information follows (multi-line)');
+ $smsg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
# Only set article number column if it's the current group
- my $ng = $self->{ng};
- $r->[0] = 0 if (!$ng || $ng ne $r->[5]);
- over_line($self, $r);
+ my $self_ng = $self->{ng};
+ $n = 0 if (!$self_ng || $self_ng ne $ng);
+ more($self, over_line($n, $smsg));
'.';
} else {
cmd_xover($self, $range);
return $r unless ref $r;
my ($beg, $end) = @$r;
more($self, "224 Overview information follows for $beg to $end");
+ my $srch = $self->{ng}->search;
+ my $off = 0;
$self->long_response($beg, $end, sub {
my ($i) = @_;
- my $r = $self->art_lookup($$i, 2);
- return unless ref $r;
- over_line($self, $r);
+ my $res = $srch->query_xover($beg, $end, $off);
+ my $msgs = $res->{msgs};
+ my $nr = scalar @$msgs or return;
+ $off += $nr;
+
+ # OVERVIEW.FMT
+ more($self, join("\r\n", map {
+ over_line(PublicInbox::SearchMsg::num($_), $_);
+ } @$msgs));
+
+ # -1 to adjust for implicit increment in long_response
+ $$i = $nr ? $$i + $nr - 1 : long_response_limit;
});
}
use Scalar::Util qw(weaken);
require Danga::Socket;
require PublicInbox::Msgmap;
+require PublicInbox::Search;
require PublicInbox::GitCatFile;
sub new {
};
}
+# True only when both PublicInbox::Msgmap and PublicInbox::Search can
+# be constructed for this object's git_dir.  An exception from either
+# constructor is trapped by the eval, which then yields a false value.
+sub usable {
+	my $self = shift;
+	return eval {
+		PublicInbox::Msgmap->new($self->{git_dir});
+		PublicInbox::Search->new($self->{git_dir});
+	};
+}
+
sub mm {
- my ($self, $check_only) = @_;
- if ($check_only) {
- return eval { PublicInbox::Msgmap->new($self->{git_dir}) };
- }
+ my ($self) = @_;
$self->{mm} ||= eval {
my $mm = PublicInbox::Msgmap->new($self->{git_dir});
sub search {
my ($self) = @_;
$self->{search} ||= eval {
- require PublicInbox::Search;
my $search = PublicInbox::Search->new($self->{git_dir});
# may be needed if we run low on handles
package PublicInbox::Search;
use strict;
use warnings;
-use constant TS => 0;
+
+# values for searching
+use constant TS => 0; # timestamp
+use constant NUM => 1; # NNTP article number
+use constant BYTES => 2; # :bytes as defined in RFC 3977
+use constant LINES => 3; # :lines as defined in RFC 3977
+
use Search::Xapian qw/:standard/;
use PublicInbox::SearchMsg;
use Email::MIME;
# 6 - preserve References: order in document data
# 7 - remove references and inreplyto terms
# 8 - remove redundant/unneeded document data
- # 9 - disable Message-ID compression
- SCHEMA_VERSION => 9,
+ # 9 - disable Message-ID compression (SHA-1)
+ # 10 - optimize doc for NNTP overviews
+ SCHEMA_VERSION => 10,
# n.b. FLAG_PURE_NOT is expensive not suitable for a public website
# as it could become a denial-of-service vector
$_[0]->{drp} ||= Search::Xapian::DateValueRangeProcessor->new(TS);
}
+# Lazily create, and cache in $self->{nrp}, the number value range
+# processor bound to the NUM value slot.
+sub num_range_processor {
+	my ($self) = @_;
+	$self->{nrp} ||=
+		Search::Xapian::NumberValueRangeProcessor->new(NUM);
+}
+
+# only used for NNTP server
+# Fetch one page (at most 200 documents, starting at $offset) of the
+# messages matched by the "$beg..$end" value range on NUM, sorted by
+# the NUM value.  Returns a hash ref with the estimated number of
+# matches under "total" and the loaded PublicInbox::SearchMsg objects
+# under "msgs".
+sub query_xover {
+	my ($self, $beg, $end, $offset) = @_;
+	my $enquire = $self->enquire;
+	my $page_size = 200;
+
+	my $parser = Search::Xapian::QueryParser->new;
+	$parser->set_database($self->{xdb});
+	$parser->add_valuerangeprocessor($self->num_range_processor);
+	my $range_q = $parser->parse_query("$beg..$end", QP_FLAGS);
+	my $combined = Search::Xapian::Query->new(OP_AND, $mail_query,
+						$range_q);
+
+	$enquire->set_query($combined);
+	$enquire->set_sort_by_value(NUM, 0);
+	my $mset = $enquire->get_mset($offset, $page_size);
+
+	my @msgs;
+	foreach my $item ($mset->items) {
+		push @msgs,
+			PublicInbox::SearchMsg->load_doc($item->get_document);
+	}
+	return { total => $mset->get_matches_estimated, msgs => \@msgs };
+}
+
sub lookup_message {
my ($self, $mid) = @_;
$mid = mid_clean($mid);
$self;
}
+# Store $num in value slot $col of $doc, encoded with
+# Search::Xapian::sortable_serialise.
+sub add_val {
+	my ($doc, $col, $num) = @_;
+	my $encoded = Search::Xapian::sortable_serialise($num);
+	return $doc->add_value($col, $encoded);
+}
+
sub add_message {
- my ($self, $mime) = @_; # mime = Email::MIME object
+ my ($self, $mime, $bytes, $num) = @_; # mime = Email::MIME object
my $db = $self->{xdb};
my $doc_id;
$doc->add_term(xpfx('path') . mid_compress($path));
}
- my $ts = Search::Xapian::sortable_serialise($smsg->ts);
- $doc->add_value(PublicInbox::Search::TS, $ts);
+ add_val($doc, &PublicInbox::Search::TS, $smsg->ts);
+
+ defined($num) and
+ add_val($doc, &PublicInbox::Search::NUM, $num);
+
+ defined($bytes) and
+ add_val($doc, &PublicInbox::Search::BYTES, $bytes);
+
+ add_val($doc, &PublicInbox::Search::LINES,
+ $mime->body_raw =~ tr!\n!\n!);
my $tg = $self->term_generator;
$tg->index_text($subj) if $subj;
$tg->increase_termpos;
- $tg->index_text($smsg->from->format);
+ $tg->index_text($smsg->from);
$tg->increase_termpos;
$mime->walk_parts(sub {
}
}
if (@refs) {
- $smsg->{references_sorted} = '<'.join('><', @refs).'>';
+ $smsg->{references} = '<'.join('> <', @refs).'>';
# first ref *should* be the thread root,
# but we can never trust clients to do the right thing
}
+# Forward $mime, $bytes and $num to add_message.  The $git argument
+# is accepted for the callback signature but is not used here.
sub index_blob {
- my ($self, $git, $mime) = @_;
- $self->add_message($mime);
+	my ($self, undef, $mime, $bytes, $num) = @_;
+	return $self->add_message($mime, $bytes, $num);
}
sub unindex_blob {
$self->{mm}->mid_delete(mid_clean($mime->header('Message-ID')));
}
-sub index_both {
+# Add callback handed to rlog: ask $self->{mm} for the number already
+# recorded for this message's Message-ID, then index the blob with it.
+sub index_mm2 {
+	my ($self, $git, $mime, $bytes) = @_;
+	my $mm = $self->{mm};
+	my $num = $mm->num_for(mid_clean($mime->header('Message-ID')));
+	return index_blob($self, $git, $mime, $bytes, $num);
+}
+
+# Delete callback handed to rlog: drop the Message-ID from
+# $self->{mm} first, then hand the message to unindex_blob.
+sub unindex_mm2 {
my ($self, $git, $mime) = @_;
- index_blob($self, $git, $mime);
- index_mm($self, $git, $mime);
+	my $mm = $self->{mm};
+	$mm->mid_delete(mid_clean($mime->header('Message-ID')));
+	return unindex_blob($self, $git, $mime);
+}
+
+# Run index_mm first and pass the value it returns on to index_blob
+# as the article number, together with $bytes.
+sub index_both {
+	my ($self, $git, $mime, $bytes) = @_;
+	return index_blob($self, $git, $mime, $bytes,
+			scalar(index_mm($self, $git, $mime)));
}
sub unindex_both {
}
sub do_cat_mail {
- my ($git, $blob) = @_;
+ my ($git, $blob, $sizeref) = @_;
my $mime = eval {
- my $str = $git->cat_file($blob);
+ my $str = $git->cat_file($blob, $sizeref);
Email::MIME->new($str);
};
$@ ? undef : $mime;
qw/--reverse --no-notes --no-color --raw -r --no-abbrev/,
$range);
my $latest;
+ my $bytes;
my $pid = open(my $log, '-|', @cmd) or
die('open` '.join(' ', @cmd) . " pipe failed: $!\n");
while (my $line = <$log>) {
if ($line =~ /$addmsg/o) {
- my $mime = do_cat_mail($git, $1) or next;
- $add_cb->($self, $git, $mime);
+ my $mime = do_cat_mail($git, $1, \$bytes) or next;
+ $add_cb->($self, $git, $mime, $bytes);
} elsif ($line =~ /$delmsg/o) {
my $mime = do_cat_mail($git, $1) or next;
$del_cb->($self, $git, $mime);
$mm->{dbh}->commit;
$mm->last_commit($lm) if defined $lm;
- goto xapian_only;
+ $lx = $self->rlog($range, *index_mm2, *unindex_mm2);
+ $db->set_metadata('last_commit', $lx) if defined $lx;
}
} else {
# user didn't install DBD::SQLite and DBI
-xapian_only:
$lx = $self->rlog($range, *index_blob, *unindex_blob);
$db->set_metadata('last_commit', $lx) if defined $lx;
}
my $enc_utf8 = find_encoding('UTF-8');
our $PFX2TERM_RE = undef;
use constant EPOCH_822 => 'Thu, 01 Jan 1970 00:00:00 +0000';
+use POSIX qw(strftime);
sub new {
my ($class, $mime) = @_;
bless { doc => $doc, mime => undef, mid => $mid }, $class;
}
+# Read value slot $col of $doc and decode it with
+# Search::Xapian::sortable_unserialise.
+sub get_val ($$) {
+	my ($doc, $col) = @_;
+	my $raw = $doc->get_value($col);
+	return Search::Xapian::sortable_unserialise($raw);
+}
+
+# Rebuild a message object from a stored document: the timestamp
+# comes from the TS value slot, and the document data is decoded and
+# split on newlines into subject, from, references, to and cc.
sub load_doc {
my ($class, $doc) = @_;
my $data = $doc->get_data;
- my $ts = eval {
- no strict 'subs';
- $doc->get_value(PublicInbox::Search::TS);
- };
- $ts = Search::Xapian::sortable_unserialise($ts);
+	my $ts = get_val($doc, &PublicInbox::Search::TS);
$data = $enc_utf8->decode($data);
- my ($subj, $from, $refs) = split(/\n/, $data);
+	# LIMIT of 5: without it split() discards trailing empty fields,
+	# so an empty references/to/cc would come back undef instead of ''
+	my ($subj, $from, $refs, $to, $cc) = split(/\n/, $data, 5);
bless {
doc => $doc,
subject => $subj,
ts => $ts,
- from_name => $from,
- references_sorted => $refs,
+	from => $from,
+	references => $refs,
+	to => $to,
+	cc => $cc,
}, $class;
}
-sub subject {
+# :bytes and :lines metadata in RFC 3977
+sub bytes ($) { get_val($_[0]->{doc}, &PublicInbox::Search::BYTES) }
+sub lines ($) { get_val($_[0]->{doc}, &PublicInbox::Search::LINES) }
+sub num ($) { get_val($_[0]->{doc}, &PublicInbox::Search::NUM) }
+
+# Return the cached value of $field when there is one.  Otherwise
+# read it from the attached MIME object, turn tabs, CRs and LFs into
+# spaces, cache the result and return it.  Returns nothing when there
+# is neither a cached value nor a MIME object.
+sub __hdr ($$) {
+	my ($self, $field) = @_;
+	my $cached = $self->{$field};
+	defined $cached and return $cached;
+
+	my $mime = $self->{mime};
+	return unless $mime;
+	my $val = $mime->header($field);
+	defined $val or $val = '';
+	$val =~ tr/\t\r\n/ /;
+	$self->{$field} = $val;
+}
+
+sub subject ($) { __hdr($_[0], 'subject') }
+sub to ($) { __hdr($_[0], 'to') }
+sub cc ($) { __hdr($_[0], 'cc') }
+
+# Return the Date header value when __hdr can supply one.  Otherwise
+# format $self->{ts} as a UTC date string, cache it in $self->{date}
+# and return it.  Returns nothing when no timestamp is available.
+sub date ($) {
my ($self) = @_;
- my $subj = $self->{subject};
- return $subj if defined $subj;
- $subj = $self->{mime}->header('Subject');
- $subj = '' unless defined $subj;
- $subj =~ tr/\n/ /;
- $self->{subject} = $subj;
+	my $date = __hdr($self, 'date');
+	return $date if defined $date;
+	my $ts = $self->{ts};
+	return unless defined $ts;
+	# the fields come from gmtime(), so the offset is always +0000;
+	# %z would report the local zone of the running process instead
+	$self->{date} = strftime('%a, %d %b %Y %T +0000', gmtime($ts));
}
-sub from {
+# Return the From header value as supplied by __hdr.  On the first
+# call with a defined value, also fill in $self->{from_name} from the
+# first parsed address, or from the raw header text when nothing can
+# be parsed.
+sub from ($) {
my ($self) = @_;
- my $from = $self->mime->header('From') || '';
- my @from;
-
- if ($from) {
- $from =~ tr/\n/ /;
- @from = Email::Address->parse($from);
- $self->{from} = $from[0];
- $from = $from[0]->name;
+	my $from = __hdr($self, 'from');
+	if (defined $from && !defined $self->{from_name}) {
+		$from =~ tr/\t\r\n/ /;
+		my @from = Email::Address->parse($from);
+		# parse() yields an empty list for an empty or unparseable
+		# header; fall back to the raw text rather than calling
+		# ->name on undef
+		$self->{from_name} = @from ? $from[0]->name : $from;
}
- $self->{from_name} = $from;
- $self->{from};
+	$from;
}
sub from_name {
+# Serialize subject, from, references, to and cc into the
+# newline-separated string stored as document data.
sub to_doc_data {
my ($self) = @_;
- PublicInbox::Search::subject_summary($self->subject) . "\n" .
- $self->from_name . "\n".
- $self->references_sorted;
+	# each accessor is forced into scalar context and undef becomes ''
+	# so the result always holds exactly five columns, in the order
+	# load_doc splits them back out; a bare "return" in list context
+	# would otherwise silently drop a column
+	join("\n", map { defined $_ ? $_ : '' } (
+		scalar($self->subject), scalar($self->from),
+		scalar($self->references),
+		scalar($self->to), scalar($self->cc)));
}
-sub references_sorted {
+sub references {
my ($self) = @_;
- my $x = $self->{references_sorted};
+ my $x = $self->{references};
defined $x ? $x : '';
}
'Message-ID' => "<$self->{mid}>",
'X-PI-TS' => $self->ts,
);
- if (my $refs = $self->{references_sorted}) {
- $refs =~ s/></> </g;
+ if (my $refs = $self->{references}) {
push @h, References => $refs;
}
my $mime = Email::MIME->create(header_str => \@hs, header => \@h);
$mime;
}
-sub mid {
+sub mid ($;$) {
my ($self, $mid) = @_;
if (defined $mid) {
$ng = $old_ng;
}
- # Only valid if Msgmap works
- if ($ng->mm(1)) {
+ # Only valid if both msgmap and search work
+ if ($ng->usable) {
$new->{$g} = $ng;
push @list, $ng;
}
'<nntp@example.com>',
'',
'202',
- '1' ] }, "XOVER works");
+ '1' ] }, "XOVER range works");
+
+ is_deeply($n->xover('1'), {
+ '1' => ['hihi',
+ 'Me <me@example.com>',
+ 'Thu, 01 Jan 1970 06:06:06 +0000',
+ '<nntp@example.com>',
+ '',
+ '202',
+ '1' ] }, "XOVER by article works");
+
+ {
+ syswrite($s, "OVER $mid\r\n");
+ $buf = '';
+ do {
+ sysread($s, $buf, 4096, length($buf));
+ } until ($buf =~ /^[^2]../ || $buf =~ /\r\n\.\r\n\z/);
+ my @r = split("\r\n", $buf);
+ like($r[0], qr/^224 /, 'got 224 response for OVER');
+ is($r[1], "0\thihi\tMe <me\@example.com>\t" .
+ "Thu, 01 Jan 1970 06:06:06 +0000\t" .
+ "$mid\t\t202\t1", 'OVER by Message-ID works');
+ is($r[2], '.', 'correctly terminated response');
+ }
ok(kill('TERM', $pid), 'killed nntpd');
$pid = undef;
ok($doc_id > 0, "doc_id defined with circular reference");
my $smsg = $rw->lookup_message('circle@a');
$smsg->ensure_metadata;
- is($smsg->references_sorted, '', "no references created");
+ is($smsg->references, '', "no references created");
}
done_testing();