X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FSearch.pm;h=2b33b395e6340544fc0fa1ff142e9587f89235e1;hb=f76f265a851944b5dedcc3be5f3b5224b6ebda89;hp=4b3830e2fbee6b61cbb5450ceb9b92177dbf1923;hpb=28ee19c32a1ecf8e22f30e8f9de860695f4fb30c;p=public-inbox.git diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 4b3830e2..2b33b395 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -1,11 +1,12 @@ -# Copyright (C) 2015, all contributors +# Copyright (C) 2015 all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) # based on notmuch, but with no concept of folders, files or flags package PublicInbox::Search; use strict; use warnings; -use PublicInbox::SearchMsg; +use constant TS => 0; use Search::Xapian qw/:standard/; +use PublicInbox::SearchMsg; use Email::MIME; use PublicInbox::MID qw/mid_clean mid_compress/; @@ -15,7 +16,6 @@ our $REPLY_RE = qr/^re:\s+/i; our $LANG = 'english'; use constant { - TS => 0, # SCHEMA_VERSION history # 0 - initial # 1 - subject_path is lower-cased @@ -24,25 +24,31 @@ use constant { # 4 - change "Re: " normalization, avoid circular Reference ghosts # 5 - subject_path drops trailing '.' # 6 - preserve References: order in document data - SCHEMA_VERSION => 6, + # 7 - remove references and inreplyto terms + # 8 - remove redundant/unneeded document data + # 9 - disable Message-ID compression + SCHEMA_VERSION => 9, + + # n.b. FLAG_PURE_NOT is expensive not suitable for a public website + # as it could become a denial-of-service vector QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, }; # setup prefixes my %bool_pfx_internal = ( type => 'T', # "mail" or "ghost" - mid => 'Q', # uniQue id (Message-ID or mid_compress) + thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) ); my %bool_pfx_external = ( path => 'XPATH', - thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) - references => 'XREFS', - inreplyto => 'XIRT', + mid => 'Q', # uniQue id (Message-ID) ); my %prob_prefix = ( subject => 'S', + s => 'S', # for mairix compatibility + m => 'Q', # 'mid' is exact, 'm' can do partial ); my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); @@ -74,29 +80,21 @@ sub reopen { $_[0]->{xdb}->reopen } # read-only sub query { my ($self, $query_string, $opts) = @_; - my $query = $self->qp->parse_query($query_string, QP_FLAGS); + my $query; $opts ||= {}; - $opts->{relevance} = 1; + unless ($query_string eq '') { + $query = $self->qp->parse_query($query_string, QP_FLAGS); + $opts->{relevance} = 1 unless exists $opts->{relevance}; + } + $self->do_enquire($query, $opts); } sub get_subject_path { my ($self, $path, $opts) = @_; - my $query = $self->qp->parse_query("path:".mid_compress($path), 0); - $self->do_enquire($query, $opts); -} - -# given a message ID, get followups to a message -sub get_followups { - my ($self, $mid, $opts) = @_; - $mid = mid_clean($mid); - $mid = mid_compress($mid); - my $qp = $self->qp; - my $irt = $qp->parse_query("inreplyto:$mid", 0); - my $ref = $qp->parse_query("references:$mid", 0); - my $query = Search::Xapian::Query->new(OP_OR, $irt, $ref); - $self->do_enquire($query, $opts); + my $q = Search::Xapian::Query->new(xpfx("path").mid_compress($path)); + $self->do_enquire($q, $opts); } sub get_thread { @@ -104,9 +102,9 @@ sub get_thread { my $smsg = eval { $self->lookup_message($mid) }; return { total => 0, msgs => [] } unless $smsg; - my $qp = $self->qp; - my $qtid = $qp->parse_query('thread:'.$smsg->thread_id, 0); - my $qsub = $qp->parse_query('path:'.mid_compress($smsg->path), 0); + my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id); + my $path = mid_compress($smsg->path); + my $qsub = Search::Xapian::Query->new(xpfx('path').$path); my $query = Search::Xapian::Query->new(OP_OR, $qtid, $qsub); $self->do_enquire($query, $opts); } @@ -116,18 +114,22 @@ sub get_thread { sub do_enquire { my ($self, $query, $opts) = @_; my $enquire = $self->enquire; - - $query = Search::Xapian::Query->new(OP_AND, $query, $mail_query); + if (defined $query) { + $query = Search::Xapian::Query->new(OP_AND,$query,$mail_query); + } else { + $query = $mail_query; + } $enquire->set_query($query); if ($opts->{relevance}) { - $enquire->set_sort_by_relevance_then_value(TS, 0); + $enquire->set_sort_by_relevance_then_value(TS, 1); } else { - $enquire->set_sort_by_value(TS, 0); + $enquire->set_sort_by_value_then_relevance(TS, 1); } $opts ||= {}; my $offset = $opts->{offset} || 0; my $limit = $opts->{limit} || 50; my $mset = $enquire->get_mset($offset, $limit); + return $mset if $opts->{mset}; my @msgs = map { PublicInbox::SearchMsg->load_doc($_->get_document); } $mset->items; @@ -176,7 +178,6 @@ sub date_range_processor { sub lookup_message { my ($self, $mid) = @_; $mid = mid_clean($mid); - $mid = mid_compress($mid); my $doc_id = $self->find_unique_doc_id('mid', $mid); my $smsg; @@ -268,4 +269,13 @@ sub enquire { $self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb}); } +sub mid_prefix { + my ($self, $mpfx) = @_; + my $query = eval { $self->qp->parse_query("m:$mpfx", FLAG_PARTIAL) }; + return if $@; + my $res = $self->do_enquire($query, { relevance => 1 }); + return unless $res->{total}; + [ map { $_->mid } @{$res->{msgs}} ]; +} + 1;