1 # Copyright (C) 2020 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
3 # IMAP search query parser. cf RFC 3501
5 # We currently compile Xapian queries to a string which is fed
6 # to Xapian's query parser. However, we may use Xapian-provided
7 # Query object API to build an optree, instead.
8 package PublicInbox::IMAPsearchqp;
10 use Parse::RecDescent;
11 use Time::Local qw(timegm);
12 use POSIX qw(strftime);
13 our $q = bless {}, __PACKAGE__; # singleton, reachable in generated P::RD
# Upper-case three-letter month abbreviations, January first.
14 my @MoY = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
# Month abbreviation => zero-padded two-digit month number (JAN => '01').
# The leading ';' makes Perl parse the braces as a block, not a hash.
15 my %MM = map {; $MoY[$_-1] => sprintf('%02u', $_) } (1..12);
17 # IMAP to Xapian header search key mapping
21 # TEXT => undef, # => everything
25 # BCC => 'bcc:', # TODO
27 # IMAP allows searching arbitrary headers via
28 # "HEADER $field_name $string" which gets silly expensive.
29 # We only allow the headers we already index.
32 # KEYWORD # TODO ? dfpre,dfpost,...
# Express an IMAP UID sequence-set as Xapian "uid:LO..HI" range terms and
# append them to the term list of the package singleton $q.
# Several ranges are OR-ed inside parentheses; a lone range is pushed bare.
# Dies with the value returned by range_step when that is not a reference.
35 sub uid_set_xap ($$) {
36 my ($self, $seq_set) = @_;
# range_step receives a reference to $seq_set, so it may modify it in place
39 my $u = $self->{imap}->range_step(\$seq_set);
40 die $u unless ref($u); # break out of the parser on error
# $u is indexed as a two-element [low, high] pair
41 push @u, "uid:$u->[0]..$u->[1]";
# note: the result goes to $q->{xap}, not $self->{xap}
43 push(@{$q->{xap}}, @u > 1 ? '('.join(' OR ', @u).')' : $u[0]);
# Record a term that only Xapian can answer.  $self->{sql} is dropped, the
# optional $query is appended to $self->{xap}, and any UID sets queued in
# $self->{uid} are re-expressed as Xapian terms through uid_set_xap.
47 my ($self, $query) = @_;
48 delete $self->{sql}; # query too complex for over.sqlite3
# callers may omit $query just to force the Xapian-only path
49 push @{$self->{xap}}, $query if defined($query);
51 # looks like we can't use SQLite-only, convert SQLite UID
# the uid queue is removed from $self while it is being converted
53 if (my $uid = delete $self->{uid}) {
54 uid_set_xap($self, $_) for @$uid;
# Convert an IMAP search string into quoted, lower-cased Xapian terms.
# $field_name selects a prefix from %IH2X; an undefined or unmapped
# field name results in an empty prefix.
60 my ($self, $field_name, $s) = @_; # $self == $q
# strip one pair of enclosing double quotes when the whole string is quoted
61 $s =~ /\A"(.*?)"\z/s and $s = $1;
63 # AFAIK Xapian can't handle [*"] in probabilistic terms,
64 # and it relies on lowercase
65 my $xk = defined($field_name) ? ($IH2X{$field_name} // '') : '';
# split on runs of '*', '"' and whitespace, wrap each piece as PREFIX"piece",
# join the pieces with spaces and lower-case the result (prefix included)
67 lc(join(' ', map { qq[$xk"$_"] } split(/[\*"\s]+/, $s))));
# Begin a nested query: the current term list (or a fresh empty one) is
# taken off $q->{xap} and saved on $q->{stack}.
73 my $old = delete($q->{xap}) // [];
# push returns the new stack size
74 my $nr = push @{$q->{stack}}, $old;
# refuse to continue once more than 10 entries are stacked
75 die 'BAD deep recursion' if $nr > 10;
# Finish a nested query: the saved parent term list becomes current again
# and receives the child's terms as one entry.
80 my $child = delete $q->{xap};
81 my $parent = $q->{xap} = pop @{$q->{stack}};
# several child terms are space-joined inside parentheses; one is pushed bare
82 push(@$parent, @$child > 1 ? '('.join(' ', @$child).')' : $child->[0]);
# Decode the DD-MON-YYYY text found in $item->{date}.
# List context:   returns ($yyyy, $mm, $dd), month and day zero-padded.
# Scalar context: returns the timegm() epoch for 00:00:00 on that date.
# Dies with "BAD month: ..." when the month abbreviation is not in %MM.
88 my ($dd, $mon, $yyyy) = split(/-/, $item->{date}, 3);
89 my $mm = $MM{$mon} // die "BAD month: $mon";
90 wantarray ? ($yyyy, $mm, sprintf('%02u', $dd))
91 : timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
# Lower date bound: adds an open-ended "d:YYYYMMDD.." term to $self->{xap}
# and, while an SQL clause is still being built, a "ds >= epoch" condition.
# Returns 1 right after the Xapian term when $self->{sql} is not set.
95 my ($self, $item) = @_;
96 my ($yyyy, $mm, $dd) = yyyymmdd($item);
97 push @{$self->{xap}}, "d:$yyyy$mm$dd..";
# $self->{sql} is dereferenced as a scalar ref below
98 my $sql = $self->{sql} or return 1;
# epoch seconds of 00:00:00 on the given date
99 my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
100 $$sql .= " AND ds >= $ds";
# Single-day match: adds a "dt:" range running from 00:00:00 to 23:59:59 of
# the given date to $self->{xap} and, while an SQL clause is still being
# built, the matching "ds" bounds.
# Returns 1 right after the Xapian term when $self->{sql} is not set.
104 my ($self, $item) = @_;
105 my ($yyyy, $mm, $dd) = yyyymmdd($item);
# epoch seconds of 00:00:00 on the given date
106 my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
107 my $end = $ds + 86399; # last second of that day; leap seconds ignored
# YYYYmmddHHMMSS rendering of $end via gmtime
108 my $dt_end = strftime('%Y%m%d%H%M%S', gmtime($end));
109 push @{$self->{xap}}, "dt:$yyyy$mm$dd"."000000..$dt_end";
110 my $sql = $self->{sql} or return 1;
111 $$sql .= " AND ds >= $ds AND ds <= $end";
# Upper date bound: adds a "d:..YYYYMMDD" term to $self->{xap} and, while an
# SQL clause is still being built, a "ds <= epoch" condition.
# Returns 1 right after the Xapian term when $self->{sql} is not set.
115 my ($self, $item) = @_;
116 my ($yyyy, $mm, $dd) = yyyymmdd($item);
117 push @{$self->{xap}}, "d:..$yyyy$mm$dd";
118 my $sql = $self->{sql} or return 1;
# epoch seconds of 00:00:00 on the given date
119 my $ds = timegm(0, 0, 0, $dd, $mm - 1, $yyyy);
120 $$sql .= " AND ds <= $ds";
# Single-day match on the "ts" field: adds "ts:START..END" to $self->{xap}
# and, while an SQL clause is still being built, the matching "ts" bounds.
# Returns 1 right after the Xapian term when $self->{sql} is not set.
124 my ($self, $item) = @_;
# scalar context: yyyymmdd returns the epoch of 00:00:00 on the given date
125 my $ts = yyyymmdd($item);
126 my $end = $ts + 86399; # last second of that day; leap seconds ignored
127 push @{$self->{xap}}, "ts:$ts..$end";
128 my $sql = $self->{sql} or return 1;
129 $$sql .= " AND ts >= $ts AND ts <= $end";
# Upper bound on the "ts" field: adds "ts:..EPOCH" to $self->{xap} and,
# while an SQL clause is still being built, a "ts <= epoch" condition.
# Returns 1 right after the Xapian term when $self->{sql} is not set.
133 my ($self, $item) = @_;
# scalar context: yyyymmdd returns the epoch of 00:00:00 on the given date
134 my $ts = yyyymmdd($item);
135 push @{$self->{xap}}, "ts:..$ts";
136 my $sql = $self->{sql} or return 1;
137 $$sql .= " AND ts <= $ts";
# Lower bound on the "ts" field: adds "ts:EPOCH.." to $self->{xap} and,
# while an SQL clause is still being built, a "ts >= epoch" condition.
# Returns 1 right after the Xapian term when $self->{sql} is not set.
141 my ($self, $item) = @_;
# scalar context: yyyymmdd returns the epoch of 00:00:00 on the given date
142 my $ts = yyyymmdd($item);
143 push @{$self->{xap}}, "ts:$ts..";
144 my $sql = $self->{sql} or return 1;
145 $$sql .= " AND ts >= $ts";
# Handle a UID sequence-set.  It is either queued on $q->{uid} for later use
# or, once the query is Xapian-only, converted at once by uid_set_xap.
# NOTE(review): the branch condition is not shown alongside these lines;
# presumably it tests whether $self->{sql} is still present -- confirm.
149 my ($self, $seq_set) = @_;
151 push @{$q->{uid}}, $seq_set;
152 } else { # we've gone Xapian-only
153 uid_set_xap($self, $seq_set);
# Handle a sequence-set given as message sequence numbers, then hand it to
# uid_set.  The msn2uid lookup is fetched from the IMAP object on first use
# and cached in $self->{msn2uid}.
# NOTE(review): the return value of msn_to_uid_range is ignored, so it
# presumably rewrites $seq_set in place -- confirm in PublicInbox::IMAP.
159 my ($self, $seq_set) = @_;
160 PublicInbox::IMAP::msn_to_uid_range(
161 $self->{msn2uid} //= $self->{imap}->msn2uid, $seq_set);
162 uid_set($self, $seq_set);
# Parse::RecDescent parser for IMAP SEARCH keys, built once from the
# non-interpolating heredoc below.  Rule actions call methods on the package
# singleton (aliased to a lexical $q inside the grammar), and the start rule
# search_key returns that singleton.
165 my $prd = Parse::RecDescent->new(<<'EOG');
167 { my $q = $PublicInbox::IMAPsearchqp::q; }
168 search_key : CHARSET(?) search_key1(s) { $return = $q }
169 search_key1 : "ALL" | "RECENT" | "UNSEEN" | "NEW"
181 | HEADER_field_name_string
194 CHARSET : 'CHARSET' charset
195 { $item{charset} =~ /\A(?:UTF-8|US-ASCII)\z/ ? 1 : die('NO [BADCHARSET]'); }
197 SENTSINCE_date : 'SENTSINCE' date { $q->SENTSINCE(\%item) }
198 SENTON_date : 'SENTON' date { $q->SENTON(\%item) }
199 SENTBEFORE_date : 'SENTBEFORE' date { $q->SENTBEFORE(\%item) }
201 SINCE_date : 'SINCE' date { $q->SINCE(\%item) }
202 ON_date : 'ON' date { $q->ON(\%item) }
203 BEFORE_date : 'BEFORE' date { $q->BEFORE(\%item) }
205 MSN_set : sequence_set { $q->msn_set($item{sequence_set}) }
206 UID_set : "UID" sequence_set { $q->uid_set($item{sequence_set}) }
207 LARGER_number : "LARGER" number { $q->xap_only("bytes:$item{number}..") }
208 SMALLER_number : "SMALLER" number { $q->xap_only("bytes:..$item{number}") }
209 # pass "NOT" through XXX is this right?
210 OP_NOT : "NOT" { $q->xap_only('NOT') }
211 NOT_search_key : OP_NOT search_key1
213 $q->xap_only('OP_OR');
214 my $cur = delete $q->{xap};
215 push @{$q->{stack}}, $cur;
218 search_key_a : search_key1
220 my $ka = delete $q->{xap};
222 push @{$q->{stack}}, $ka;
224 OR_search_keys : OP_OR search_key_a search_key1
226 my $kb = delete $q->{xap};
227 my $ka = pop @{$q->{stack}};
228 my $xap = $q->{xap} = pop @{$q->{stack}};
230 $op eq 'OP_OR' or die "BAD expected OR: $op";
231 $ka = @$ka > 1 ? '('.join(' ', @$ka).')' : $ka->[0];
232 $kb = @$kb > 1 ? '('.join(' ', @$kb).')' : $kb->[0];
233 push @$xap, "($ka OR $kb)";
235 HEADER_field_name_string : "HEADER" field_name string
237 $q->ih2x($item{field_name}, $item{string});
239 FROM_string : "FROM" string { $q->ih2x('FROM', $item{string}) }
240 TO_string : "TO" string { $q->ih2x('TO', $item{string}) }
241 CC_string : "CC" string { $q->ih2x('CC', $item{string}) }
242 BCC_string : "BCC" string { $q->ih2x('BCC', $item{string}) }
243 SUBJECT_string : "SUBJECT" string { $q->ih2x('SUBJECT', $item{string}) }
244 BODY_string : "BODY" string { $q->ih2x('BODY', $item{string}) }
245 TEXT_string : "TEXT" string { $q->ih2x(undef, $item{string}) }
246 op_subq_enter : '(' { $q->subq_enter }
247 sub_query : op_subq_enter search_key1(s) ')' { $q->subq_leave }
249 field_name : /[\x21-\x39\x3b-\x7e]+/
250 string : quoted | literal
251 literal : /[^"\(\) \t]+/ # bogus, I know
254 date : /[0123]?[0-9]-[A-Z]{3}-[0-9]{4,}/
255 sequence_set : /\A[0-9][0-9,:]*[0-9\*]?\z/
# Parse an IMAP SEARCH query with the shared grammar, using the package
# singleton $q as scratch space.
#   $imap  - PublicInbox::IMAP object, used here for range_step
#   $query - raw search text
# Error returns visible here are plain strings: the trapped exception when it
# starts with "BAD " or "NO ", 'BAD unexpected result' when the parser did
# not hand back the singleton, or a non-reference result from range_step.
259 my ($imap, $query) = @_;
# reset the singleton; sql holds a reference to the SQL text being built
261 %$q = (sql => \$sql, imap => $imap); # imap = PublicInbox::IMAP obj
# the whole query is upper-cased before parsing; eval traps parser dies
263 my $res = eval { $prd->search_key(uc($query)) };
264 return $@ if $@ && $@ =~ /\A(?:BAD|NO) /;
# refs are compared numerically, i.e. by address
265 return 'BAD unexpected result' if !$res || $res != $q;
# sql key still present: no rule forced the Xapian-only path
266 if (exists $q->{sql}) {
# fold the queued UID sets into the SQL as OR-ed "num" ranges
268 if (my $uid = delete $q->{uid}) {
270 for my $uid_set (@$uid) {
271 my $u = $q->{imap}->range_step(\$uid_set);
272 return $u if !ref($u);
273 push @u, "num >= $u->[0] AND num <= $u->[1]";
275 $sql .= ' AND ('.join(' OR ', @u).')';
# flatten the collected Xapian terms into one space-separated string
278 $q->{xap} = join(' ', @{$q->{xap}});
# drop per-request state from the singleton
280 delete @$q{qw(imap msn2uid)};