1 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
2 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
4 # like PublicInbox::SearchIdx, but for searching for non-mail messages.
5 # Things indexed include:
8 # * (maybe) git code repository information
9 # Expect ~100K-1M documents with no parallelism opportunities,
10 # so no sharding, here.
12 # See PublicInbox::MiscSearch for the read-only counterpart
13 package PublicInbox::MiscIdx;
16 use PublicInbox::InboxWritable;
17 use PublicInbox::Search; # for SWIG Xapian and Search::Xapian compat
18 use PublicInbox::SearchIdx qw(index_text term_generator add_val);
19 use PublicInbox::Spawn qw(nodatacow_dir);
22 use PublicInbox::MiscSearch;
23 use PublicInbox::Config;
# Constructor-style setup for the misc index: receives the class name and
# the owning $eidx object, prepares the on-disk directory and the Xapian
# open flags.
# NOTE(review): the enclosing `sub' line and the rest of the returned
# structure are not visible in this listing -- confirm against the full file.
27 my ($class, $eidx) = @_;
28 PublicInbox::SearchIdx::load_xapian_writable();
# the misc DB lives in a "misc" directory below $eidx->{xpfx}
29 my $mi_dir = "$eidx->{xpfx}/misc";
30 File::Path::mkpath($mi_dir);
31 nodatacow_dir($mi_dir);
32 my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN;
# DB_NO_SYNC is added only when $eidx has -no_fsync set
33 $flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync};
# $json is assigned only while still undefined, so later calls reuse it
34 $json //= PublicInbox::Config::json();
38 indexlevel => 'full', # small DB, no point in medium?
# Opens the Xapian WritableDatabase at $self->{mi_dir} with $self->{flags}
# and starts a transaction on it; croaks when the open fails.
# NOTE(review): presumably the body of _begin_txn (which other code in this
# file calls) -- the `sub' line is not visible here, confirm.
44 my $wdb = $PublicInbox::Search::X{WritableDatabase};
# eval traps the exception from ->new so the croak below can name the path
45 my $xdb = eval { $wdb->new($self->{mi_dir}, $self->{flags}) };
46 croak "Failed opening $self->{mi_dir}: $@" if $@;
47 $xdb->begin_transaction;
# Commit the pending transaction, if any.  {xdb} is removed from $self first,
# so a later `$self->{xdb} //= _begin_txn($self)' opens a fresh handle;
# returns early when no handle was stored.
53 my $xdb = delete $self->{xdb} or return;
54 $xdb->commit_transaction;
# Keep the stored writable DB handle if there is one, otherwise open the DB
# and begin a transaction via _begin_txn.
59 $self->{xdb} //= _begin_txn($self);
# Delete every document carrying the boolean term 'Q'.$eidx_key and warn
# about each removal.
# $eidx_key: key of the inbox whose document(s) should be removed
64 my ($self, $eidx_key) = @_;
65 my $xdb = $self->{xdb} //= _begin_txn($self);
# walk the posting list of the Q term to collect the matching docids
66 my $head = $xdb->postlist_begin('Q'.$eidx_key);
67 my $tail = $xdb->postlist_end('Q'.$eidx_key);
68 my @docids; # only one, unless we had bugs
69 for (; $head != $tail; $head++) {
70 push @docids, $head->get_docid;
# deletion is done in a second pass, after the posting-list walk
72 for my $docid (@docids) {
73 $xdb->delete_document($docid);
74 warn "I: remove inbox docid #$docid ($eidx_key)\n";
78 # adds or updates according to $eidx_key
# $ibx: inbox object; the code below reads its eidx_key, modified,
# uidvalidity, description, name, newsgroup and git information.
# NOTE(review): the `sub' line and several statements of this routine are
# not visible in this listing -- confirm details against the full file.
80 my ($self, $ibx) = @_;
81 my $eidx_key = $ibx->eidx_key;
82 my $xdb = $self->{xdb} //= _begin_txn($self);
83 # Q = uniQue in Xapian terminology
84 my $head = $xdb->postlist_begin('Q'.$eidx_key);
85 my $tail = $xdb->postlist_end('Q'.$eidx_key);
# scan documents already stored under this key: one docid is kept in $docid
# for the replace at the end, extras trigger the warning below
# (presumably they are collected in @drop -- that statement is not shown)
87 for (; $head != $tail; $head++) {
89 my $i = $head->get_docid;
92 W: multiple inboxes keyed to `$eidx_key', deleting #$i
95 $docid = $head->get_docid;
98 $xdb->delete_document($_) for @drop; # just in case
# build a fresh document and point the term generator at it
100 my $doc = $PublicInbox::Search::X{Document}->new;
101 term_generator($self)->set_document($doc);
103 # allow sorting by modified and uidvalidity (created at)
104 add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified);
105 add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity);
107 $doc->add_boolean_term('Q'.$eidx_key); # uniQue id
108 $doc->add_boolean_term('T'.'inbox'); # Type
# the extra type term requires both a newsgroup name and nntp_usable
110 if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) {
111 $doc->add_boolean_term('T'.'newsgroup'); # additional Type
114 # force reread from disk, {description} could be loaded from {misc}
115 delete $ibx->{description};
116 my $desc = $ibx->description;
118 # description = S/Subject (or title)
120 index_text($self, $desc, 1, 'S');
121 index_text($self, $ibx->{name}, 1, 'XNAME');
125 infourl => 'XINFOURL',
# index every value of each mapped inbox field under that field's prefix;
# a missing field is treated as an empty list
128 while (my ($f, $pfx) = each %map) {
129 for my $v (@{$ibx->{$f} // []}) {
130 index_text($self, $v, 1, $pfx);
# $data gets one manifest entry per git repository: for v2 one per epoch,
# keyed "/<name>/git/<epoch>.git"; for v1 a single entry keyed "/<name>"
134 if (defined(my $max = $ibx->max_git_epoch)) { # v2
135 my $pfx = "/$ibx->{name}/git/";
136 for my $epoch (0..$max) {
# NOTE(review): a false git_epoch() result returns from the routine here,
# before the set_data and replace_document/add_document calls below --
# confirm that abandoning the whole update is intended
137 my $git = $ibx->git_epoch($epoch) or return;
138 if (my $ent = $git->manifest_entry($epoch, $desc)) {
139 $data->{"$pfx$epoch.git"} = $ent;
140 $ent->{git_dir} = $git->{git_dir};
142 $git->cleanup; # ->modified starts cat-file --batch
144 } elsif (my $ent = $ibx->git->manifest_entry) { # v1
145 $ent->{git_dir} = $ibx->{inboxdir};
146 $data->{"/$ibx->{name}"} = $ent;
# the document payload is the JSON encoding of $data
148 $doc->set_data($json->encode($data));
# an existing docid found during the scan above is replaced in place;
# add_document is the other path (its branch line is not shown here)
149 if (defined $docid) {
150 $xdb->replace_document($docid, $doc);
152 $xdb->add_document($doc);