-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
package PublicInbox::LeiDedupe;
use strict;
use v5.10.1;
-use PublicInbox::SharedKV;
-use PublicInbox::ContentHash qw(content_hash);
-use Digest::SHA ();
+use PublicInbox::ContentHash qw(content_hash git_sha);
+use PublicInbox::SHA ();
# n.b. mutt sets most of these headers; not sure about Bytes
our @OID_IGNORE = qw(Status X-Status Content-Length Lines Bytes);
push @stash, [ $k, \@v ];
$eml->header_set($k); # restore below
}
- my $dig = Digest::SHA->new(1); # XXX SHA256 later
- my $buf = $eml->as_string;
- $dig->add('blob '.length($buf)."\0");
- $dig->add($buf);
- undef $buf;
-
+ my $dig = git_sha(1, $eml);
for my $kv (@stash) { # restore stashed headers
my ($k, @v) = @$kv;
$eml->header_set($k, @v);
sub smsg_hash ($) {
my ($smsg) = @_;
- my $dig = Digest::SHA->new(256);
+ my $dig = PublicInbox::SHA->new(256);
my $x = join("\0", @$smsg{qw(from to cc ds subject references mid)});
utf8::encode($x);
$dig->add($x);
sub dedupe_oid ($) {
my ($skv) = @_;
(sub { # may be called in a child process
- my ($eml, $oid) = @_;
- $skv->set_maybe(_oidbin($oid) // _regen_oid($eml), '');
+ my ($eml, $oidhex) = @_;
+ $skv->set_maybe(_oidbin($oidhex) // _regen_oid($eml), '');
}, sub {
my ($smsg) = @_;
$skv->set_maybe(_oidbin($smsg->{blob}), '');
sub dedupe_mid ($) {
my ($skv) = @_;
(sub { # may be called in a child process
- my ($eml, $oid) = @_;
- # TODO: lei will support non-public messages w/o Message-ID
- my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) //
+ my ($eml, $oidhex) = @_;
+ # lei supports non-public drafts w/o Message-ID
+ my $mid = $eml->header_raw('Message-ID') // _oidbin($oidhex) //
content_hash($eml);
$skv->set_maybe($mid, '');
}, sub {
sub dedupe_content ($) {
my ($skv) = @_;
(sub { # may be called in a child process
- my ($eml) = @_; # oid = $_[1], ignored
+ my ($eml) = @_; # $oidhex = $_[1], ignored
$skv->set_maybe(content_hash($eml), '');
}, sub {
my ($smsg) = @_;
return if ($dd eq 'none' && substr($dst // '', -1) eq '/');
my $m = "dedupe_$dd";
$cls->can($m) or die "unsupported dedupe strategy: $dd\n";
- my $skv = $dd eq 'none' ? undef : PublicInbox::SharedKV->new;
-
+ my $skv;
+ if ($dd ne 'none') {
+ require PublicInbox::SharedKV;
+ $skv = PublicInbox::SharedKV->new;
+ }
# [ $skv, $eml_cb, $smsg_cb, "dedupe_$dd" ]
bless [ $skv, undef, undef, $m ], $cls;
}
# returns true on seen messages according to the deduplication strategy,
# returns false if unseen
#
# Invokes the code ref stored in slot 1 of $self ($eml_cb in the layout
# comment next to the bless call) and negates whatever it returns.
# NOTE(review): slot 1 is initialized to undef where $self is blessed, so
# it must be filled in elsewhere before this is called -- not visible here.
sub is_dup {
- my ($self, $eml, $oid) = @_;
- !$self->[1]->($eml, $oid);
+ my ($self, $eml, $smsg) = @_;
# $smsg is optional: when given, only its {blob} field is forwarded as the
# callback's second argument; otherwise undef is passed explicitly
+ !$self->[1]->($eml, $smsg ? $smsg->{blob} : undef);
}
sub is_smsg_dup {
sub pause_dedupe {
# Calls dbh_release on the object in slot 0 of $self and then removes its
# {dbh} entry.  Slot 0 is only populated when the dedupe strategy is not
# 'none', so the new version returns early when it is unset.
my ($self) = @_;
- my $skv = $self->[0];
+ my $skv = $self->[0] or return;
$skv->dbh_release;
# NOTE(review): with the early return above, $skv is always true at this
# point, so the trailing "if $skv" looks redundant -- confirm and simplify
delete($skv->{dbh}) if $skv;
}
+sub has_entries {
# Forwards to the has_entries method of the object held in slot 0 of the
# invocant and returns its result.  When slot 0 is unset (it is left
# undef for the 'none' strategy), returns an explicit undef instead.
+ my $skv = $_[0]->[0] or return undef;
+ $skv->has_entries;
+}
+
1;