2 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
3 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
7 use PublicInbox::TestCommon;
# NOTE(review): require_mods() is not defined in this file; presumably it
# skips the test when DBD::SQLite is unavailable -- confirm in TestCommon.
require_mods('DBD::SQLite');
use_ok('PublicInbox::LeiDedupe');

# Fixtures shared by every strategy below:
#   $eml       - reference message
#   $different - another message, forced to carry $eml's Message-ID
#   $smsg      - Smsg object populated from $eml
my $eml = eml_load('t/plack-qp.eml');
my $mid = $eml->header_raw('Message-ID');
my $different = eml_load('t/msg_iter-order.eml');
$different->header_set('Message-ID', $mid);
my $smsg = bless({ ds => time() }, 'PublicInbox::Smsg');
$smsg->populate($eml);
# Give the fields populate() may have left undefined an empty-string value.
for my $field (qw(to cc references)) {
    $smsg->{$field} //= '';
}

# With dedupe disabled nothing may ever be reported as a duplicate,
# no matter how often the same message is offered.
my $lei = { opt => { dedupe => 'none' } };
my $dd = PublicInbox::LeiDedupe->new($lei);
for my $nth (qw(1st 2nd)) {
    ok(!$dd->is_dup($eml), "$nth is_dup w/o dedupe");
}
ok(!$dd->is_dup($different), 'different is_dup w/o dedupe');
for my $attempt (1, 2) {
    ok(!$dd->is_smsg_dup($smsg), "smsg dedupe none $attempt");
}
# Run the same expectations for the default strategy (dedupe option left
# undefined) and for the explicit 'content' strategy: a repeated message is
# a duplicate, while a message with different content is not, even though
# $different carries the same Message-ID as $eml.
for my $strat (undef, 'content') {
    $lei->{opt}->{dedupe} = $strat;
    $dd = PublicInbox::LeiDedupe->new($lei);

    my $desc = $strat // 'default';
    ok(!$dd->is_dup($eml), "1st is_dup with $desc dedupe");
    # scalar() forces scalar context: called bare inside ok()'s argument
    # list, a method returning an empty list would shift the description
    # into the test-value slot and make the assertion pass falsely.
    ok(scalar($dd->is_dup($eml)), "2nd seen with $desc dedupe");
    ok(!$dd->is_dup($different), "different is_dup with $desc dedupe");
    ok(!$dd->is_smsg_dup($smsg), "is_smsg_dup pass w/ $desc dedupe");
    ok(scalar($dd->is_smsg_dup($smsg)),
        "is_smsg_dup reject w/ $desc dedupe");
}
# An unknown strategy name must make the constructor die with a message
# that mentions both "unsupported" and the offending name.
{
    $lei->{opt}{dedupe} = 'bogus';
    eval { PublicInbox::LeiDedupe->new($lei) };
    my $err = $@; # copy right away, $@ is a global
    like($err, qr/unsupported.*bogus/, 'died on bogus strategy');
}
# 'mid' strategy: $different was given the same Message-ID as $eml, so it
# is expected to be reported as a duplicate although its content differs.
$lei->{opt}->{dedupe} = 'mid';
$dd = PublicInbox::LeiDedupe->new($lei);

ok(!$dd->is_dup($eml), '1st is_dup with mid dedupe');
# scalar() on the positive assertions: in ok()'s argument list the call
# would otherwise be in list context, and an empty-list return would turn
# the description into the (true) test value.
ok(scalar($dd->is_dup($eml)), '2nd seen with mid dedupe');
ok(scalar($dd->is_dup($different)), 'different seen with mid dedupe');
ok(!$dd->is_smsg_dup($smsg), 'smsg mid dedupe pass');
ok(scalar($dd->is_smsg_dup($smsg)), 'smsg mid dedupe reject');
# 'oid' strategy, exercised three ways: without an OID argument, with an
# explicit OID argument, and through an Smsg carrying a {blob} value.
$lei->{opt}->{dedupe} = 'oid';
$dd = PublicInbox::LeiDedupe->new($lei);

# scalar() is used on every positive assertion below: inside ok()'s
# argument list the call would otherwise be in list context, and an
# empty-list return would let the description act as a true test value.

# --augment won't have OIDs:
ok(!$dd->is_dup($eml), '1st is_dup with oid dedupe (augment)');
ok(scalar($dd->is_dup($eml)), '2nd seen with oid dedupe (augment)');
ok(!$dd->is_dup($different), 'different is_dup with oid dedupe (augment)');
# NOTE(review): adding a Status header is expected not to change the
# message's identity; presumably LeiDedupe ignores that header when no OID
# is supplied -- confirm in PublicInbox::LeiDedupe.
$different->header_set('Status', 'RO');
ok(scalar($dd->is_dup($different)),
    'different seen with oid dedupe Status removed');

# Explicit OIDs: the OID decides, not the message content.
ok(!$dd->is_dup($eml, '01d'), '1st is_dup with oid dedupe');
ok(scalar($dd->is_dup($different, '01d')),
    'different content ignored if oid matches');
ok(scalar($dd->is_dup($eml, '01D')), 'case insensitive oid comparison :P');
ok(!$dd->is_dup($eml, '01dbad'), 'distinct oid is not a dup');

# Smsg path, with the blob field set on the Smsg.
$smsg->{blob} = 'dead';
ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass');
ok(scalar($dd->is_smsg_dup($smsg)), 'smsg dedupe reject');