Sergey Matveev's repositories - public-inbox.git/blob - scripts/import_gmane_spool
initial commit
[public-inbox.git] / scripts / import_gmane_spool
1 #!/usr/bin/perl -w
2 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
3 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
4 #
5 # One-off script to convert an slrnpull news spool from gmane
6 use strict;
7 use warnings;
8 use Parallel::ForkManager;
9 use Email::Simple;
10 use PublicInbox::Filter;
11 use IPC::Run qw(run);
# Synopsis printed when a required argument is missing; the program name
# here is this script's own name.
my $usage = "import_gmane_spool SLRNPULL_ROOT/news/foo/bar MAIN_REPO FAIL_REPO";

# Three positional arguments are required: the spool directory to read
# articles from, and the two repository paths handed to public-inbox-mda.
my $spool = shift @ARGV or die "Usage: $usage\n";
my $main_repo = shift @ARGV or die "Usage: $usage\n";
my $fail_repo = shift @ARGV or die "Usage: $usage\n";

# Number of worker processes: whatever nproc(1) prints, or 4 when nproc is
# unavailable or prints something unusable.  The output is parsed instead of
# being used raw so the trailing newline (or any unexpected text) never
# reaches Parallel::ForkManager.
my $nproc = 4;
my $nproc_out = `nproc 2>/dev/null`;
if (defined $nproc_out && $nproc_out =~ /\A(\d+)\s*\z/ && $1 > 0) {
        $nproc = $1;
}
my $pm = Parallel::ForkManager->new($nproc);

# Command run once per article; the article text is fed to it on stdin.
my @args = ('public-inbox-mda', $main_repo, $fail_repo);
19
# Enumerate the spool with readdir instead of glob: glob("$spool/*") splits
# its pattern on whitespace and expands wildcard characters, so a spool path
# containing either would be mis-globbed, and a missing directory would
# silently import nothing.
opendir(my $dh, $spool) or die "Failed to open directory $spool: $!\n";

# Only files whose names are entirely digits are imported (presumably the
# article numbers assigned by slrnpull -- confirm against the spool layout).
# They are handed out in ascending numeric order.
my @articles = sort { $a <=> $b } grep { /\A\d+\z/ } readdir($dh);
closedir($dh);

foreach my $article (@articles) {
        my $n = "$spool/$article";

        # start() forks: it returns true in the parent, which moves on to
        # the next article, and false in the child, which does the work
        # below and ends at finish().
        $pm->start and next;
        if (open my $fh, '<', $n) {
                # Slurp in scalar context so Email::Simple->new always
                # receives exactly one argument.
                my $raw = do { local $/; <$fh> };
                close $fh or die "close failed: $!\n";
                my $s = Email::Simple->new($raw);

                # gmane rewrites Received headers, which increases spamminess;
                # restore the originals and drop the Original-Received copies
                # (header_set with no values removes the header).
                my @h = $s->header("Original-Received");
                if (@h) {
                        $s->header_set("Received", @h);
                        $s->header_set("Original-Received");
                }

                # triggers for the SA HEADER_SPAM rule
                foreach my $drop (qw(Approved)) { $s->header_set($drop) }

                # appears to be an old gmane bug:
                $s->header_set("connect()");

                # Pipe the cleaned-up message to public-inbox-mda on stdin.
                my $orig = $s->as_string;
                eval { run(\@args, \$orig) };

                # Report an exception from run() first: if run() died, $?
                # may still hold a stale status inherited from the parent
                # rather than anything about this command.
                die "fail $n: $@\n" if $@;
                die "fail $n: $?\n" if $?;
        } else {
                warn "Failed to open $n: $!\n";
        }
        $pm->finish;
}

# Block until every forked worker has exited.
$pm->wait_all_children;