]> Sergey Matveev's repositories - public-inbox.git/blob - scripts/import_gmane_spool
scripts/import_gmane_spool: misc updates
[public-inbox.git] / scripts / import_gmane_spool
1 #!/usr/bin/perl -w
2 # Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
3 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
4 #
5 # One-off script to convert an slrnpull news spool from gmane, usage:
6 =begin usage
7         mkdir -p $HOME/.public-inbox
8         MAINREPO=/path/to/your/repo.git LISTNAME=yourlist
9         export RECIPIENT='list@example.com'
10         git init --bare $MAINREPO
11         export GIT_CONFIG=$HOME/.public-inbox/config
12         git config publicinbox.$LISTNAME.address $RECIPIENT
13         git config publicinbox.$LISTNAME.mainrepo $MAINREPO
14         unset GIT_CONFIG
15         ./import_gmane_spool SLRNPULL_ROOT/news/foo/bar
16 =cut
17 use strict;
18 use warnings;
19 use Parallel::ForkManager;
20 use Email::Simple;
21 use PublicInbox::Filter;
22 use PublicInbox::Config;
23 use IPC::Run qw(run);
# Build the usage message from this script's own header.
#
# Returns "Usage:\n" followed by every line among the first 24 lines of
# $0 that contains a tab character (the indented example commands in the
# "=begin usage" POD section at the top of the file).
#
# The file is read directly instead of shelling out to head(1): $0 would
# otherwise be interpolated unquoted into a shell command, which breaks
# (or is interpreted by the shell) when the script path contains spaces
# or metacharacters.
sub usage {
	my $msg = "Usage:\n";
	if (open(my $self, '<', $0)) {
		while (defined(my $line = <$self>)) {
			last if $. > 24; # only the header is of interest
			$msg .= $line if $line =~ /\t/;
		}
		close $self;
	} else {
		warn "cannot read $0: $!\n";
	}
	$msg;
}
# --- command-line and environment setup ---

# First argument: the slrnpull spool directory to import.
my $spool = shift @ARGV or die usage();

# RECIPIENT must be set before any work starts.  Its value is not read
# anywhere in this script; presumably public-inbox-mda picks it up from
# the environment (see the usage text above) -- confirm against the MDA.
defined $ENV{RECIPIENT} or die usage();

# Number of parallel workers: whatever nproc(1) reports, or 4 if nproc
# is unavailable.  Backticks return the raw output including its
# trailing newline, so accept it only if it is a positive integer and
# pass the bare number on.
my $nproc = `nproc 2>/dev/null`;
$nproc = (defined $nproc && $nproc =~ /\A\s*([1-9][0-9]*)\s*\z/) ? $1 : 4;
my $pm = Parallel::ForkManager->new($nproc);

# Command each article is piped into.
my @args = ('public-inbox-mda');
30
# Pipe every article in the spool through public-inbox-mda, one forked
# worker per article, with the concurrency limit configured on $pm.
#
# The directory is read with opendir/readdir rather than glob(): glob()
# splits its pattern on whitespace and expands shell metacharacters, so
# a spool path containing either would match the wrong files.  Only
# entries whose name is all digits are articles; they are handed out in
# numeric (article number) order.
opendir(my $dh, $spool) or die "opendir $spool: $!\n";
my @articles = sort { $a <=> $b } grep { /\A\d+\z/ } readdir($dh);
closedir($dh);

foreach my $article (@articles) {
	my $n = "$spool/$article";

	# start() returns the child pid in the parent and 0 in the child,
	# so the parent immediately moves on to the next article.
	$pm->start and next;

	# Everything below runs in the forked child.
	if (open my $fh, '<', $n) {
		my $raw = do { local $/; <$fh> };
		close $fh or die "close failed: $!\n";

		# A failed read or parse must be reported with the file name
		# instead of surfacing later as a method call on undef.
		my $s = eval { Email::Simple->new($raw) };
		defined $s or die "fail $n: $@\n";

		# gmane rewrites Received headers, which increases spamminess
		my @h = $s->header("Original-Received");
		if (@h) {
			$s->header_set("Received", @h);
			$s->header_set("Original-Received");
		}

		# triggers for the SA HEADER_SPAM rule
		foreach my $drop (qw(Approved)) { $s->header_set($drop) }

		# appears to be an old gmane bug:
		$s->header_set("connect()");

		my $orig = $s->as_string;

		# Check $@ first: if run() itself threw, $? does not
		# necessarily describe this command.
		eval { run(\@args, \$orig) };
		die "fail $n: $@\n" if $@;
		die "fail $n: $?\n" if $?;
	} else {
		warn "Failed to open $n: $!\n";
	}
	$pm->finish;
}

# Block until every outstanding worker has exited.
$pm->wait_all_children;