Sergey Matveev's repositories - public-inbox.git/commitdiff
new scripts for importing slrn spools and maildirs
author Eric Wong <e@80x24.org>
Mon, 21 Apr 2014 08:07:53 +0000 (08:07 +0000)
committer Eric Wong <e@80x24.org>
Mon, 21 Apr 2014 09:33:44 +0000 (09:33 +0000)
The old import_gmane_spool script was inflexible,
since we may import from maildir archives as well, so
get everything into maildir, first.

scripts/import_gmane_spool [deleted file]
scripts/import_maildir [new file with mode: 0755]
scripts/slrnspool2maildir [new file with mode: 0755]

diff --git a/scripts/import_gmane_spool b/scripts/import_gmane_spool
deleted file mode 100755 (executable)
index 3cda0bf..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-#
-# One-off script to convert an slrnpull news spool from gmane, usage:
-=begin usage
-       mkdir -p $HOME/.public-inbox
-       MAINREPO=/path/to/your/repo.git
-       export ORIGINAL_RECIPIENT='list@example.com'
-       git init --bare $MAINREPO
-       export GIT_CONFIG=$HOME/.public-inbox/config
-       git config publicinbox.$LISTNAME.address $ORIGINAL_RECIPIENT
-       git config publicinbox.$LISTNAME.mainrepo $MAINREPO
-       unset GIT_CONFIG
-       ./import_gmane_spool SLRNPULL_ROOT/news/foo/bar
-=cut
-use strict;
-use warnings;
-use Email::Simple;
-use PublicInbox::Filter;
-use PublicInbox::Config;
-use IPC::Run qw(run);
-sub usage { "Usage:\n".join("",grep(/\t/, `head -n 24 $0`)) }
-my $spool = shift @ARGV or die usage();
-defined $ENV{ORIGINAL_RECIPIENT} or die usage();
-my @args = ('public-inbox-mda'); # each message is piped to this command
-
-chdir $spool or die "chdir $spool failed: $!\n";
-# visit entries whose name ends in a digit, in ascending numeric order
-foreach my $n (sort { $a <=> $b } grep(/\d+\z/, glob("*"))) {
-       if (open my $fh, '<', $n) {
-               my $s = eval { # slurp the whole file and parse it
-                       local $/;
-                       Email::Simple->new(<$fh>);
-               };
-
-               # gmane rewrites Received headers, which increases spamminess
-               my @h = $s->header("Original-Received");
-               if (@h) {
-                       $s->header_set("Received", @h);
-                       $s->header_set("Original-Received"); # no values: drops it
-               }
-
-               # this is needed for "git rev-list --since=..." to work
-               local $ENV{GIT_COMMITTER_DATE} = $s->header('Date');
-
-               # triggers for the SA HEADER_SPAM rule
-               foreach my $drop (qw(Approved)) { $s->header_set($drop) }
-
-               # appears to be an old gmane bug:
-               $s->header_set("connect()");
-
-               my $orig = $s->as_string;
-               close $fh or die "close failed: $!\n";
-               eval { run(\@args, \$orig) }; # $orig is fed to the command's stdin
-               die "fail $n: $?\n" if $?; # non-zero child status
-               die "fail $n: $@\n" if $@; # run() itself threw
-       } else {
-               warn "Failed to open $n: $!\n";
-       }
-}
diff --git a/scripts/import_maildir b/scripts/import_maildir
new file mode 100755 (executable)
index 0000000..aaabe80
--- /dev/null
@@ -0,0 +1,78 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014, Eric Wong <e@80x24.org> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Script to import a Maildir into a public-inbox
+# Every file in cur/ and new/ is parsed, ordered by its Date header and
+# piped to public-inbox-mda; files without a parsable Date are skipped.
+=begin usage
+       mkdir -p $HOME/.public-inbox
+       MAINREPO=/path/to/your/repo.git
+       export ORIGINAL_RECIPIENT='list@example.com'
+       git init --bare $MAINREPO
+       export GIT_CONFIG=$HOME/.public-inbox/config
+       git config publicinbox.$LISTNAME.address $ORIGINAL_RECIPIENT
+       git config publicinbox.$LISTNAME.mainrepo $MAINREPO
+       unset GIT_CONFIG
+       ./import_maildir /path/to/maildir/
+=cut
+use strict;
+use warnings;
+use Email::Filter;
+use Date::Parse qw/str2time/;
+use IPC::Run qw/run/;
+
+# Returns the help text: the body of the "=begin usage" POD block above,
+# read back from this script's own source (no shell, no tab/line-count guess).
+sub usage {
+       my @help;
+       my $in_usage = 0;
+       if (open my $self, '<', $0) {
+               while (my $line = <$self>) {
+                       if ($line =~ /\A=begin usage\b/) {
+                               $in_usage = 1;
+                       } elsif ($in_usage) {
+                               last if $line =~ /\A=cut\b/;
+                               push @help, $line;
+                       }
+               }
+       }
+       "Usage:\n" . join('', @help);
+}
+
+my $dir = shift @ARGV or die usage();
+defined $ENV{ORIGINAL_RECIPIENT} or die usage();
+my @mda = qw(public-inbox-mda);
+foreach my $sub (qw(cur new tmp)) {
+       -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
+}
+
+# Collect [ epoch seconds, path relative to $dir, raw Date header ] per message
+my @msgs;
+foreach my $sub (qw(cur new)) {
+       foreach my $fn (glob("$dir/$sub/*")) {
+               open my $fh, '<', $fn or next;
+               my $raw = do { local $/; <$fh> };
+               close $fh;
+               # an empty or unreadable file has nothing to parse
+               next unless defined $raw && length $raw;
+               my $f = Email::Filter->new(data => $raw);
+               my $date = $f->simple->header('Date');
+               my $t = defined $date ? eval { str2time($date) } : undef;
+               # exit(0) keeps ignore() from terminating this script
+               $f->exit(0);
+               $f->ignore;
+               defined $t or next;
+               my @fn = split(m!/!, $fn);
+               push @msgs, [ $t, "$sub/" . pop(@fn), $date ];
+       }
+}
+
+# Import oldest first; a failed delivery is reported but does not stop the run
+foreach my $ary (sort { $a->[0] <=> $b->[0] } @msgs) {
+       my $fn = "$dir/$ary->[1]";
+       local $ENV{GIT_COMMITTER_DATE} = $ary->[2]; # this preserves timezone
+       run(\@mda, '<', $fn) or warn "public-inbox-mda failed on $fn: $?\n";
+}
+
+1;
diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir
new file mode 100755 (executable)
index 0000000..fb7a6f2
--- /dev/null
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# One-off script to convert an slrnpull news spool to Maildir
+# Each numbered article file in the spool has its gmane-specific headers
+# cleaned up and is then delivered into the target Maildir.
+=begin usage
+       ./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/
+=cut
+use strict;
+use warnings;
+use Email::Filter;
+use Email::LocalDelivery;
+
+# Returns the help text: the body of the "=begin usage" POD block above,
+# read back from this script's own source (no shell, no tab/line-count guess).
+sub usage {
+       my @help;
+       my $in_usage = 0;
+       if (open my $self, '<', $0) {
+               while (my $line = <$self>) {
+                       if ($line =~ /\A=begin usage\b/) {
+                               $in_usage = 1;
+                       } elsif ($in_usage) {
+                               last if $line =~ /\A=cut\b/;
+                               push @help, $line;
+                       }
+               }
+       }
+       "Usage:\n" . join('', @help);
+}
+
+my $spool = shift @ARGV or die usage();
+my $dir = shift @ARGV or die usage();
+-d $dir or die "$dir is not a directory\n";
+# Email::LocalDelivery treats a destination ending in '/' as a Maildir
+$dir .= '/' unless $dir =~ m!/\z!;
+foreach my $sub (qw(cur new tmp)) {
+       my $path = "$dir/$sub"; # create under $dir, not the current directory
+       -d $path or mkdir $path or die "mkdir $dir/$sub failed: $!\n";
+}
+
+foreach my $n (grep(m!/\d+\z!, glob("$spool/*"))) { # all-digit names only
+       open my $fh, '<', $n or do {
+               warn "Failed to open $n: $!\n";
+               next;
+       };
+       my $raw = do { local $/; <$fh> };
+       close $fh;
+       unless (defined $raw && length $raw) {
+               warn "Skipping empty or unreadable $n\n";
+               next;
+       }
+       my $f = Email::Filter->new(data => $raw);
+       my $s = $f->simple;
+
+       # gmane rewrites Received headers, which increases spamminess
+       my @h = $s->header('Original-Received');
+       if (@h) {
+               $s->header_set('Received', @h);
+               $s->header_set('Original-Received'); # no values: drops it
+       }
+
+       # triggers for the SA HEADER_SPAM rule
+       foreach my $drop (qw(Approved)) { $s->header_set($drop) }
+
+       # appears to be an old gmane bug:
+       $s->header_set('connect()');
+
+       # exit(0) keeps accept() from terminating this script
+       $f->exit(0);
+       $f->accept($dir);
+}