From 599246c3aa88ca925f854281297410b73fd6f129 Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Mon, 21 Apr 2014 08:07:53 +0000
Subject: [PATCH] new scripts for importing slrn spools and maildirs

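The old import_gmane_spool script was inflexible: it could only
read an slrnpull spool, but we may want to import from Maildir
archives as well.  So convert everything to Maildir first
(slrnspool2maildir), then import from there (import_maildir).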
---
 scripts/import_gmane_spool | 61 --------------------------------------
 scripts/import_maildir     | 52 ++++++++++++++++++++++++++++++++
 scripts/slrnspool2maildir  | 45 ++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 61 deletions(-)
 delete mode 100755 scripts/import_gmane_spool
 create mode 100755 scripts/import_maildir
 create mode 100755 scripts/slrnspool2maildir

diff --git a/scripts/import_gmane_spool b/scripts/import_gmane_spool
deleted file mode 100755
index 3cda0bf6..00000000
--- a/scripts/import_gmane_spool
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/perl -w
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-#
-# One-off script to convert an slrnpull news spool from gmane, usage:
-=begin usage
-	mkdir -p $HOME/.public-inbox
-	MAINREPO=/path/to/your/repo.git
-	export ORIGINAL_RECIPIENT='list@example.com'
-	git init --bare $MAINREPO
-	export GIT_CONFIG=$HOME/.public-inbox/config
-	git config publicinbox.$LISTNAME.address $ORIGINAL_RECIPIENT
-	git config publicinbox.$LISTNAME.mainrepo $MAINREPO
-	unset GIT_CONFIG
-	./import_gmane_spool SLRNPULL_ROOT/news/foo/bar
-=cut
-use strict;
-use warnings;
-use Email::Simple;
-use PublicInbox::Filter;
-use PublicInbox::Config;
-use IPC::Run qw(run);
-sub usage { "Usage:\n".join("",grep(/\t/, `head -n 24 $0`)) }
-my $spool = shift @ARGV or die usage();
-defined $ENV{ORIGINAL_RECIPIENT} or die usage();
-my @args = ('public-inbox-mda');
-
-chdir $spool or die "chdir $spool failed: $!\n";
-
-foreach my $n (sort { $a <=> $b } grep(/\d+\z/, glob("*"))) {
-	if (open my $fh, '<', $n) {
-		my $s = eval {
-			local $/;
-			Email::Simple->new(<$fh>);
-		};
-
-		# gmane rewrites Received headers, which increases spamminess
-		my @h = $s->header("Original-Received");
-		if (@h) {
-			$s->header_set("Received", @h);
-			$s->header_set("Original-Received");
-		}
-
-		# this is needed for "git rev-list --since=..." to work
-		local $ENV{GIT_COMMITTER_DATE} = $s->header('Date');
-
-		# triggers for the SA HEADER_SPAM rule
-		foreach my $drop (qw(Approved)) { $s->header_set($drop) }
-
-		# appears to be an old gmane bug:
-		$s->header_set("connect()");
-
-		my $orig = $s->as_string;
-		close $fh or die "close failed: $!\n";
-		eval { run(\@args, \$orig) };
-		die "fail $n: $?\n" if $?;
-		die "fail $n: $@\n" if $@;
-	} else {
-		warn "Failed to open $n: $!\n";
-	}
-}
diff --git a/scripts/import_maildir b/scripts/import_maildir
new file mode 100755
index 00000000..aaabe80d
--- /dev/null
+++ b/scripts/import_maildir
@@ -0,0 +1,52 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014, Eric Wong <e@80x24.org> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Script to import a Maildir into a public-inbox
+=begin usage
+	mkdir -p $HOME/.public-inbox
+	MAINREPO=/path/to/your/repo.git
+	export ORIGINAL_RECIPIENT='list@example.com'
+	git init --bare $MAINREPO
+	export GIT_CONFIG=$HOME/.public-inbox/config
+	git config publicinbox.$LISTNAME.address $ORIGINAL_RECIPIENT
+	git config publicinbox.$LISTNAME.mainrepo $MAINREPO
+	unset GIT_CONFIG
+	./import_maildir /path/to/maildir/
+=cut
+use strict;
+use warnings;
+use Email::Filter;
+use Date::Parse qw/str2time/;
+use IPC::Run qw/run/;
+# Usage text: every line containing a tab among the first 24 lines of $0.
+sub usage { join('', "Usage:\n", grep { /\t/ } `head -n 24 $0`) }
+my $dir = shift(@ARGV) || die usage();
+die usage() unless defined $ENV{ORIGINAL_RECIPIENT};
+my @mda = ('public-inbox-mda'); # command run once per imported message
+-d "$dir/$_" or die "$dir is not a Maildir (missing $_)\n"
+	for qw(cur new tmp); # all three subdirectories must exist
+
+my @msgs; # each entry: [ epoch time, "cur|new/<basename>", raw Date header ]
+foreach my $sub (qw(cur new)) {
+	foreach my $fn (glob("$dir/$sub/*")) {
+		open my $fh, '<', $fn or do { warn "skipping $fn: $!\n"; next };
+		my $f = Email::Filter->new(data => eval { local $/; <$fh> });
+		my $date = $f->simple->header('Date');
+		my $t = defined $date ? eval { str2time($date) } : undef;
+		$f->exit(0); # exit flag off so ignore() returns (Email::Filter docs)
+		$f->ignore; # presumably marks the message as handled -- confirm
+		defined $t or do { warn "skipping $fn: bad or missing Date\n"; next };
+		my @fn = split(m!/!, $fn);
+		push @msgs, [ $t, "$sub/" . pop @fn, $date ];
+	}
+}
+
+@msgs = sort { $b->[0] <=> $a->[0] } @msgs; # newest first, so pop is oldest
+while (my $ary = pop @msgs) {
+	my $fn = "$dir/$ary->[1]";
+	local $ENV{GIT_COMMITTER_DATE} = $ary->[2]; # this preserves timezone
+	run(\@mda, '<', $fn) or warn "@mda failed for $fn (\$? = $?)\n";
+}
+
+1;
diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir
new file mode 100755
index 00000000..fb7a6f25
--- /dev/null
+++ b/scripts/slrnspool2maildir
@@ -0,0 +1,45 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# One-off script to convert an slrnpull news spool to Maildir
+=begin usage
+	./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/
+=cut
+use strict;
+use warnings;
+use Email::Filter;
+use Email::LocalDelivery;
+sub usage { "Usage:\n".join('',grep(/\t/, `head -n 24 $0`)) } # tabbed lines
+my $spool = shift @ARGV or die usage(); # slrnpull spool directory to read
+my $dir = shift @ARGV or die usage(); # destination Maildir
+-d $dir or die "$dir is not a directory\n";
+$dir .= '/' unless $dir =~ m!/\z!; # presumably '/' selects Maildir delivery
+foreach my $sub (qw(cur new tmp)) { # create under $dir, not under the cwd
+	-d "$dir/$sub" or mkdir "$dir/$sub" or die "mkdir $dir/$sub failed: $!\n";
+}
+
+# Deliver every spool entry whose name ends in a digit into the Maildir.
+foreach my $path (grep { /\d+\z/ } glob("$spool/*")) {
+	my $fh;
+	unless (open $fh, '<', $path) {
+		warn "Failed to open $path: $!\n";
+		next;
+	}
+	my $filter = Email::Filter->new(data => eval { local $/; <$fh> });
+	my $msg = $filter->simple;
+
+	# gmane rewrites Received headers, which increases spamminess:
+	# copy the Original-Received values back, then drop that header
+	if (my @received = $msg->header('Original-Received')) {
+		$msg->header_set('Received', @received);
+		$msg->header_set('Original-Received');
+	}
+
+	# Approved triggers the SA HEADER_SPAM rule, and "connect()"
+	# appears to be an old gmane bug; header_set with no value on each
+	$msg->header_set($_) for ('Approved', 'connect()');
+
+	$filter->exit(0); # exit flag off; presumably accept() then returns
+	$filter->accept($dir);
+}
-- 
2.50.0