2 # Copyright (C) 2018-2020 all contributors <meta@public-inbox.org>
3 # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt>
6 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
# Usage text. It is printed to stdout for --help, and it is the die message
# when OLD/NEW are missing or extra positional arguments are given.
7 my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
8 usage: public-inbox-convert [options] OLD NEW
10 convert v1 format inboxes to v2
14 --no-index do not index after conversion
15 --jobs=NUM set shards (NUM=0)
16 --verbose | -v increase verbosity (may be repeated)
18 index options (see public-inbox-index(1) man page for full description):
20 --no-fsync speed up indexing, risk corruption on power outage
21 -L LEVEL `basic', `medium', or `full' (default: full)
22 --compact | -c run public-inbox-compact(1) after indexing
23 --sequential-shard index Xapian shards sequentially for slow storage
24 --batch-size=BYTES flush changes to OS after a given number of bytes
25 --max-size=BYTES do not index messages larger than the given size
27 See public-inbox-convert(1) man page for full documentation.
33 quiet => -1, compact => 0, maxsize => undef, fsync => 1,
34 reindex => 1, # we always reindex
# Parse the command line into $opt.  In Getopt::Long spec syntax a trailing
# `!' makes a switch negatable (--no-index, --no-fsync), `+' counts
# repetitions (-v -v), and `=i' / `=s' require an integer / string value.
36 GetOptions($opt, qw(jobs|j=i index! help|h),
38 qw(verbose|v+ rethread compact|c+ fsync|sync!
39 indexlevel|index-level|L=s max_size|max-size=s
40 batch_size|batch-size=s
41 sequential_shard|sequential-shard|seq-shard
43 if ($opt->{help}) { print $help; exit 0 }; # --help: show usage, exit 0
# The two positional arguments, OLD then NEW; a missing one becomes ''.
44 my $old_dir = shift(@ARGV) // '';
45 my $new_dir = shift(@ARGV) // '';
# Leftover arguments, or an empty OLD/NEW, abort with the usage text.
46 die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq '');
# NEW must not already be a directory, and OLD must be one.
47 die "$new_dir exists\n" if -d $new_dir;
48 die "$old_dir not a directory\n" unless -d $old_dir;
51 Cwd->import('abs_path');
52 require PublicInbox::Config;
53 require PublicInbox::InboxWritable;
# Canonicalize OLD so it can be compared with the configured inbox paths.
55 $old_dir = abs_path($old_dir);
56 my $cfg = PublicInbox::Config->new;
# Visit every configured inbox and remember the one whose inboxdir resolves
# to the same absolute path as OLD (if several match, the last one wins).
58 $cfg->each_inbox(sub {
59 $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
# NOTE(review): the if/else keywords around the next two statements are not
# visible in this excerpt; presumably this first one runs when a configured
# inbox matched OLD.
62 $old = PublicInbox::InboxWritable->new($old);
# Presumably the no-match branch: warn that OLD is not in the config file
# and build the inbox object by hand with a placeholder address.
64 warn "W: $old_dir not configured in " .
65 PublicInbox::Config::default_file() . "\n";
66 $old = PublicInbox::InboxWritable->new({
69 -primary_address => 'old@example.com',
70 address => [ 'old@example.com' ],
# Refuse to convert anything that already reports version 2 or newer.
73 die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2;
75 require PublicInbox::Admin;
# Use the detected index level only if the inbox does not already have one.
76 my $detected = PublicInbox::Admin::detect_indexlevel($old);
77 $old->{indexlevel} //= $detected;
# Indexing setup is done only when $opt->{index} is true: collect the
# modules the inbox needs, die if any cannot be loaded, then prepare
# progress reporting and the indexing environment.
79 if ($opt->{'index'}) {
81 PublicInbox::Admin::scan_ibx_modules($mods, $old);
82 PublicInbox::Admin::require_or_die(keys %$mods);
83 PublicInbox::Admin::progress_prepare($opt);
84 $env = PublicInbox::Admin::index_prepare($opt, $cfg);
# %ENV is listed last, so variables already set in the environment take
# precedence over the values returned by index_prepare.
86 local %ENV = (%$env, %ENV) if $env;
# NOTE(review): the initial assignment of $new is not visible in this
# excerpt; here it is pointed at the absolute NEW path and wrapped as a
# writable inbox, with --jobs passed along as nproc.
88 $new->{inboxdir} = abs_path($new_dir);
90 $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
# --no-fsync (a false $opt->{fsync}) is recorded on the new inbox object.
91 $new->{-no_fsync} = 1 if !$opt->{fsync};
# link_or_copy(SRC, DST): hard-link SRC to DST, falling back to a copy.
# Returns as soon as link() succeeds.  Otherwise a warning is printed unless
# the failure was EXDEV (cross-device link), and File::Copy::cp is tried;
# dies if the copy fails as well.
# NOTE(review): the line unpacking @_ into $src/$dst and the closing brace
# are not visible in this excerpt.
95 sub link_or_copy ($$) {
97 link($src, $dst) and return;
98 $!{EXDEV} or warn "link $src, $dst failed: $!, trying cp\n";
99 require File::Copy; # preserves permissions:
100 File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n";
# Set up the new inbox from inside a callback handed to $old->with_umask
# (presumably so files are created under the old inbox's umask -- confirm).
103 $old->with_umask(sub {
104 my $old_cfg = "$old->{inboxdir}/config";
# GIT_CONFIG is localized to the v1 repository's own config file.
105 local $ENV{GIT_CONFIG} = $old_cfg;
106 my $new_cfg = "$new->{inboxdir}/all.git/config";
# Obtain the importer object for the new inbox (kept in $v2w) and initialize
# the inbox; the `jobs' value is removed from $opt as it is passed in.
107 $v2w = $new->importer(1);
108 $v2w->init_inbox(delete $opt->{jobs});
# Carry the v1 git config over as all.git/config of the new inbox.
110 link_or_copy($old_cfg, $new_cfg);
# For every altid spec: open it against the old inbox, skip it when there is
# no mm_alt for it, otherwise copy its SQLite file into the new inbox's
# counterpart with sqlite_backup_from_file.
111 if (my $alt = $new->{altid}) {
112 require PublicInbox::AltId;
113 foreach my $i (0..$#$alt) {
114 my $src = PublicInbox::AltId->new($old, $alt->[$i], 0);
115 $src = $src->mm_alt or next;
116 $src = $src->{dbh}->sqlite_db_filename;
117 my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1);
118 $dst->mm_alt->{dbh}->sqlite_backup_from_file($src);
# The description file is linked/copied only if the old inbox has one.
121 my $desc = "$old->{inboxdir}/description";
122 link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc;
# cloneurl is deliberately not carried over; see the message text below.
123 my $clone = "$old->{inboxdir}/cloneurl";
126 $clone may not be valid after migrating to v2, not copying
# Ref to export: the inbox's ref_head if set, otherwise HEAD.
131 my $head = $old->{ref_head} || 'HEAD';
# Read the v1 history from `git fast-export'.  --use-done-feature makes
# fast-export finish the stream with an explicit "done" line.
132 my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
133 $v2w->idx_init($opt);
# $w is the handle the rewritten stream is printed to further down
# (presumably the stdin of a git fast-import process -- confirm in gfi_start).
134 my $im = $v2w->importer;
135 my ($r, $w) = $im->gfi_start;
# Per-line rewrite of the fast-export stream before it is printed to $w.
# NOTE(review): the loop header and the assignments to $state, $len, $last,
# %D and $h are not visible in this excerpt.
140 if ($_ eq "blob\n") {
142 } elsif (/^commit /) {
144 } elsif (/^data ([0-9]+)/) {
# "data <N>" header: pass it through, then copy payload bytes unmodified.
146 print $w $_ or $im->wfail;
# NOTE(review): read() returns 0 at EOF without setting $!, so a truncated
# stream would die here with an empty reason -- confirm this is acceptable.
148 my $n = read($rd, my $tmp, $len) or die "read: $!";
# A short read is reported with a warning here.
149 warn "$n != $len\n" if $n != $len;
151 print $w $tmp or $im->wfail;
154 } elsif ($state eq 'commit') {
# Inside a commit: a regular file written at a path shaped like two $h
# characters, a slash and 38 more (presumably hex digits -- $h is defined
# elsewhere) is re-emitted as a write of the same mark to the fixed path "m".
155 if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) {
156 my ($mark, $path) = ($1, $2);
# If the previously emitted entry used a different path, delete it first.
158 if ($last && $last ne 'm') {
159 print $w "D $last\n" or $im->wfail;
161 print $w "M 100644 :$mark m\n" or $im->wfail;
# A deletion of such a path is re-emitted as a write to the fixed path "d",
# using the mark stored in %D for that path; it is fatal when %D has none.
165 if (m{^D (${h}{2}/${h}{38})}o) {
166 my $mark = delete $D{$1};
167 defined $mark or die "undeleted path: $1\n";
168 if ($last && $last ne 'd') {
169 print $w "D $last\n" or $im->wfail;
171 print $w "M 100644 :$mark d\n" or $im->wfail;
# "done" ends the stream; any other line reaching this point is forwarded
# unchanged.
176 last if $_ eq "done\n";
177 print $w $_ or $im->wfail;
# Finish reading from git fast-export and make sure it exited cleanly.
179 close $rd or die "close fast-export: $!\n";
# waitpid() reports failure as -1, which is true in boolean context, so the
# result is compared against the pid rather than tested for truth; otherwise
# the "waitpid failed" diagnostic could never be produced.
180 waitpid($pid, 0) == $pid or die "waitpid failed: $!\n";
# $? holds the exit status of the child that was just reaped.
181 $? == 0 or die "fast-export failed: $?\n";
182 $r = $w = undef; # v2w->done does the actual close and error checking
# When the v1 inbox has a msgmap, copy its SQLite file into the v2 msgmap
# using sqlite_backup_from_file.
184 if (my $old_mm = $old->mm) {
186 $old_mm = $old_mm->{dbh}->sqlite_db_filename;
188 # we want to trigger a reindex, not a from scratch index if
189 # we're reusing the msgmap from an existing v1 installation.
190 $v2w->idx_init($opt);
191 $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm);
# Resolve $head inside epoch 0 of the new inbox and record that commit as
# the last commit for epoch 0.  chop drops the final character of the
# rev-parse output (its trailing newline).
193 my $epoch0 = PublicInbox::Git->new($v2w->git_init(0));
194 chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head));
195 $v2w->last_epoch_commit(0, $cmt);
# Index only when $opt->{index} is true; the key is removed from $opt first.
197 $v2w->index_sync($opt) if delete $opt->{'index'};