+# Canonicalize an external location given as a string.
+#
+# Anything which does not start with "http://" or "https://" is handed
+# to PublicInbox::Config::rel2abs_collapsed and its result is returned.
+# HTTP(S) URLs are canonicalized via URI; their path always ends up
+# with a trailing '/' and every run of '/' squeezed down to one.
+sub ext_canonicalize {
+	my ($location) = @_;
+	return PublicInbox::Config::rel2abs_collapsed($location)
+		unless $location =~ m!\Ahttps?://!;
+
+	require URI; # only loaded once an HTTP(S) URL is actually seen
+	my $canon = URI->new($location)->canonical;
+	# append the trailing slash first, then squeeze redundant '/'
+	(my $squeezed = $canon->path . '/') =~ tr!/!/!s;
+	$canon->path($squeezed);
+	return $canon->as_string;
+}
+
+# TODO: we will probably extract glob2re into a separate module for
+# PublicInbox::Filter::Base and maybe other places
+# glob character => regexp fragment, looked up per character by glob2re.
+# '*' and '?' never match '/'; brackets and commas map to themselves.
+my %re_map = (
+	'*' => '[^/]*?',
+	'?' => '[^/]',
+	'[' => '[',
+	']' => ']',
+	',' => ',',
+);
+
+# Convert the shell-style glob in $re into regexp source text.
+#
+# Returns a plain string (not a qr// object) meant to be interpolated
+# into a match, or undef when the input contained nothing with glob
+# meaning, i.e. every character was merely quoted or was a comma and
+# no {a,b} group was rewritten.
+#
+# Translation rules, per %re_map and the code below:
+#   *        => [^/]*?   (does not cross '/')
+#   ?        => [^/]
+#   [ and ]  => emitted as-is, forming a regexp character class
+#   -        => emitted as-is while $in_bracket is non-zero
+#   {a,b,c}  => (a|b|c)
+#   anything else, and any character following a backslash, is
+#   quotemeta-ed
+# A leading "scheme://[IPv6-address]:port/" prefix is split off and
+# quotemeta-ed as a whole so its brackets are not treated as a
+# character class.
+sub glob2re {
+ my ($re) = @_;
+ my $p = ''; # previous character seen, used to spot a preceding backslash
+ my $in_bracket = 0; # '[' increments, ']' decrements
+ my $qm = 0; # count of characters emitted without any glob meaning
+ my $schema_host_port = '';
+
+ # don't glob URL-looking things that look like IPv6
+ if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
+ $schema_host_port = quotemeta $1; # e.g. "http://[::1]:1234/"
+ }
+ # Translate one character at a time (/s lets "." match newlines too).
+ # A character whose predecessor was not a backslash is looked up in
+ # %re_map; on a miss, or when the predecessor was a backslash, it is
+ # quotemeta-ed instead ('-' is kept bare while $in_bracket is set).
+ # $changes ends up as the number of characters visited, $qm as the
+ # number of those which carried no glob meaning.
+ my $changes = ($re =~ s!(.)!
+ $re_map{$p eq '\\' ? '' : do {
+ if ($1 eq '[') { ++$in_bracket }
+ elsif ($1 eq ']') { --$in_bracket }
+ elsif ($1 eq ',') { ++$qm } # no change
+ $p = $1;
+ }} // do {
+ $p = $1;
+ ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+ }!sge);
+ # bashism (also supported by curl): {a,b,c} => (a|b|c)
+ # The braces were quotemeta-ed by the pass above, hence the pattern
+ # looks for "\{" and "\}"; each rewritten group counts as a change.
+ $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+ (my $in_braces = $2) =~ tr!,!|!;
+ $1."($in_braces)";
+ /sge);
+ # undef unless at least one construct with glob meaning was seen
+ ($changes - $qm) ? $schema_host_port.$re : undef;
+}
+
+# Resolve $loc to a list of canonicalized external locations.
+# $is_exclude is true when resolving for --exclude; otherwise
+# --only/--include is assumed.
+#
+# Resolution order:
+#  1. a path which exists on the filesystem is canonicalized and
+#     returned without consulting the list from externals_each
+#  2. unless the "globoff" option is set, a $loc which glob2re accepts
+#     returns every matching entry from externals_each
+#  3. a $loc without '/' must match exactly one entry by basename
+#  4. anything else is treated as a URL: for --exclude it must equal
+#     exactly one entry once canonicalized, otherwise it is returned
+#     canonicalized without being checked against the list
+# When nothing (or more than one entry, for 3. and 4.) matches, the
+# problem is reported through $self->fail and nothing is returned.
+sub get_externals {
+	my ($self, $loc, $is_exclude) = @_;
+	return (ext_canonicalize($loc)) if -e $loc;
+
+	my @known = externals_each($self);
+	my @hits;
+	my $glob_re; # stays undef if globbing is off or $loc is no glob
+	$glob_re = glob2re($loc) unless $self->{opt}->{globoff};
+
+	if ($glob_re) {
+		@hits = grep { m!$glob_re! } @known;
+		return @hits if @hits;
+	} elsif (index($loc, '/') < 0) { # bare name: exact basename match
+		@hits = grep { m!/\Q$loc\E/?\z! } @known;
+		return @hits if @hits == 1;
+	} elsif ($is_exclude) { # maybe a URL, must already be known
+		my $canon = ext_canonicalize($loc);
+		@hits = grep { m!\A\Q$canon\E\z! } @known;
+		return @hits if @hits == 1;
+	} else { # URL, taken at face value
+		return (ext_canonicalize($loc));
+	}
+
+	if (@hits) {
+		$self->fail("`$loc' is ambiguous:\n", map { "\t$_\n" } @hits);
+	} else {
+		$self->fail("`$loc' is unknown");
+	}
+	return;
+}
+
+# TODO: does this need JSON output?
+sub lei_ls_external {
+ my ($self, $filter) = @_;
+ my $opt = $self->{opt};
+ my $do_glob = !$opt->{globoff}; # glob by default
+ my ($OFS, $ORS) = $opt->{z} ? ("\0", "\0\0") : (" ", "\n");
+ $filter //= '*';
+ my $re = $do_glob ? glob2re($filter) : undef;
+ $re //= index($filter, '/') < 0 ?
+ qr!/\Q$filter\E/?\z! : # exact basename match
+ qr/\Q$filter\E/; # grep -F semantics
+ my @ext = externals_each($self, my $boost = {});
+ @ext = $opt->{'invert-match'} ? grep(!/$re/, @ext)
+ : grep(/$re/, @ext);
+ if ($opt->{'local'} && !$opt->{remote}) {
+ @ext = grep(!m!\A[a-z\+]+://!, @ext);
+ } elsif ($opt->{remote} && !$opt->{'local'}) {
+ @ext = grep(m!\A[a-z\+]+://!, @ext);
+ }
+ for my $loc (@ext) {
+ $self->out($loc, $OFS, 'boost=', $boost->{$loc}, $ORS);