-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# *-external commands of lei
package PublicInbox::LeiExternal;
use strict;
use v5.10.1;
-use parent qw(Exporter);
-our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external);
use PublicInbox::Config;
-sub _externals_each {
+# List the configured externals, highest boost first.
+#
+# $cb is optional.  A CODE ref is invoked as $cb->(@arg, $loc, $boost)
+# for every location in order; a HASH ref is overwritten with the
+# location => boost map; anything else is ignored.
+# Returns the ordered locations (or their count in scalar context).
+# NOTE(review): the statements which fill %boost and close the loop over
+# the "external." sections are not visible in this excerpt.
+sub externals_each {
my ($self, $cb, @arg) = @_;
- my $cfg = $self->_lei_cfg(0);
+ my $cfg = $self->_lei_cfg;
my %boost;
for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) {
my $loc = substr($sec, length('external.'));
# highest boost first, but stable for alphabetic tie break
use sort 'stable';
my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost;
- return @order if !$cb;
- for my $loc (@order) {
- $cb->(@arg, $loc, $boost{$loc});
+ if (ref($cb) eq 'CODE') {
+ for my $loc (@order) {
+ $cb->(@arg, $loc, $boost{$loc});
+ }
+ } elsif (ref($cb) eq 'HASH') {
+ %$cb = %boost;
}
@order; # scalar or array
}
-sub lei_ls_external {
- my ($self, @argv) = @_;
- my $out = $self->{1};
- my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n");
- $self->_externals_each(sub {
- my ($loc, $boost_val) = @_;
- print $out $loc, $OFS, 'boost=', $boost_val, $ORS;
- });
-}
-
-sub _canonicalize {
- my ($location) = @_;
+# Canonicalize the location of an external.  The location is read from
+# the last argument, so this works as a plain function or as a method.
+# Anything which does not start with "http://" or "https://" is handed
+# to PublicInbox::Config::rel2abs_collapsed.
+# NOTE(review): the body of the HTTP(S) branch is not visible in this
+# excerpt.
+sub ext_canonicalize {
+ my $location = $_[-1]; # $_[0] may be $lei
if ($location !~ m!\Ahttps?://!) {
PublicInbox::Config::rel2abs_collapsed($location);
} else {
}
}
-sub lei_add_external {
- my ($self, $location) = @_;
- my $cfg = $self->_lei_cfg(1);
- my $new_boost = $self->{opt}->{boost} // 0;
- $location = _canonicalize($location);
- my $key = "external.$location.boost";
- my $cur_boost = $cfg->{$key};
- return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
- $self->lei_config($key, $new_boost);
- $self->_lei_store(1)->done; # just create the store
+# TODO: we will probably extract glob2re into a separate module for
+# PublicInbox::Filter::Base and maybe other places
+# glob metacharacter => regexp fragment; "*" and "?" never match "/",
+# while "[", "]" and "," are passed through unquoted
+my %re_map = ( '*' => '[^/]*?', '?' => '[^/]',
+ '[' => '[', ']' => ']', ',' => ',' );
+
+# Translate a shell-style glob (the last argument) into regexp source.
+# Returns the regexp as a string when at least one glob construct
+# ("*", "?", a bracket expression or a "{a,b}" group) was translated,
+# and undef when the input contained nothing but literal text.
+sub glob2re {
+ my $re = $_[-1]; # $_[0] may be $lei
+ my $p = '';
+ my $in_bracket = 0;
+ my $qm = 0;
+ my $schema_host_port = '';
+
+ # don't glob URL-looking things that look like IPv6
+ if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) {
+ $schema_host_port = quotemeta $1; # "http://[::1]:1234"
+ }
+ # Rewrite one character at a time.  $p holds the previous character:
+ # whatever follows a backslash is looked up under '' (never present
+ # in %re_map) and therefore always quoted.  $qm counts replacements
+ # which are not glob translations (quoted literals and commas) so
+ # they can be subtracted from the substitution count below.
+ my $changes = ($re =~ s!(.)!
+ $re_map{$p eq '\\' ? '' : do {
+ if ($1 eq '[') { ++$in_bracket }
+ elsif ($1 eq ']') { --$in_bracket }
+ elsif ($1 eq ',') { ++$qm } # no change
+ $p = $1;
+ }} // do {
+ $p = $1;
+ ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p")
+ }!sge);
+ # bashism (also supported by curl): {a,b,c} => (a|b|c)
+ # (the braces were already backslash-quoted by the pass above)
+ $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/
+ (my $in_braces = $2) =~ tr!,!|!;
+ $1."($in_braces)";
+ /sge);
+ # only report a regexp if something other than quoting happened
+ ($changes - $qm) ? $schema_host_port.$re : undef;
}
-sub lei_forget_external {
- my ($self, @locations) = @_;
- my $cfg = $self->_lei_cfg(1);
- my $quiet = $self->{opt}->{quiet};
- for my $loc (@locations) {
- my (@unset, @not_found);
- for my $l ($loc, _canonicalize($loc)) {
- my $key = "external.$l.boost";
- delete($cfg->{$key});
- $self->_config('--unset', $key);
- if ($? == 0) {
- push @unset, $key;
- } elsif (($? >> 8) == 5) {
- push @not_found, $key;
- } else {
- $self->err("# --unset $key error");
- return $self->x_it($?);
- }
- }
- if (@unset) {
- next if $quiet;
- $self->err("# $_ unset") for @unset;
- } elsif (@not_found) {
- $self->err("# $_ not found") for @not_found;
- } # else { already exited
+# Resolve $loc to the canonicalized external location(s) it refers to.
+#
+# $loc is tried as an existing filesystem path first.  Otherwise it is
+# matched as a glob against the list from externals_each() (unless the
+# "globoff" option is set), as the basename of exactly one entry of
+# that list, or treated as a URL.
+# $is_exclude is true when resolving for --exclude, in which case a URL
+# must match exactly one listed external; otherwise --only/--include
+# is assumed and the canonicalized URL is returned as-is.
+#
+# Returns a list of locations; dies if $loc is unknown or ambiguous.
+sub get_externals {
+	my ($self, $loc, $is_exclude) = @_;
+	return (ext_canonicalize($loc)) if -e $loc;
+	my @m;
+	my @cur = externals_each($self);
+	my $do_glob = !$self->{opt}->{globoff}; # glob by default
+	if ($do_glob && (my $re = glob2re($loc))) {
+		@m = grep(m!$re/?\z!, @cur);
+		return @m if scalar(@m);
+	} elsif (index($loc, '/') < 0) { # exact basename match:
+		@m = grep(m!/\Q$loc\E/?\z!, @cur);
+		return @m if scalar(@m) == 1;
+	} elsif ($is_exclude) { # URL, maybe:
+		my $canon = ext_canonicalize($loc);
+		@m = grep(m!\A\Q$canon\E\z!, @cur);
+		return @m if scalar(@m) == 1;
+	} else { # URL:
+		return (ext_canonicalize($loc));
}
+	if (scalar(@m) == 0) {
+		die "`$loc' is unknown\n";
+	} else {
+		# map() must be parenthesized, otherwise it also consumes
+		# the trailing "\n" and emits a stray tab-only line
+		die("`$loc' is ambiguous:\n", (map { "\t$_\n" } @m), "\n");
+	}
+}
+
+# Expand every --exclude argument into the external(s) it names.
+# Returns a hashref whose keys are the resolved locations, each mapped
+# to 1; dies as soon as get_externals() rejects an argument.
+sub canonicalize_excludes {
+	my ($lei, $excludes) = @_;
+	my %x;
+	foreach my $arg (@$excludes) {
+		my @resolved = get_externals($lei, $arg, 1);
+		@x{@resolved} = (1) x scalar(@resolved);
+	}
+	return \%x;
}
-# shell completion helper called by lei__complete
-sub _complete_forget_external {
- my ($self, @argv) = @_;
- my $cfg = $self->_lei_cfg(0);
- my $cur = pop @argv;
+# returns an anonymous sub which returns an array of potential results
+# (in list context, ($re, $cur, $match_cb) is returned instead).
+# The last argument must be an array ref of argument words; its final
+# element (the word being completed, $cur) is popped off in place, and
+# $re is the quotemeta-escaped prefix assembled from the words before it.
+sub complete_url_prepare {
+ my $argv = $_[-1]; # $_[0] may be $lei
# Workaround bash word-splitting URLs to ['https', ':', '//' ...]
# Maybe there's a better way to go about this in
# contrib/completion/lei-completion.bash
my $re = '';
- if (@argv) {
- my @x = @argv;
+ my $cur = pop(@$argv) // '';
+ if (@$argv) {
+ my @x = @$argv;
if ($cur eq ':' && @x) {
push @x, $cur;
$cur = '';
}
- while (@x > 2 && $x[0] !~ /\Ahttps?\z/ && $x[1] ne ':') {
+ while (@x > 2 && $x[0] !~ /\A(?:http|nntp|imap)s?\z/i &&
+ $x[1] ne ':') {
shift @x;
}
if (@x >= 2) { # qw(https : hostname : 443) or qw(http :)
$re = join('', @x);
} else { # just filter out the flags and hope for the best
- $re = join('', grep(!/^-/, @argv));
+ $re = join('', grep(!/^-/, @$argv));
}
$re = quotemeta($re);
}
- # FIXME: bash completion off "http:" or "https:" when the last
- # character is a colon doesn't work properly even if we're
- # returning "//$HTTP_HOST/$PATH_INFO/", not sure why, could
- # be a bash issue.
- map {
- my $x = substr($_, length('external.'));
+ # $match_cb takes one candidate, edits that argument in place, and
+ # returns the text to complete with (or the empty list when the
+ # candidate does not match or is already fully typed)
+ my $match_cb = sub {
+ # the "//;" here (for AUTH=ANONYMOUS) interacts badly with
+ # bash tab completion, strip it out for now since our commands
+ # work w/o it. Not sure if there's a better solution...
+ $_[0] =~ s!//;AUTH=ANONYMOUS\@!//!i;
+ $_[0] =~ s!;!\\;!g;
# only return the part specified on the CLI
- if ($x =~ /\A$re(\Q$cur\E.*)/) {
- # don't duplicate if already 100% completed
- $cur eq $1 ? () : $1;
- } else {
- ();
- }
- } grep(/\Aexternal\.$re\Q$cur/, @{$cfg->{-section_order}});
+ # don't duplicate if already 100% completed
+ $_[0] =~ /\A$re(\Q$cur\E.*)/ ? ($cur eq $1 ? () : $1) : ()
+ };
+ wantarray ? ($re, $cur, $match_cb) : $match_cb;
}
1;