X-Git-Url: http://www.git.stargrave.org/?a=blobdiff_plain;f=lib%2FPublicInbox%2FLeiExternal.pm;h=30bb1a4579c74d8245d1eec7b66b9687bd526e71;hb=4eee5af6011cc8cdefb66c9729952c7eff5c0b0b;hp=5b5f08d178d0f04d68455b0ad0964c0849c2cccb;hpb=44d9e8224729a392c278ea6254038f961c95a0e8;p=public-inbox.git diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index 5b5f08d1..30bb1a45 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -5,13 +5,11 @@ package PublicInbox::LeiExternal; use strict; use v5.10.1; -use parent qw(Exporter); -our @EXPORT = qw(lei_ls_external lei_add_external lei_forget_external); use PublicInbox::Config; -sub _externals_each { +sub externals_each { my ($self, $cb, @arg) = @_; - my $cfg = $self->_lei_cfg(0); + my $cfg = $self->_lei_cfg; my %boost; for my $sec (grep(/\Aexternal\./, @{$cfg->{-section_order}})) { my $loc = substr($sec, length('external.')); @@ -22,25 +20,18 @@ sub _externals_each { # highest boost first, but stable for alphabetic tie break use sort 'stable'; my @order = sort { $boost{$b} <=> $boost{$a} } sort keys %boost; - return @order if !$cb; - for my $loc (@order) { - $cb->(@arg, $loc, $boost{$loc}); + if (ref($cb) eq 'CODE') { + for my $loc (@order) { + $cb->(@arg, $loc, $boost{$loc}); + } + } elsif (ref($cb) eq 'HASH') { + %$cb = %boost; } @order; # scalar or array } -sub lei_ls_external { - my ($self, @argv) = @_; - my $out = $self->{1}; - my ($OFS, $ORS) = $self->{opt}->{z} ? ("\0", "\0\0") : (" ", "\n"); - $self->_externals_each(sub { - my ($loc, $boost_val) = @_; - print $out $loc, $OFS, 'boost=', $boost_val, $ORS; - }); -} - -sub _canonicalize { - my ($location) = @_; +sub ext_canonicalize { + my $location = $_[-1]; # $_[0] may be $lei if ($location !~ m!\Ahttps?://!) { PublicInbox::Config::rel2abs_collapsed($location); } else { @@ -53,85 +44,115 @@ sub _canonicalize { } } -sub lei_add_external { - my ($self, $location) = @_; - my $cfg = $self->_lei_cfg(1); - my $new_boost = $self->{opt}->{boost} // 0; - $location = _canonicalize($location); - my $key = "external.$location.boost"; - my $cur_boost = $cfg->{$key}; - return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent - $self->lei_config($key, $new_boost); - $self->_lei_store(1)->done; # just create the store +# TODO: we will probably extract glob2re into a separate module for +# PublicInbox::Filter::Base and maybe other places +my %re_map = ( '*' => '[^/]*?', '?' => '[^/]', + '[' => '[', ']' => ']', ',' => ',' ); + +sub glob2re { + my $re = $_[-1]; # $_[0] may be $lei + my $p = ''; + my $in_bracket = 0; + my $qm = 0; + my $schema_host_port = ''; + + # don't glob URL-looking things that look like IPv6 + if ($re =~ s!\A([a-z0-9\+]+://\[[a-f0-9\:]+\](?::[0-9]+)?/)!!i) { + $schema_host_port = quotemeta $1; # "http://[::1]:1234" + } + my $changes = ($re =~ s!(.)! + $re_map{$p eq '\\' ? '' : do { + if ($1 eq '[') { ++$in_bracket } + elsif ($1 eq ']') { --$in_bracket } + elsif ($1 eq ',') { ++$qm } # no change + $p = $1; + }} // do { + $p = $1; + ($p eq '-' && $in_bracket) ? $p : (++$qm, "\Q$p") + }!sge); + # bashism (also supported by curl): {a,b,c} => (a|b|c) + $changes += ($re =~ s/([^\\]*)\\\{([^,]*,[^\\]*)\\\}/ + (my $in_braces = $2) =~ tr!,!|!; + $1."($in_braces)"; + /sge); + ($changes - $qm) ? $schema_host_port.$re : undef; } -sub lei_forget_external { - my ($self, @locations) = @_; - my $cfg = $self->_lei_cfg(1); - my $quiet = $self->{opt}->{quiet}; - for my $loc (@locations) { - my (@unset, @not_found); - for my $l ($loc, _canonicalize($loc)) { - my $key = "external.$l.boost"; - delete($cfg->{$key}); - $self->_config('--unset', $key); - if ($? == 0) { - push @unset, $l; - } elsif (($? >> 8) == 5) { - push @not_found, $l; - } else { - $self->err("# --unset $key error"); - return $self->x_it($?); - } - } - if (@unset) { - next if $quiet; - $self->err("# $_ gone") for @unset; - } elsif (@not_found) { - $self->err("# $_ not found") for @not_found; - } # else { already exited +# get canonicalized externals list matching $loc +# $is_exclude denotes it's for --exclude +# otherwise it's for --only/--include is assumed +sub get_externals { + my ($self, $loc, $is_exclude) = @_; + return (ext_canonicalize($loc)) if -e $loc; + my @m; + my @cur = externals_each($self); + my $do_glob = !$self->{opt}->{globoff}; # glob by default + if ($do_glob && (my $re = glob2re($loc))) { + @m = grep(m!$re!, @cur); + return @m if scalar(@m); + } elsif (index($loc, '/') < 0) { # exact basename match: + @m = grep(m!/\Q$loc\E/?\z!, @cur); + return @m if scalar(@m) == 1; + } elsif ($is_exclude) { # URL, maybe: + my $canon = ext_canonicalize($loc); + @m = grep(m!\A\Q$canon\E\z!, @cur); + return @m if scalar(@m) == 1; + } else { # URL: + return (ext_canonicalize($loc)); } + if (scalar(@m) == 0) { + die "`$loc' is unknown\n"; + } else { + die("`$loc' is ambiguous:\n", map { "\t$_\n" } @m, "\n"); + } +} + +sub canonicalize_excludes { + my ($lei, $excludes) = @_; + my %x; + for my $loc (@$excludes) { + my @l = get_externals($lei, $loc, 1); + $x{$_} = 1 for @l; + } + \%x; } -# shell completion helper called by lei__complete -sub _complete_forget_external { - my ($self, @argv) = @_; - my $cfg = $self->_lei_cfg(0); - my $cur = pop @argv; +# returns an anonymous sub which returns an array of potential results +sub complete_url_prepare { + my $argv = $_[-1]; # $_[0] may be $lei # Workaround bash word-splitting URLs to ['https', ':', '//' ...] # Maybe there's a better way to go about this in # contrib/completion/lei-completion.bash my $re = ''; - if (@argv) { - my @x = @argv; + my $cur = pop(@$argv) // ''; + if (@$argv) { + my @x = @$argv; if ($cur eq ':' && @x) { push @x, $cur; $cur = ''; } - while (@x > 2 && $x[0] !~ /\Ahttps?\z/ && $x[1] ne ':') { + while (@x > 2 && $x[0] !~ /\A(?:http|nntp|imap)s?\z/i && + $x[1] ne ':') { shift @x; } if (@x >= 2) { # qw(https : hostname : 443) or qw(http :) $re = join('', @x); } else { # just filter out the flags and hope for the best - $re = join('', grep(!/^-/, @argv)); + $re = join('', grep(!/^-/, @$argv)); } $re = quotemeta($re); } - # FIXME: bash completion off "http:" or "https:" when the last - # character is a colon doesn't work properly even if we're - # returning "//$HTTP_HOST/$PATH_INFO/", not sure why, could - # be a bash issue. - map { - my $x = substr($_, length('external.')); + my $match_cb = sub { + # the "//;" here (for AUTH=ANONYMOUS) interacts badly with + # bash tab completion, strip it out for now since our commands + # work w/o it. Not sure if there's a better solution... + $_[0] =~ s!//;AUTH=ANONYMOUS\@!//!i; + $_[0] =~ s!;!\\;!g; # only return the part specified on the CLI - if ($x =~ /\A$re(\Q$cur\E.*)/) { - # don't duplicate if already 100% completed - $cur eq $1 ? () : $1; - } else { - (); - } - } grep(/\Aexternal\.$re\Q$cur/, @{$cfg->{-section_order}}); + # don't duplicate if already 100% completed + $_[0] =~ /\A$re(\Q$cur\E.*)/ ? ($cur eq $1 ? () : $1) : () + }; + wantarray ? ($re, $cur, $match_cb) : $match_cb; } 1;