/dev/null mis-handling was reported by Kyle Meyer.
Phrase quoting rules are also refined to avoid leaving spaces
unquoted when "phrase generator" characters exist. Also,
context-free hunk headers no longer clobber the in_diff
state of the parser, since git can still generate those.
Link: https://public-inbox.org/meta/87k0qrrhve.fsf@kyleam.com/
use PublicInbox::Git qw(git_unquote);
use PublicInbox::Spawn qw(popen_rd);
use URI::Escape qw(uri_escape_utf8);
use PublicInbox::Git qw(git_unquote);
use PublicInbox::Spawn qw(popen_rd);
use URI::Escape qw(uri_escape_utf8);
+my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!;
sub xphrase ($) {
my ($s) = @_;
sub xphrase ($) {
my ($s) = @_;
map {
s/\A\s*//;
s/\s+\z//;
map {
s/\A\s*//;
s/\s+\z//;
- /[\|=><,\sA-Z]/ && !m![\./:\\\@]! ? qq("$_") : $_;
+ m![^\./:\\\@\-\w]! ? qq("$_") : $_ ;
} ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g);
}
} ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g);
}
push @{$lei->{qterms}->{dfctx}}, xphrase($_);
} elsif (/^-- $/) { # email signature begins
$in_diff = undef;
push @{$lei->{qterms}->{dfctx}}, xphrase($_);
} elsif (/^-- $/) { # email signature begins
$in_diff = undef;
- } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) {
+ } elsif (m!^diff --git $FN $FN!) {
# wait until "---" and "+++" to capture filenames
$in_diff = 1;
} elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) {
# wait until "---" and "+++" to capture filenames
$in_diff = 1;
} elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) {
push @{$lei->{qterms}->{dfpre}}, $oa;
push @{$lei->{qterms}->{dfpost}}, $ob;
# who uses dfblob?
push @{$lei->{qterms}->{dfpre}}, $oa;
push @{$lei->{qterms}->{dfpost}}, $ob;
# who uses dfblob?
- } elsif (m!^(?:---|\+{3}) ("?[^/]+/.+)!) {
+ } elsif (m!^(?:---|\+{3}) ($FN)!) {
+ next if $1 eq '/dev/null';
my $fn = (split(m!/!, git_unquote($1.''), 2))[1];
push @{$lei->{qterms}->{dfn}}, xphrase($fn);
} elsif ($in_diff && s/^\+//) { # diff added
push @{$lei->{qterms}->{dfb}}, xphrase($_);
} elsif ($in_diff && s/^-//) { # diff removed
push @{$lei->{qterms}->{dfa}}, xphrase($_);
my $fn = (split(m!/!, git_unquote($1.''), 2))[1];
push @{$lei->{qterms}->{dfn}}, xphrase($fn);
} elsif ($in_diff && s/^\+//) { # diff added
push @{$lei->{qterms}->{dfb}}, xphrase($_);
} elsif ($in_diff && s/^-//) { # diff removed
push @{$lei->{qterms}->{dfa}}, xphrase($_);
+ } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) {
+ # traditional diff w/o -p
} elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) {
push @{$lei->{qterms}->{dfhh}}, xphrase($1);
} elsif (/^(?:dis)similarity index/ ||
} elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) {
push @{$lei->{qterms}->{dfhh}}, xphrase($1);
} elsif (/^(?:dis)similarity index/ ||
"dfpost:6e006fd73b OR " .
"dfpost:6e006fd73\n",
'3-byte chop');
"dfpost:6e006fd73b OR " .
"dfpost:6e006fd73\n",
'3-byte chop');
+
+ lei_ok(qw(p2q t/data/message_embed.eml --want=dfb));
+ like($lei_out, qr/\bdfb:\S+/, 'got dfb off /dev/null file');