From: Eric Wong Date: Mon, 1 Mar 2021 05:47:36 +0000 (+0600) Subject: lei p2q: fix /dev/null filenames, fix phrase quoting rules X-Git-Tag: v1.7.0~1045 X-Git-Url: http://www.git.stargrave.org/?a=commitdiff_plain;h=5b0ff78b53a796a54f8a8d7402bd04bcd2235b14;p=public-inbox.git lei p2q: fix /dev/null filenames, fix phrase quoting rules /dev/null mis-handling was reported by Kyle Meyer. Phrase quoting rules are also refined to avoid leaving spaces unquoted when "phrase generator" characters exist. Also, context-free hunk headers no longer clobber the in_diff state of the parser, since git can still generate those. Link: https://public-inbox.org/meta/87k0qrrhve.fsf@kyleam.com/ --- diff --git a/lib/PublicInbox/LeiP2q.pm b/lib/PublicInbox/LeiP2q.pm index d1dd125e..e7ddc852 100644 --- a/lib/PublicInbox/LeiP2q.pm +++ b/lib/PublicInbox/LeiP2q.pm @@ -12,6 +12,7 @@ use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Git qw(git_unquote); use PublicInbox::Spawn qw(popen_rd); use URI::Escape qw(uri_escape_utf8); +my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!; sub xphrase ($) { my ($s) = @_; @@ -23,7 +24,7 @@ sub xphrase ($) { map { s/\A\s*//; s/\s+\z//; - /[\|=><,\sA-Z]/ && !m![\./:\\\@]! ? qq("$_") : $_; + m![^\./:\\\@\-\w]! ? qq("$_") : $_ ; } ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g); } @@ -40,7 +41,7 @@ sub extract_terms { # eml->each_part callback push @{$lei->{qterms}->{dfctx}}, xphrase($_); } elsif (/^-- $/) { # email signature begins $in_diff = undef; - } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) { + } elsif (m!^diff --git $FN $FN!) { # wait until "---" and "+++" to capture filenames $in_diff = 1; } elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) { @@ -48,13 +49,16 @@ sub extract_terms { # eml->each_part callback push @{$lei->{qterms}->{dfpre}}, $oa; push @{$lei->{qterms}->{dfpost}}, $ob; # who uses dfblob? - } elsif (m!^(?:---|\+{3}) ("?[^/]+/.+)!) { + } elsif (m!^(?:---|\+{3}) ($FN)!) 
{ + next if $1 eq '/dev/null'; my $fn = (split(m!/!, git_unquote($1.''), 2))[1]; push @{$lei->{qterms}->{dfn}}, xphrase($fn); } elsif ($in_diff && s/^\+//) { # diff added push @{$lei->{qterms}->{dfb}}, xphrase($_); } elsif ($in_diff && s/^-//) { # diff removed push @{$lei->{qterms}->{dfa}}, xphrase($_); + } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) { + # traditional diff w/o -p } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) { push @{$lei->{qterms}->{dfhh}}, xphrase($1); } elsif (/^(?:dis)similarity index/ || diff --git a/t/lei-p2q.t b/t/lei-p2q.t index 1a2c2e4f..87cf9fa7 100644 --- a/t/lei-p2q.t +++ b/t/lei-p2q.t @@ -25,5 +25,8 @@ test_lei(sub { "dfpost:6e006fd73b OR " . "dfpost:6e006fd73\n", '3-byte chop'); + + lei_ok(qw(p2q t/data/message_embed.eml --want=dfb)); + like($lei_out, qr/\bdfb:\S+/, 'got dfb off /dev/null file'); }); done_testing;