Sergey Matveev's repositories - public-inbox.git/commitdiff
lei p2q: fix /dev/null filenames, fix phrase quoting rules
author: Eric Wong <e@80x24.org>
Mon, 1 Mar 2021 05:47:36 +0000 (11:47 +0600)
committer: Eric Wong <e@80x24.org>
Mon, 1 Mar 2021 05:52:16 +0000 (05:52 +0000)
/dev/null mis-handling was reported by Kyle Meyer.

Phrase quoting rules are also refined to avoid leaving spaces
unquoted when "phrase generator" characters exist.  Also,
context-free hunk headers no longer clobber the in_diff
state of the parser, since git can still generate those.

Link: https://public-inbox.org/meta/87k0qrrhve.fsf@kyleam.com/
lib/PublicInbox/LeiP2q.pm
t/lei-p2q.t

index d1dd125ebb858303e3740eaa2ccc1b6e80d1cf46..e7ddc852e492cfe500aa8b9ce31215f2bf498acb 100644 (file)
@@ -12,6 +12,7 @@ use PublicInbox::MsgIter qw(msg_part_text);
 use PublicInbox::Git qw(git_unquote);
 use PublicInbox::Spawn qw(popen_rd);
 use URI::Escape qw(uri_escape_utf8);
+my $FN = qr!((?:"?[^/\n]+/[^\r\n]+)|/dev/null)!;
 
 sub xphrase ($) {
        my ($s) = @_;
@@ -23,7 +24,7 @@ sub xphrase ($) {
        map {
                s/\A\s*//;
                s/\s+\z//;
-               /[\|=><,\sA-Z]/ && !m![\./:\\\@]! ? qq("$_") : $_;
+               m![^\./:\\\@\-\w]! ? qq("$_") : $_ ;
        } ($s =~ m!(\w[\|=><,\./:\\\@\-\w\s]+)!g);
 }
 
@@ -40,7 +41,7 @@ sub extract_terms { # eml->each_part callback
                        push @{$lei->{qterms}->{dfctx}}, xphrase($_);
                } elsif (/^-- $/) { # email signature begins
                        $in_diff = undef;
-               } elsif (m!^diff --git "?[^/]+/.+ "?[^/]+/.+\z!) {
+               } elsif (m!^diff --git $FN $FN!) {
                        # wait until "---" and "+++" to capture filenames
                        $in_diff = 1;
                } elsif (/^index ([a-f0-9]+)\.\.([a-f0-9]+)\b/) {
@@ -48,13 +49,16 @@ sub extract_terms { # eml->each_part callback
                        push @{$lei->{qterms}->{dfpre}}, $oa;
                        push @{$lei->{qterms}->{dfpost}}, $ob;
                        # who uses dfblob?
-               } elsif (m!^(?:---|\+{3}) ("?[^/]+/.+)!) {
+               } elsif (m!^(?:---|\+{3}) ($FN)!) {
+                       next if $1 eq '/dev/null';
                        my $fn = (split(m!/!, git_unquote($1.''), 2))[1];
                        push @{$lei->{qterms}->{dfn}}, xphrase($fn);
                } elsif ($in_diff && s/^\+//) { # diff added
                        push @{$lei->{qterms}->{dfb}}, xphrase($_);
                } elsif ($in_diff && s/^-//) { # diff removed
                        push @{$lei->{qterms}->{dfa}}, xphrase($_);
+               } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) {
+                       # traditional diff w/o -p
                } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)/) {
                        push @{$lei->{qterms}->{dfhh}}, xphrase($1);
                } elsif (/^(?:dis)similarity index/ ||
index 1a2c2e4f2d201ad726502a59878964277594b69f..87cf9fa7d6d9fcad15097037f5cf8610e72a8f3f 100644 (file)
@@ -25,5 +25,8 @@ test_lei(sub {
                        "dfpost:6e006fd73b OR " .
                        "dfpost:6e006fd73\n",
                '3-byte chop');
+
+       lei_ok(qw(p2q t/data/message_embed.eml --want=dfb));
+       like($lei_out, qr/\bdfb:\S+/, 'got dfb off /dev/null file');
 });
 done_testing;