+# Populate the value slots of $doc from $values, an array ref indexed
+# by the PublicInbox::Search slot constants:
+#   TS       - always stored; also fed to gmtime() to derive YYYYMMDD
+#   NUM      - stored only when defined
+#   BYTES    - stored only when defined
+#   LINES    - always stored
+#   YYYYMMDD - TS rendered as a UTC calendar date ('%Y%m%d')
+sub add_values ($$) {
+	my ($doc, $values) = @_;
+	my $ts = $values->[PublicInbox::Search::TS];
+	my $num = $values->[PublicInbox::Search::NUM];
+	my $bytes = $values->[PublicInbox::Search::BYTES];
+	my $lines = $values->[PublicInbox::Search::LINES];
+
+	add_val($doc, PublicInbox::Search::TS, $ts);
+	if (defined $num) {
+		add_val($doc, PublicInbox::Search::NUM, $num);
+	}
+	if (defined $bytes) {
+		add_val($doc, PublicInbox::Search::BYTES, $bytes);
+	}
+	add_val($doc, PublicInbox::Search::LINES, $lines);
+
+	# the date slot is not supplied by the caller, it is derived
+	# from the timestamp
+	add_val($doc, PublicInbox::Search::YYYYMMDD,
+		strftime('%Y%m%d', gmtime($ts)));
+}
+
+# Index the address headers of $smsg through the term generator $tg.
+# From is always indexed (prefix 'A'); To ('XTO') and Cc ('XCC') are
+# indexed only when non-empty.  The term position is advanced after
+# every header, including the ones that were skipped.
+sub index_users ($$) {
+	my ($tg, $smsg) = @_;
+
+	# fetch all three headers before touching $tg
+	my $author = $smsg->from;
+	my $rcpt_to = $smsg->to;
+	my $rcpt_cc = $smsg->cc;
+
+	$tg->index_text($author, 1, 'A'); # A - author
+	$tg->increase_termpos;
+
+	foreach my $hdr ([ 'XTO', $rcpt_to ], [ 'XCC', $rcpt_cc ]) {
+		my ($pfx, $addrs) = @$hdr;
+		if ($addrs ne '') {
+			$tg->index_text($addrs, 1, $pfx);
+		}
+		$tg->increase_termpos;
+	}
+}
+
+# Index one piece of diff text ($text) under term prefix $pfx.
+# $xnq is an array ref of queued non-diff lines; when it is non-empty
+# those lines are indexed first (newline-joined, prefix 'XNQ') and the
+# queue is emptied, so the caller's array is modified.
+# The term position is advanced after each index_text call.
+sub index_diff_inc ($$$$) {
+	my ($tg, $text, $pfx, $xnq) = @_;
+
+	if (scalar(@$xnq) > 0) {
+		my $queued = join("\n", @$xnq);
+		$tg->index_text($queued, 1, 'XNQ');
+		$tg->increase_termpos;
+		splice(@$xnq); # empty the caller's queue
+	}
+
+	$tg->index_text($text, 1, $pfx);
+	$tg->increase_termpos;
+}
+
+# Work out the filename shared by the two sides ($fa, $fb) of a
+# traditional (non-git) diff header and index it under 'XDFN'.
+#
+# Leading path components are stripped from both sides, one at a
+# time, until the remaining paths are equal.  The common path is
+# indexed via index_diff_inc unless $seen (a hash ref of paths that
+# were already indexed) contains it; $seen is updated either way.
+#
+# Returns 1 when a common path was found, 0 otherwise.
+# No renames or space support for traditional diffs.
+sub index_old_diff_fn {
+	my ($tg, $seen, $fa, $fb, $xnq) = @_;
+
+	my @parts_a = split('/', $fa);
+	my @parts_b = split('/', $fb);
+
+	# every pass drops one leading component from each side
+	for (; @parts_a && @parts_b; shift(@parts_a), shift(@parts_b)) {
+		my $path = join('/', @parts_a);
+		next if $path ne join('/', @parts_b);
+
+		index_diff_inc($tg, $path, 'XDFN', $xnq) if !$seen->{$path}++;
+		return 1;
+	}
+	return 0;
+}
+
+sub index_diff ($$$) { # index a body that contains diff(s): filenames, context, added/removed lines, hunk headers, blob ids; everything else goes to XNQ
+ my ($tg, $lines, $doc) = @_; # $tg: object providing index_text/increase_termpos; $lines: array ref of body lines; $doc: handed to index_git_blob_id
+ my %seen; # filenames already indexed under XDFN, so each is indexed once
+ my $in_diff; # truthy while inside a diff; also stashes the "--- " filename of a traditional diff
+ my @xnq; # queued non-diff lines, indexed later under XNQ
+ my $xnq = \@xnq; # passed to index_diff_inc, which indexes and empties the queue
+ foreach (@$lines) { # $_ aliases each element, so the s/// below modify @$lines in place
+ if ($in_diff && s/^ //) { # diff context (leading space stripped)
+ index_diff_inc($tg, $_, 'XDFCTX', $xnq);
+ } elsif (/^-- $/) { # email signature begins
+ $in_diff = undef;
+ } elsif (m!^diff --git ("?a/.+) ("?b/.+)\z!) { # git diff header, paths possibly quoted
+ my ($fa, $fb) = ($1, $2);
+ my $fn = (split('/', git_unquote($fa), 2))[1]; # keep what follows the first '/'
+ $seen{$fn}++ or index_diff_inc($tg, $fn, 'XDFN', $xnq);
+ $fn = (split('/', git_unquote($fb), 2))[1]; # same for the "b/" side
+ $seen{$fn}++ or index_diff_inc($tg, $fn, 'XDFN', $xnq);
+ $in_diff = 1;
+ # traditional diff:
+ } elsif (m/^diff -(.+) (\S+) (\S+)$/) {
+ my ($opt, $fa, $fb) = ($1, $2, $3);
+ push @xnq, $_; # the header line itself is queued as non-diff text
+ # only support unified:
+ next unless $opt =~ /[uU]/;
+ $in_diff = index_old_diff_fn($tg, \%seen, $fa, $fb,
+ $xnq);
+ } elsif (m!^--- ("?a/.+)!) { # "--- a/..." line
+ my $fn = (split('/', git_unquote($1), 2))[1];
+ $seen{$fn}++ or index_diff_inc($tg, $fn, 'XDFN', $xnq);
+ $in_diff = 1;
+ } elsif (m!^\+\+\+ ("?b/.+)!) { # "+++ b/..." line
+ my $fn = (split('/', git_unquote($1), 2))[1];
+ $seen{$fn}++ or index_diff_inc($tg, $fn, 'XDFN', $xnq);
+ $in_diff = 1;
+ } elsif (/^--- (\S+)/) { # "--- " line without the a/ prefix
+ $in_diff = $1; # stash the old filename for the "+++ " branch below
+ push @xnq, $_;
+ } elsif (defined $in_diff && /^\+\+\+ (\S+)/) { # NOTE(review): $in_diff may also be 0/1 from another branch here and is still passed on as the old filename - confirm
+ $in_diff = index_old_diff_fn($tg, \%seen, $in_diff, $1,
+ $xnq);
+ } elsif ($in_diff && s/^\+//) { # diff added
+ index_diff_inc($tg, $_, 'XDFB', $xnq);
+ } elsif ($in_diff && s/^-//) { # diff removed
+ index_diff_inc($tg, $_, 'XDFA', $xnq);
+ } elsif (m!^index ([a-f0-9]+)\.\.([a-f0-9]+)!) { # "index <hex>..<hex>" line
+ my ($ba, $bb) = ($1, $2);
+ index_git_blob_id($doc, 'XDFPRE', $ba);
+ index_git_blob_id($doc, 'XDFPOST', $bb);
+ $in_diff = 1;
+ } elsif (/^@@ (?:\S+) (?:\S+) @@\s*$/) { # hunk header with nothing after the closing @@
+ # traditional diff w/o -p
+ } elsif (/^@@ (?:\S+) (?:\S+) @@\s*(\S+.*)$/) {
+ # hunk header context: index the text following the closing @@
+ index_diff_inc($tg, $1, 'XDFHH', $xnq);
+ # the following lines are not parsed further:
+ } elsif (/^(?:dis)similarity index/ || # NOTE(review): looks subsumed by the "(?:dis)?similarity index " test below unless no space follows - confirm
+ /^(?:old|new) mode/ ||
+ /^(?:deleted|new) file mode/ ||
+ /^(?:copy|rename) (?:from|to) / ||
+ /^(?:dis)?similarity index / ||
+ /^\\ No newline at end of file/ ||
+ /^Binary files .* differ/) {
+ push @xnq, $_; # still queued, so it ends up indexed under XNQ; $in_diff is left as-is
+ } elsif ($_ eq '') { # an empty line ends the current diff
+ $in_diff = undef;
+ } else { # anything unrecognized: queue as non-diff text and leave diff state
+ push @xnq, $_;
+ warn "non-diff line: $_\n" if DEBUG && $_ ne ''; # $_ ne '' always holds here; '' is handled by the branch above
+ $in_diff = undef;
+ }
+ }
+
+ $tg->index_text(join("\n", @xnq), 1, 'XNQ'); # index whatever is still queued (runs even when @xnq is empty)
+ $tg->increase_termpos;
+}
+
+# Index a run of body lines and then empty the caller's array.
+#   $tg    - object providing index_text / increase_termpos
+#   $lines - array ref of body lines; emptied before returning
+#   $doc   - when false, the lines are indexed as one newline-joined
+#            string under 'XQUOT' with 0 as index_text's second
+#            argument; when true, they are either handed to
+#            index_diff (if any line starts with "diff ", "--- " or
+#            "+++ ") or indexed as a whole under 'XNQ'
+sub index_body ($$$) {
+	my ($tg, $lines, $doc) = @_;
+	my $body = join("\n", @$lines);
+
+	if (!$doc) {
+		$tg->index_text($body, 0, 'XQUOT');
+	} elsif ($body =~ /^(?:diff|---|\+\+\+) /ms) {
+		# looks like a diff: index_diff works from the individual
+		# lines, so the joined copy is no longer needed
+		$body = undef;
+		index_diff($tg, $lines, $doc);
+	} else {
+		$tg->index_text($body, 1, 'XNQ');
+	}
+
+	$tg->increase_termpos;
+	@$lines = ();
+}
+