src/qsort.h

   1 /*
   2  * Copyright (c) 2013, 2017 Alexey Tourbin
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 /*
  24  * This is a traditional Quicksort implementation which mostly follows
  25  * [Sedgewick 1978].  Sorting is performed entirely on array indices,
  26  * while actual access to the array elements is abstracted out with the
  27  * user-defined `LESS` and `SWAP` primitives.
  28  *
  29  * Synopsis:
  30  *      QSORT(N, LESS, SWAP);
  31  * where
  32  *      N - the number of elements in A[];
  33  *      LESS(i, j) - compares A[i] to A[j];
  34  *      SWAP(i, j) - exchanges A[i] with A[j].
  35  */
  36
  37 #ifndef QSORT_H
  38 #define QSORT_H
  39
  40 /* Sort 3 elements. */
  41 #define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \
  42 do {                                    \
  43     if (Q_LESS(q_a2, q_a1)) {           \
  44         if (Q_LESS(q_a3, q_a2))         \
  45             Q_SWAP(q_a1, q_a3);         \
  46         else {                          \
  47             Q_SWAP(q_a1, q_a2);         \
  48             if (Q_LESS(q_a3, q_a2))     \
  49                 Q_SWAP(q_a2, q_a3);     \
  50         }                               \
  51     }                                   \
  52     else if (Q_LESS(q_a3, q_a2)) {      \
  53         Q_SWAP(q_a2, q_a3);             \
  54         if (Q_LESS(q_a2, q_a1))         \
  55             Q_SWAP(q_a1, q_a2);         \
  56     }                                   \
  57 } while (0)
  58
  59 /* Partition [q_l,q_r] around a pivot.  After partitioning,
  60  * [q_l,q_j] are the elements that are less than or equal to the pivot,
  61  * while [q_i,q_r] are the elements greater than or equal to the pivot. */
  62 #define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP)         \
  63 do {                                                                    \
  64     /* The middle element, not to be confused with the median. */       \
  65     Q_UINT q_m = (q_l) + (((q_r) - (q_l)) >> 1);                        \
  66     /* Reorder the second, the middle, and the last items.              \
  67      * As [Edelkamp Weiss 2016] explain, using the second element       \
  68      * instead of the first one helps avoid bad behaviour for           \
  69      * decreasingly sorted arrays.  This method is used in recent       \
  70      * versions of gcc's std::sort, see gcc bug 58437#c13, although     \
  71      * the details are somewhat different (cf. #c14). */                \
  72     Q_SORT3((q_l) + 1, q_m, q_r, Q_LESS, Q_SWAP);                       \
  73     /* Place the median at the beginning. */                            \
  74     Q_SWAP(q_l, q_m);                                                   \
  75     /* Partition [q_l+2, q_r-1] around the median which is in q_l.      \
  76      * q_i and q_j are initially off by one, they get decremented       \
  77      * in the do-while loops. */                                        \
  78     (q_i) = (q_l) + 1; (q_j) = q_r;                                     \
  79     while (1) {                                                         \
  80         do (q_i)++; while (Q_LESS(q_i, q_l));                           \
  81         do (q_j)--; while (Q_LESS(q_l, q_j));                           \
  82         if ((q_i) >= (q_j)) break; /* Sedgewick says "until j < i" */   \
  83         Q_SWAP(q_i, q_j);                                               \
  84     }                                                                   \
  85     /* Compensate for the i==j case. */                                 \
  86     (q_i) = (q_j) + 1;                                                  \
  87     /* Put the median to its final place. */                            \
  88     Q_SWAP(q_l, q_j);                                                   \
  89     /* The median is not part of the left subfile. */                   \
  90     (q_j)--;                                                            \
  91 } while (0)
  92
  93 /* Insertion sort is applied to small subfiles - this is contrary to
  94  * Sedgewick's suggestion to run a separate insertion sort pass after
  95  * the partitioning is done.  The reason I don't like a separate pass
  96  * is that it triggers extra comparisons, because it can't see that the
  97  * medians are already in their final positions and need not be rechecked.
  98  * Since I do not assume that comparisons are cheap, I also do not try
  99  * to eliminate the (q_j > q_l) boundary check. */
 100 #define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP)              \
 101 do {                                                                    \
 102     Q_UINT q_i, q_j;                                                    \
 103     /* For each item starting with the second... */                     \
 104     for (q_i = (q_l) + 1; q_i <= (q_r); q_i++)                          \
 105     /* move it down the array so that the first part is sorted. */      \
 106     for (q_j = q_i; q_j > (q_l) && (Q_LESS(q_j, q_j - 1)); q_j--)       \
 107         Q_SWAP(q_j, q_j - 1);                                           \
 108 } while (0)
 109
 110 /* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
 111  * Q_THRESH, the algorithm performs recursive partitioning.  When the size
 112  * drops below Q_THRESH, the algorithm switches to insertion sort.
 113  * The minimum valid value is probably 5 (with 5 items, the second and
 114  * the middle items, the middle itself being rounded down, are distinct). */
 115 #define Q_THRESH 16
 116
 117 /* The main loop. */
 118 #define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP)                             \
 119 do {                                                                    \
 120     Q_UINT q_l = 0;                                                     \
 121     Q_UINT q_r = (Q_N) - 1;                                             \
 122     Q_UINT q_sp = 0; /* the number of frames pushed to the stack */     \
 123     struct { Q_UINT q_l, q_r; }                                         \
 124         /* On 32-bit platforms, to sort a "char[3GB+]" array,           \
 125          * it may take full 32 stack frames.  On 64-bit CPUs,           \
 126          * though, the address space is limited to 48 bits.             \
 127          * The usage is further reduced if Q_N has a 32-bit type. */    \
 128         q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32];          \
 129     while (1) {                                                         \
 130         if (q_r - q_l + 1 >= Q_THRESH) {                                \
 131             Q_UINT q_i, q_j;                                            \
 132             Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP);    \
 133             /* Now have two subfiles: [q_l,q_j] and [q_i,q_r].          \
 134              * Dealing with them depends on which one is bigger. */     \
 135             if (q_j - q_l >= q_r - q_i)                                 \
 136                 Q_SUBFILES(q_l, q_j, q_i, q_r);                         \
 137             else                                                        \
 138                 Q_SUBFILES(q_i, q_r, q_l, q_j);                         \
 139         }                                                               \
 140         else {                                                          \
 141             Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP);         \
 142             /* Pop subfiles from the stack, until it gets empty. */     \
 143             if (q_sp == 0) break;                                       \
 144             q_sp--;                                                     \
 145             q_l = q_st[q_sp].q_l;                                       \
 146             q_r = q_st[q_sp].q_r;                                       \
 147         }                                                               \
 148     }                                                                   \
 149 } while (0)
 150
 151 /* The missing part: dealing with subfiles.
 152  * Assumes that the first subfile is not smaller than the second. */
 153 #define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2)                              \
 154 do {                                                                    \
 155     /* If the second subfile is only a single element, it needs         \
 156      * no further processing.  The first subfile will be processed      \
 157      * on the next iteration (both subfiles cannot be only a single     \
 158      * element, due to Q_THRESH). */                                    \
 159     if ((q_l2) == (q_r2)) {                                             \
 160         q_l = q_l1;                                                     \
 161         q_r = q_r1;                                                     \
 162     }                                                                   \
 163     else {                                                              \
 164         /* Otherwise, both subfiles need processing.                    \
 165          * Push the larger subfile onto the stack. */                   \
 166         q_st[q_sp].q_l = q_l1;                                          \
 167         q_st[q_sp].q_r = q_r1;                                          \
 168         q_sp++;                                                         \
 169         /* Process the smaller subfile on the next iteration. */        \
 170         q_l = q_l2;                                                     \
 171         q_r = q_r2;                                                     \
 172     }                                                                   \
 173 } while (0)
 174
 175 /* And now, ladies and gentlemen, may I proudly present to you... */
 176 #define QSORT(Q_N, Q_LESS, Q_SWAP)                                      \
 177 do {                                                                    \
 178     if ((Q_N) > 1)                                                      \
 179         /* We could check sizeof(Q_N) and use "unsigned", but at least  \
 180          * on x86_64, this has the performance penalty of up to 5%. */  \
 181         Q_LOOP(unsigned long, Q_N, Q_LESS, Q_SWAP);                     \
 182 } while (0)
 183
 184 #endif
 185
 186 /* ex:set ts=8 sts=4 sw=4 noet: */