qsort.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /* Copyright (C) 2011 by Valentin Ochs
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to
  5. * deal in the Software without restriction, including without limitation the
  6. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. * sell copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. * IN THE SOFTWARE.
  20. */
  21. /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
  22. /* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1).
  23. Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include "atomic.h"
  28. #define ntz(x) a_ctz_l((x))
  29. typedef int (*cmpfun)(const void *, const void *);
  30. static inline int pntz(size_t p[2]) {
  31. int r = ntz(p[0] - 1);
  32. if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) {
  33. return r;
  34. }
  35. return 0;
  36. }
  37. static void cycle(size_t width, unsigned char* ar[], int n)
  38. {
  39. unsigned char tmp[256];
  40. size_t l;
  41. int i;
  42. if(n < 2) {
  43. return;
  44. }
  45. ar[n] = tmp;
  46. while(width) {
  47. l = sizeof(tmp) < width ? sizeof(tmp) : width;
  48. memcpy(ar[n], ar[0], l);
  49. for(i = 0; i < n; i++) {
  50. memcpy(ar[i], ar[i + 1], l);
  51. ar[i] += l;
  52. }
  53. width -= l;
  54. }
  55. }
  56. /* shl() and shr() need n > 0 */
  57. static inline void shl(size_t p[2], int n)
  58. {
  59. if(n >= 8 * sizeof(size_t)) {
  60. n -= 8 * sizeof(size_t);
  61. p[1] = p[0];
  62. p[0] = 0;
  63. }
  64. p[1] <<= n;
  65. p[1] |= p[0] >> (sizeof(size_t) * 8 - n);
  66. p[0] <<= n;
  67. }
  68. static inline void shr(size_t p[2], int n)
  69. {
  70. if(n >= 8 * sizeof(size_t)) {
  71. n -= 8 * sizeof(size_t);
  72. p[0] = p[1];
  73. p[1] = 0;
  74. }
  75. p[0] >>= n;
  76. p[0] |= p[1] << (sizeof(size_t) * 8 - n);
  77. p[1] >>= n;
  78. }
  79. static void sift(unsigned char *head, size_t width, cmpfun cmp, int pshift, size_t lp[])
  80. {
  81. unsigned char *rt, *lf;
  82. unsigned char *ar[14 * sizeof(size_t) + 1];
  83. int i = 1;
  84. ar[0] = head;
  85. while(pshift > 1) {
  86. rt = head - width;
  87. lf = head - width - lp[pshift - 2];
  88. if((*cmp)(ar[0], lf) >= 0 && (*cmp)(ar[0], rt) >= 0) {
  89. break;
  90. }
  91. if((*cmp)(lf, rt) >= 0) {
  92. ar[i++] = lf;
  93. head = lf;
  94. pshift -= 1;
  95. } else {
  96. ar[i++] = rt;
  97. head = rt;
  98. pshift -= 2;
  99. }
  100. }
  101. cycle(width, ar, i);
  102. }
  103. static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2], int pshift, int trusty, size_t lp[])
  104. {
  105. unsigned char *stepson,
  106. *rt, *lf;
  107. size_t p[2];
  108. unsigned char *ar[14 * sizeof(size_t) + 1];
  109. int i = 1;
  110. int trail;
  111. p[0] = pp[0];
  112. p[1] = pp[1];
  113. ar[0] = head;
  114. while(p[0] != 1 || p[1] != 0) {
  115. stepson = head - lp[pshift];
  116. if((*cmp)(stepson, ar[0]) <= 0) {
  117. break;
  118. }
  119. if(!trusty && pshift > 1) {
  120. rt = head - width;
  121. lf = head - width - lp[pshift - 2];
  122. if((*cmp)(rt, stepson) >= 0 || (*cmp)(lf, stepson) >= 0) {
  123. break;
  124. }
  125. }
  126. ar[i++] = stepson;
  127. head = stepson;
  128. trail = pntz(p);
  129. shr(p, trail);
  130. pshift += trail;
  131. trusty = 0;
  132. }
  133. if(!trusty) {
  134. cycle(width, ar, i);
  135. sift(head, width, cmp, pshift, lp);
  136. }
  137. }
  138. void qsort(void *base, size_t nel, size_t width, cmpfun cmp)
  139. {
  140. size_t lp[12*sizeof(size_t)];
  141. size_t i, size = width * nel;
  142. unsigned char *head, *high;
  143. size_t p[2] = {1, 0};
  144. int pshift = 1;
  145. int trail;
  146. if (!size) return;
  147. head = base;
  148. high = head + size - width;
  149. /* Precompute Leonardo numbers, scaled by element width */
  150. for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
  151. while(head < high) {
  152. if((p[0] & 3) == 3) {
  153. sift(head, width, cmp, pshift, lp);
  154. shr(p, 2);
  155. pshift += 2;
  156. } else {
  157. if(lp[pshift - 1] >= high - head) {
  158. trinkle(head, width, cmp, p, pshift, 0, lp);
  159. } else {
  160. sift(head, width, cmp, pshift, lp);
  161. }
  162. if(pshift == 1) {
  163. shl(p, 1);
  164. pshift = 0;
  165. } else {
  166. shl(p, pshift - 1);
  167. pshift = 1;
  168. }
  169. }
  170. p[0] |= 1;
  171. head += width;
  172. }
  173. trinkle(head, width, cmp, p, pshift, 0, lp);
  174. while(pshift != 1 || p[0] != 1 || p[1] != 0) {
  175. if(pshift <= 1) {
  176. trail = pntz(p);
  177. shr(p, trail);
  178. pshift += trail;
  179. } else {
  180. shl(p, 2);
  181. pshift -= 2;
  182. p[0] ^= 7;
  183. shr(p, 1);
  184. trinkle(head - lp[pshift] - width, width, cmp, p, pshift + 1, 1, lp);
  185. shl(p, 1);
  186. p[0] |= 1;
  187. trinkle(head - width, width, cmp, p, pshift, 1, lp);
  188. }
  189. head -= width;
  190. }
  191. }