/* towctrans.c */
  1. #include <wchar.h>
  2. #include <wctype.h>
  3. #include <stdio.h>
  4. #define CASEMAP(u1,u2,l) { (u1), (l)-(u1), (u2)-(u1)+1 }
  5. #define CASELACE(u1,u2) CASEMAP((u1),(u2),(u1)+1)
  6. static const struct {
  7. unsigned short upper;
  8. signed char lower;
  9. unsigned char len;
  10. } casemaps[] = {
  11. CASEMAP('A','Z','a'),
  12. CASEMAP(0xc0,0xde,0xe0),
  13. CASELACE(0x0100,0x012e),
  14. CASELACE(0x0132,0x0136),
  15. CASELACE(0x0139,0x0147),
  16. CASELACE(0x014a,0x0176),
  17. CASELACE(0x0179,0x017d),
  18. CASELACE(0x370,0x372),
  19. CASEMAP(0x391,0x3a1,0x3b1),
  20. CASEMAP(0x3a3,0x3ab,0x3c3),
  21. CASEMAP(0x400,0x40f,0x450),
  22. CASEMAP(0x410,0x42f,0x430),
  23. CASELACE(0x460,0x480),
  24. CASELACE(0x48a,0x4be),
  25. CASELACE(0x4c1,0x4cd),
  26. CASELACE(0x4d0,0x50e),
  27. CASEMAP(0x531,0x556,0x561),
  28. CASELACE(0x01a0,0x01a4),
  29. CASELACE(0x01b3,0x01b5),
  30. CASELACE(0x01cd,0x01db),
  31. CASELACE(0x01de,0x01ee),
  32. CASELACE(0x01f8,0x021e),
  33. CASELACE(0x0222,0x0232),
  34. CASELACE(0x03d8,0x03ee),
  35. CASELACE(0x1e00,0x1e94),
  36. CASELACE(0x1ea0,0x1efe),
  37. CASEMAP(0x1f08,0x1f0f,0x1f00),
  38. CASEMAP(0x1f18,0x1f1d,0x1f10),
  39. CASEMAP(0x1f28,0x1f2f,0x1f20),
  40. CASEMAP(0x1f38,0x1f3f,0x1f30),
  41. CASEMAP(0x1f48,0x1f4d,0x1f40),
  42. CASEMAP(0x1f68,0x1f6f,0x1f60),
  43. CASEMAP(0x1f88,0x1f8f,0x1f80),
  44. CASEMAP(0x1f98,0x1f9f,0x1f90),
  45. CASEMAP(0x1fa8,0x1faf,0x1fa0),
  46. CASEMAP(0x1fb8,0x1fb9,0x1fb0),
  47. CASEMAP(0x1fba,0x1fbb,0x1f70),
  48. CASEMAP(0x1fc8,0x1fcb,0x1f72),
  49. CASEMAP(0x1fd8,0x1fd9,0x1fd0),
  50. CASEMAP(0x1fda,0x1fdb,0x1f76),
  51. CASEMAP(0x1fe8,0x1fe9,0x1fe0),
  52. CASEMAP(0x1fea,0x1feb,0x1f7a),
  53. CASEMAP(0x1ff8,0x1ff9,0x1f78),
  54. CASEMAP(0x1ffa,0x1ffb,0x1f7c),
  55. CASELACE(0x246,0x24e),
  56. CASELACE(0x510,0x512),
  57. CASEMAP(0x2160,0x216f,0x2170),
  58. CASEMAP(0x2c00,0x2c2e,0x2c30),
  59. CASELACE(0x2c67,0x2c6b),
  60. CASELACE(0x2c80,0x2ce2),
  61. CASELACE(0xa722,0xa72e),
  62. CASELACE(0xa732,0xa76e),
  63. CASELACE(0xa779,0xa77b),
  64. CASELACE(0xa77e,0xa786),
  65. CASEMAP(0xff21,0xff3a,0xff41),
  66. { 0,0,0 }
  67. };
  68. static const unsigned short pairs[][2] = {
  69. { 'I', 0x0131 },
  70. { 'S', 0x017f },
  71. { 0x0130, 'i' },
  72. { 0x0178, 0x00ff },
  73. { 0x0181, 0x0253 },
  74. { 0x0182, 0x0183 },
  75. { 0x0184, 0x0185 },
  76. { 0x0186, 0x0254 },
  77. { 0x0187, 0x0188 },
  78. { 0x0189, 0x0256 },
  79. { 0x018a, 0x0257 },
  80. { 0x018b, 0x018c },
  81. { 0x018e, 0x01dd },
  82. { 0x018f, 0x0259 },
  83. { 0x0190, 0x025b },
  84. { 0x0191, 0x0192 },
  85. { 0x0193, 0x0260 },
  86. { 0x0194, 0x0263 },
  87. { 0x0196, 0x0269 },
  88. { 0x0197, 0x0268 },
  89. { 0x0198, 0x0199 },
  90. { 0x019c, 0x026f },
  91. { 0x019d, 0x0272 },
  92. { 0x019f, 0x0275 },
  93. { 0x01a6, 0x0280 },
  94. { 0x01a7, 0x01a8 },
  95. { 0x01a9, 0x0283 },
  96. { 0x01ac, 0x01ad },
  97. { 0x01ae, 0x0288 },
  98. { 0x01af, 0x01b0 },
  99. { 0x01b1, 0x028a },
  100. { 0x01b2, 0x028b },
  101. { 0x01b7, 0x0292 },
  102. { 0x01b8, 0x01b9 },
  103. { 0x01bc, 0x01bd },
  104. { 0x01c4, 0x01c6 },
  105. { 0x01c4, 0x01c5 },
  106. { 0x01c5, 0x01c6 },
  107. { 0x01c7, 0x01c9 },
  108. { 0x01c7, 0x01c8 },
  109. { 0x01c8, 0x01c9 },
  110. { 0x01ca, 0x01cc },
  111. { 0x01ca, 0x01cb },
  112. { 0x01cb, 0x01cc },
  113. { 0x01f1, 0x01f3 },
  114. { 0x01f1, 0x01f2 },
  115. { 0x01f2, 0x01f3 },
  116. { 0x01f4, 0x01f5 },
  117. { 0x01f6, 0x0195 },
  118. { 0x01f7, 0x01bf },
  119. { 0x0220, 0x019e },
  120. { 0x0386, 0x03ac },
  121. { 0x0388, 0x03ad },
  122. { 0x0389, 0x03ae },
  123. { 0x038a, 0x03af },
  124. { 0x038c, 0x03cc },
  125. { 0x038e, 0x03cd },
  126. { 0x038f, 0x03ce },
  127. { 0x0399, 0x0345 },
  128. { 0x0399, 0x1fbe },
  129. { 0x03a3, 0x03c2 },
  130. { 0x03f7, 0x03f8 },
  131. { 0x03fa, 0x03fb },
  132. { 0x1e60, 0x1e9b },
  133. { 0x1f59, 0x1f51 },
  134. { 0x1f5b, 0x1f53 },
  135. { 0x1f5d, 0x1f55 },
  136. { 0x1f5f, 0x1f57 },
  137. { 0x1fbc, 0x1fb3 },
  138. { 0x1fcc, 0x1fc3 },
  139. { 0x1fec, 0x1fe5 },
  140. { 0x1ffc, 0x1ff3 },
  141. { 0x23a, 0x2c65 },
  142. { 0x23b, 0x23c },
  143. { 0x23d, 0x19a },
  144. { 0x23e, 0x2c66 },
  145. { 0x241, 0x242 },
  146. { 0x243, 0x180 },
  147. { 0x244, 0x289 },
  148. { 0x245, 0x28c },
  149. { 0x3f4, 0x3b8 },
  150. { 0x3f9, 0x3f2 },
  151. { 0x3fd, 0x37b },
  152. { 0x3fe, 0x37c },
  153. { 0x3ff, 0x37d },
  154. { 0x4c0, 0x4cf },
  155. { 0x2126, 0x3c9 },
  156. { 0x212a, 'k' },
  157. { 0x212b, 0xe5 },
  158. { 0x2132, 0x214e },
  159. { 0x2183, 0x2184 },
  160. { 0x2c60, 0x2c61 },
  161. { 0x2c62, 0x26b },
  162. { 0x2c63, 0x1d7d },
  163. { 0x2c64, 0x27d },
  164. { 0x2c6d, 0x251 },
  165. { 0x2c6e, 0x271 },
  166. { 0x2c6f, 0x250 },
  167. { 0x2c72, 0x2c73 },
  168. { 0x2c75, 0x2c76 },
  169. { 0xa77d, 0x1d79 },
  170. /* bogus greek 'symbol' letters */
  171. { 0x376, 0x377 },
  172. { 0x39c, 0xb5 },
  173. { 0x392, 0x3d0 },
  174. { 0x398, 0x3d1 },
  175. { 0x3a6, 0x3d5 },
  176. { 0x3a0, 0x3d6 },
  177. { 0x39a, 0x3f0 },
  178. { 0x3a1, 0x3f1 },
  179. { 0x395, 0x3f5 },
  180. { 0x3cf, 0x3d7 },
  181. { 0,0 }
  182. };
  183. static wchar_t __towcase(wchar_t wc, int lower)
  184. {
  185. int i;
  186. int lmul = 2*lower-1;
  187. int lmask = lower-1;
  188. if ((unsigned)wc - 0x10400 < 0x50)
  189. return wc + lmul*0x28;
  190. /* no letters with case in these large ranges */
  191. if (!iswalpha(wc)
  192. || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
  193. || (unsigned)wc - 0x2e00 <= 0xa6ff-0x2e00
  194. || (unsigned)wc - 0xa800 <= 0xfeff-0xa800)
  195. return wc;
  196. /* special case because the diff between upper/lower is too big */
  197. if ((unsigned)wc - 0x10a0 < 0x26 || (unsigned)wc - 0x2d00 < 0x26)
  198. return wc + lmul*(0x2d00-0x10a0);
  199. for (i=0; casemaps[i].len; i++) {
  200. int base = casemaps[i].upper + (lmask & casemaps[i].lower);
  201. if ((unsigned)wc-base < casemaps[i].len) {
  202. if (casemaps[i].lower == 1)
  203. return wc + lower - ((wc-casemaps[i].upper)&1);
  204. return wc + lmul*casemaps[i].lower;
  205. }
  206. }
  207. for (i=0; pairs[i][1-lower]; i++) {
  208. if (pairs[i][1-lower] == wc)
  209. return pairs[i][lower];
  210. }
  211. if ((unsigned)wc - 0x10428 + (lower<<5) + (lower<<3) < 0x28)
  212. return wc - 0x28 + (lower<<10) + (lower<<6);
  213. return wc;
  214. }
  215. wint_t towupper(wint_t wc)
  216. {
  217. return __towcase(wc, 0);
  218. }
  219. wint_t towlower(wint_t wc)
  220. {
  221. return __towcase(wc, 1);
  222. }