瀏覽代碼

update case mappings to unicode 10.0

the mapping tables and code are not automatically generated; they were
produced by comparing the output of towupper/towlower against the
mappings in the UCD, ignoring characters that were previously excluded
from case mappings or from alphabetic status (micro sign and circled
letters), and adding table entries or code for everything else
missing.

based very loosely on a patch by Reini Urban.
Rich Felker 7 年之前
父節點
當前提交
54941eddfd
共有 1 個文件被更改,包括 41 次插入和 2 次删除
  1. 41 2
      src/ctype/towctrans.c

+ 41 - 2
src/ctype/towctrans.c

@@ -29,7 +29,7 @@ static const struct {
 	CASELACE(0x4c1,0x4cd),
 	CASELACE(0x4c1,0x4cd),
 	CASELACE(0x4d0,0x50e),
 	CASELACE(0x4d0,0x50e),
 
 
-	CASELACE(0x514,0x526),
+	CASELACE(0x514,0x52e),
 	CASEMAP(0x531,0x556,0x561),
 	CASEMAP(0x531,0x556,0x561),
 
 
 	CASELACE(0x01a0,0x01a4),
 	CASELACE(0x01a0,0x01a4),
@@ -63,6 +63,10 @@ static const struct {
 	CASEMAP(0x1ff8,0x1ff9,0x1f78),
 	CASEMAP(0x1ff8,0x1ff9,0x1f78),
 	CASEMAP(0x1ffa,0x1ffb,0x1f7c),
 	CASEMAP(0x1ffa,0x1ffb,0x1f7c),
 
 
+	CASEMAP(0x13f0,0x13f5,0x13f8),
+	CASELACE(0xa698,0xa69a),
+	CASELACE(0xa796,0xa79e),
+
 	CASELACE(0x246,0x24e),
 	CASELACE(0x246,0x24e),
 	CASELACE(0x510,0x512),
 	CASELACE(0x510,0x512),
 	CASEMAP(0x2160,0x216f,0x2170),
 	CASEMAP(0x2160,0x216f,0x2170),
@@ -82,6 +86,8 @@ static const struct {
 	CASELACE(0xa790,0xa792),
 	CASELACE(0xa790,0xa792),
 	CASELACE(0xa7a0,0xa7a8),
 	CASELACE(0xa7a0,0xa7a8),
 
 
+	CASELACE(0xa7b4,0xa7b6),
+
 	CASEMAP(0xff21,0xff3a,0xff41),
 	CASEMAP(0xff21,0xff3a,0xff41),
 	{ 0,0,0 }
 	{ 0,0,0 }
 };
 };
@@ -216,6 +222,26 @@ static const unsigned short pairs[][2] = {
 	{ 0x395, 0x3f5 },
 	{ 0x395, 0x3f5 },
 	{ 0x3cf, 0x3d7 },
 	{ 0x3cf, 0x3d7 },
 
 
+	{ 0xa7ab, 0x25c },
+	{ 0xa7ac, 0x261 },
+	{ 0xa7ad, 0x26c },
+	{ 0xa7ae, 0x26a },
+	{ 0xa7b0, 0x29e },
+	{ 0xa7b1, 0x287 },
+	{ 0xa7b2, 0x29d },
+	{ 0xa7b3, 0xab53 },
+
+	/* special cyrillic lowercase forms */
+	{ 0x412, 0x1c80 },
+	{ 0x414, 0x1c81 },
+	{ 0x41e, 0x1c82 },
+	{ 0x421, 0x1c83 },
+	{ 0x422, 0x1c84 },
+	{ 0x422, 0x1c85 },
+	{ 0x42a, 0x1c86 },
+	{ 0x462, 0x1c87 },
+	{ 0xa64a, 0x1c88 },
+
 	{ 0,0 }
 	{ 0,0 }
 };
 };
 
 
@@ -229,7 +255,8 @@ static wchar_t __towcase(wchar_t wc, int lower)
 	if (!iswalpha(wc)
 	if (!iswalpha(wc)
 	 || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
 	 || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
 	 || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
 	 || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
-	 || (unsigned)wc - 0xa800 <= 0xfeff-0xa800)
+	 || (unsigned)wc - 0xa800 <= 0xab52-0xa800
+	 || (unsigned)wc - 0xabc0 <= 0xfeff-0xabc0)
 		return wc;
 		return wc;
 	/* special case because the diff between upper/lower is too big */
 	/* special case because the diff between upper/lower is too big */
 	if (lower && (unsigned)wc - 0x10a0 < 0x2e)
 	if (lower && (unsigned)wc - 0x10a0 < 0x2e)
@@ -238,6 +265,10 @@ static wchar_t __towcase(wchar_t wc, int lower)
 	if (!lower && (unsigned)wc - 0x2d00 < 0x26)
 	if (!lower && (unsigned)wc - 0x2d00 < 0x26)
 		if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
 		if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
 		else return wc + 0x10a0 - 0x2d00;
 		else return wc + 0x10a0 - 0x2d00;
+	if (lower && (unsigned)wc - 0x13a0 < 0x50)
+		return wc + 0xab70 - 0x13a0;
+	if (!lower && (unsigned)wc - 0xab70 < 0x50)
+		return wc + 0x13a0 - 0xab70;
 	for (i=0; casemaps[i].len; i++) {
 	for (i=0; casemaps[i].len; i++) {
 		int base = casemaps[i].upper + (lmask & casemaps[i].lower);
 		int base = casemaps[i].upper + (lmask & casemaps[i].lower);
 		if ((unsigned)wc-base < casemaps[i].len) {
 		if ((unsigned)wc-base < casemaps[i].len) {
@@ -252,6 +283,14 @@ static wchar_t __towcase(wchar_t wc, int lower)
 	}
 	}
 	if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
 	if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
 		return wc - 0x28 + 0x50*lower;
 		return wc - 0x28 + 0x50*lower;
+	if ((unsigned)wc - (0x104d8 - 0x28*lower) < 0x24)
+		return wc - 0x28 + 0x50*lower;
+	if ((unsigned)wc - (0x10cc0 - 0x40*lower) < 0x33)
+		return wc - 0x40 + 0x80*lower;
+	if ((unsigned)wc - (0x118c0 - 0x20*lower) < 0x20)
+		return wc - 0x20 + 0x40*lower;
+	if ((unsigned)wc - (0x1e922 - 0x22*lower) < 0x22)
+		return wc - 0x22 + 0x44*lower;
 	return wc;
 	return wc;
 }
 }