Browse Source

fix aliasing-based undefined behavior in mbsrtowcs

mbsrtowcs contains "vectorized" loops to quickly step over bytes
without the high bit set; these have undefined behavior because they
read the char data through uint32_t lvalues, violating the aliasing rules.

commit 4d0a82170a25464c39522d7190b9fe302045ddb2 fixed the
corresponding usage in string functions by using the may_alias
attribute conditional on __GNUC__ and disabled the vectorized code in
its absence. do the same for mbsrtowcs.
Rich Felker 5 years ago
parent
commit
716745e00e
1 changed file with 8 additions and 2 deletions
  1. 8 2
      src/multibyte/mbsrtowcs.c

+ 8 - 2
src/multibyte/mbsrtowcs.c

@@ -38,12 +38,15 @@ size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbs
 	}
 
 	if (!ws) for (;;) {
+#ifdef __GNUC__
+		typedef uint32_t __attribute__((__may_alias__)) w32;
 		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
-			while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+			while (!(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) {
 				s += 4;
 				wn -= 4;
 			}
 		}
+#endif
 		if (*s-1u < 0x7f) {
 			s++;
 			wn--;
@@ -69,8 +72,10 @@ resume0:
 			*src = (const void *)s;
 			return wn0;
 		}
+#ifdef __GNUC__
+		typedef uint32_t __attribute__((__may_alias__)) w32;
 		if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) {
-			while (wn>=5 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) {
+			while (wn>=5 && !(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) {
 				*ws++ = *s++;
 				*ws++ = *s++;
 				*ws++ = *s++;
@@ -78,6 +83,7 @@ resume0:
 				wn -= 4;
 			}
 		}
+#endif
 		if (*s-1u < 0x7f) {
 			*ws++ = *s++;
 			wn--;