1
0

vfwscanf.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdarg.h>
  4. #include <ctype.h>
  5. #include <wchar.h>
  6. #include <wctype.h>
  7. #include <limits.h>
  8. #include <string.h>
  9. #include "stdio_impl.h"
  10. #include "shgetc.h"
  11. #include "intscan.h"
  12. #include "floatscan.h"
  13. #include "libc.h"
  14. #define SIZE_hh -2
  15. #define SIZE_h -1
  16. #define SIZE_def 0
  17. #define SIZE_l 1
  18. #define SIZE_L 2
  19. #define SIZE_ll 3
  20. static void store_int(void *dest, int size, unsigned long long i)
  21. {
  22. if (!dest) return;
  23. switch (size) {
  24. case SIZE_hh:
  25. *(char *)dest = i;
  26. break;
  27. case SIZE_h:
  28. *(short *)dest = i;
  29. break;
  30. case SIZE_def:
  31. *(int *)dest = i;
  32. break;
  33. case SIZE_l:
  34. *(long *)dest = i;
  35. break;
  36. case SIZE_ll:
  37. *(long long *)dest = i;
  38. break;
  39. }
  40. }
  41. static void *arg_n(va_list ap, unsigned int n)
  42. {
  43. void *p;
  44. unsigned int i;
  45. va_list ap2;
  46. va_copy(ap2, ap);
  47. for (i=n; i>1; i--) va_arg(ap2, void *);
  48. p = va_arg(ap2, void *);
  49. va_end(ap2);
  50. return p;
  51. }
  52. static int in_set(const wchar_t *set, int c)
  53. {
  54. int j;
  55. const wchar_t *p = set;
  56. if (*p == '-') {
  57. if (c=='-') return 1;
  58. p++;
  59. } else if (*p == ']') {
  60. if (c==']') return 1;
  61. p++;
  62. }
  63. for (; *p && *p != ']'; p++) {
  64. if (*p=='-' && p[1] && p[1] != ']')
  65. for (j=p++[-1]; j<*p; j++)
  66. if (c==j) return 1;
  67. if (c==*p) return 1;
  68. }
  69. return 0;
  70. }
  71. #if 1
  72. #undef getwc
  73. #define getwc(f) \
  74. ((f)->rpos < (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f))
  75. #undef ungetwc
  76. #define ungetwc(c,f) \
  77. ((f)->rend && (c)<128U ? *--(f)->rpos : ungetwc((c),(f)))
  78. #endif
  79. int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
  80. {
  81. int width;
  82. int size;
  83. int alloc;
  84. const wchar_t *p;
  85. int c, t;
  86. char *s;
  87. wchar_t *wcs;
  88. void *dest=NULL;
  89. int invert;
  90. int matches=0;
  91. off_t pos = 0, cnt;
  92. static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" };
  93. char tmp[3*sizeof(int)+10];
  94. const wchar_t *set;
  95. size_t i, k;
  96. FLOCK(f);
  97. for (p=fmt; *p; p++) {
  98. alloc = 0;
  99. if (iswspace(*p)) {
  100. while (iswspace(p[1])) p++;
  101. while (iswspace((c=getwc(f)))) pos++;
  102. ungetwc(c, f);
  103. continue;
  104. }
  105. if (*p != '%' || p[1] == '%') {
  106. p += *p=='%';
  107. c = getwc(f);
  108. if (c!=*p) {
  109. ungetwc(c, f);
  110. if (c<0) goto input_fail;
  111. goto match_fail;
  112. }
  113. pos++;
  114. continue;
  115. }
  116. p++;
  117. if (*p=='*') {
  118. dest = 0; p++;
  119. } else if (iswdigit(*p) && p[1]=='$') {
  120. dest = arg_n(ap, *p-'0'); p+=2;
  121. } else {
  122. dest = va_arg(ap, void *);
  123. }
  124. for (width=0; iswdigit(*p); p++) {
  125. width = 10*width + *p - '0';
  126. }
  127. if (*p=='m') {
  128. wcs = 0;
  129. s = 0;
  130. alloc = !!dest;
  131. p++;
  132. } else {
  133. alloc = 0;
  134. }
  135. size = SIZE_def;
  136. switch (*p++) {
  137. case 'h':
  138. if (*p == 'h') p++, size = SIZE_hh;
  139. else size = SIZE_h;
  140. break;
  141. case 'l':
  142. if (*p == 'l') p++, size = SIZE_ll;
  143. else size = SIZE_l;
  144. break;
  145. case 'j':
  146. size = SIZE_ll;
  147. break;
  148. case 'z':
  149. case 't':
  150. size = SIZE_l;
  151. break;
  152. case 'L':
  153. size = SIZE_L;
  154. break;
  155. case 'd': case 'i': case 'o': case 'u': case 'x':
  156. case 'a': case 'e': case 'f': case 'g':
  157. case 'A': case 'E': case 'F': case 'G': case 'X':
  158. case 's': case 'c': case '[':
  159. case 'S': case 'C':
  160. case 'p': case 'n':
  161. p--;
  162. break;
  163. default:
  164. goto fmt_fail;
  165. }
  166. t = *p;
  167. /* Transform S,C -> ls,lc */
  168. if ((t&0x2f)==3) {
  169. size = SIZE_l;
  170. t |= 32;
  171. }
  172. if (t != 'n') {
  173. if (t != '[' && (t|32) != 'c')
  174. while (iswspace((c=getwc(f)))) pos++;
  175. else
  176. c=getwc(f);
  177. if (c < 0) goto input_fail;
  178. ungetwc(c, f);
  179. }
  180. switch (t) {
  181. case 'n':
  182. store_int(dest, size, pos);
  183. /* do not increment match count, etc! */
  184. continue;
  185. case 's':
  186. case 'c':
  187. case '[':
  188. if (t == 'c') {
  189. if (width<1) width = 1;
  190. invert = 1;
  191. set = L"";
  192. } else if (t == 's') {
  193. invert = 1;
  194. set = (const wchar_t[]){
  195. ' ', '\t', '\n', '\r', 11, 12, 0x0085,
  196. 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
  197. 0x2006, 0x2008, 0x2009, 0x200a,
  198. 0x2028, 0x2029, 0x205f, 0x3000, 0 };
  199. } else {
  200. if (*++p == '^') p++, invert = 1;
  201. else invert = 0;
  202. set = p;
  203. if (*p==']') p++;
  204. while (*p!=']') {
  205. if (!*p) goto fmt_fail;
  206. p++;
  207. }
  208. }
  209. s = (size == SIZE_def) ? dest : 0;
  210. wcs = (size == SIZE_l) ? dest : 0;
  211. int gotmatch = 0;
  212. if (width < 1) width = -1;
  213. i = 0;
  214. if (alloc) {
  215. k = t=='c' ? width+1U : 31;
  216. if (size == SIZE_l) {
  217. wcs = malloc(k*sizeof(wchar_t));
  218. if (!wcs) goto alloc_fail;
  219. } else {
  220. s = malloc(k);
  221. if (!s) goto alloc_fail;
  222. }
  223. }
  224. while (width) {
  225. if ((c=getwc(f))<0) break;
  226. if (in_set(set, c) == invert)
  227. break;
  228. if (wcs) {
  229. wcs[i++] = c;
  230. if (alloc && i==k) {
  231. k += k+1;
  232. wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t));
  233. if (!tmp) goto alloc_fail;
  234. wcs = tmp;
  235. }
  236. } else if (size != SIZE_l) {
  237. int l = wctomb(s?s+i:tmp, c);
  238. if (l<0) goto input_fail;
  239. i += l;
  240. if (alloc && i > k-4) {
  241. k += k+1;
  242. char *tmp = realloc(s, k);
  243. if (!tmp) goto alloc_fail;
  244. s = tmp;
  245. }
  246. }
  247. pos++;
  248. width-=(width>0);
  249. gotmatch=1;
  250. }
  251. if (width) {
  252. ungetwc(c, f);
  253. if (t == 'c' || !gotmatch) goto match_fail;
  254. }
  255. if (alloc) {
  256. if (size == SIZE_l) *(wchar_t **)dest = wcs;
  257. else *(char **)dest = s;
  258. }
  259. if (t != 'c') {
  260. if (wcs) wcs[i] = 0;
  261. if (s) s[i] = 0;
  262. }
  263. break;
  264. case 'd': case 'i': case 'o': case 'u': case 'x':
  265. case 'a': case 'e': case 'f': case 'g':
  266. case 'A': case 'E': case 'F': case 'G': case 'X':
  267. case 'p':
  268. if (width < 1) width = 0;
  269. snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln",
  270. 1+!dest, "%*", width, size_pfx[size+2], t);
  271. cnt = 0;
  272. if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1)
  273. goto input_fail;
  274. else if (!cnt)
  275. goto match_fail;
  276. pos += cnt;
  277. break;
  278. default:
  279. goto fmt_fail;
  280. }
  281. if (dest) matches++;
  282. }
  283. if (0) {
  284. fmt_fail:
  285. alloc_fail:
  286. input_fail:
  287. if (!matches) matches--;
  288. match_fail:
  289. if (alloc) {
  290. free(s);
  291. free(wcs);
  292. }
  293. }
  294. FUNLOCK(f);
  295. return matches;
  296. }
  297. weak_alias(vfwscanf,__isoc99_vfwscanf);