/* vfwscanf.c */
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdarg.h>
  4. #include <ctype.h>
  5. #include <wchar.h>
  6. #include <wctype.h>
  7. #include <limits.h>
  8. #include <string.h>
  9. #include "stdio_impl.h"
  10. #include "shgetc.h"
  11. #include "intscan.h"
  12. #include "floatscan.h"
  13. #include "libc.h"
  14. #define SIZE_hh -2
  15. #define SIZE_h -1
  16. #define SIZE_def 0
  17. #define SIZE_l 1
  18. #define SIZE_L 2
  19. #define SIZE_ll 3
  20. static void store_int(void *dest, int size, unsigned long long i)
  21. {
  22. if (!dest) return;
  23. switch (size) {
  24. case SIZE_hh:
  25. *(char *)dest = i;
  26. break;
  27. case SIZE_h:
  28. *(short *)dest = i;
  29. break;
  30. case SIZE_def:
  31. *(int *)dest = i;
  32. break;
  33. case SIZE_l:
  34. *(long *)dest = i;
  35. break;
  36. case SIZE_ll:
  37. *(long long *)dest = i;
  38. break;
  39. }
  40. }
  41. static void *arg_n(va_list ap, unsigned int n)
  42. {
  43. void *p;
  44. unsigned int i;
  45. va_list ap2;
  46. va_copy(ap2, ap);
  47. for (i=n; i>1; i--) va_arg(ap2, void *);
  48. p = va_arg(ap2, void *);
  49. va_end(ap2);
  50. return p;
  51. }
  52. static int in_set(const wchar_t *set, int c)
  53. {
  54. int j;
  55. const wchar_t *p = set;
  56. if (*p == '-') {
  57. if (c=='-') return 1;
  58. p++;
  59. } else if (*p == ']') {
  60. if (c==']') return 1;
  61. p++;
  62. }
  63. for (; *p && *p != ']'; p++) {
  64. if (*p=='-' && p[1] && p[1] != ']')
  65. for (j=p++[-1]; j<*p; j++)
  66. if (c==j) return 1;
  67. if (c==*p) return 1;
  68. }
  69. return 0;
  70. }
  71. #if 1
  72. #undef getwc
  73. #define getwc(f) \
  74. ((f)->rpos < (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f))
  75. #undef ungetwc
  76. #define ungetwc(c,f) \
  77. ((f)->rend && (c)<128U ? *--(f)->rpos : ungetwc((c),(f)))
  78. #endif
  79. int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
  80. {
  81. int width;
  82. int size;
  83. int alloc;
  84. const wchar_t *p;
  85. int c, t;
  86. char *s;
  87. wchar_t *wcs;
  88. void *dest=NULL;
  89. int invert;
  90. int matches=0;
  91. off_t pos = 0, cnt;
  92. static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" };
  93. char tmp[3*sizeof(int)+10];
  94. const wchar_t *set;
  95. size_t i, k;
  96. FLOCK(f);
  97. f->mode |= f->mode+1;
  98. for (p=fmt; *p; p++) {
  99. alloc = 0;
  100. if (iswspace(*p)) {
  101. while (iswspace(p[1])) p++;
  102. while (iswspace((c=getwc(f)))) pos++;
  103. ungetwc(c, f);
  104. continue;
  105. }
  106. if (*p != '%' || p[1] == '%') {
  107. p += *p=='%';
  108. c = getwc(f);
  109. if (c!=*p) {
  110. ungetwc(c, f);
  111. if (c<0) goto input_fail;
  112. goto match_fail;
  113. }
  114. pos++;
  115. continue;
  116. }
  117. p++;
  118. if (*p=='*') {
  119. dest = 0; p++;
  120. } else if (iswdigit(*p) && p[1]=='$') {
  121. dest = arg_n(ap, *p-'0'); p+=2;
  122. } else {
  123. dest = va_arg(ap, void *);
  124. }
  125. for (width=0; iswdigit(*p); p++) {
  126. width = 10*width + *p - '0';
  127. }
  128. if (*p=='m') {
  129. wcs = 0;
  130. s = 0;
  131. alloc = !!dest;
  132. p++;
  133. } else {
  134. alloc = 0;
  135. }
  136. size = SIZE_def;
  137. switch (*p++) {
  138. case 'h':
  139. if (*p == 'h') p++, size = SIZE_hh;
  140. else size = SIZE_h;
  141. break;
  142. case 'l':
  143. if (*p == 'l') p++, size = SIZE_ll;
  144. else size = SIZE_l;
  145. break;
  146. case 'j':
  147. size = SIZE_ll;
  148. break;
  149. case 'z':
  150. case 't':
  151. size = SIZE_l;
  152. break;
  153. case 'L':
  154. size = SIZE_L;
  155. break;
  156. case 'd': case 'i': case 'o': case 'u': case 'x':
  157. case 'a': case 'e': case 'f': case 'g':
  158. case 'A': case 'E': case 'F': case 'G': case 'X':
  159. case 's': case 'c': case '[':
  160. case 'S': case 'C':
  161. case 'p': case 'n':
  162. p--;
  163. break;
  164. default:
  165. goto fmt_fail;
  166. }
  167. t = *p;
  168. /* Transform S,C -> ls,lc */
  169. if ((t&0x2f)==3) {
  170. size = SIZE_l;
  171. t |= 32;
  172. }
  173. if (t != 'n') {
  174. if (t != '[' && (t|32) != 'c')
  175. while (iswspace((c=getwc(f)))) pos++;
  176. else
  177. c=getwc(f);
  178. if (c < 0) goto input_fail;
  179. ungetwc(c, f);
  180. }
  181. switch (t) {
  182. case 'n':
  183. store_int(dest, size, pos);
  184. /* do not increment match count, etc! */
  185. continue;
  186. case 's':
  187. case 'c':
  188. case '[':
  189. if (t == 'c') {
  190. if (width<1) width = 1;
  191. invert = 1;
  192. set = L"";
  193. } else if (t == 's') {
  194. invert = 1;
  195. set = (const wchar_t[]){
  196. ' ', '\t', '\n', '\r', 11, 12, 0x0085,
  197. 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
  198. 0x2006, 0x2008, 0x2009, 0x200a,
  199. 0x2028, 0x2029, 0x205f, 0x3000, 0 };
  200. } else {
  201. if (*++p == '^') p++, invert = 1;
  202. else invert = 0;
  203. set = p;
  204. if (*p==']') p++;
  205. while (*p!=']') {
  206. if (!*p) goto fmt_fail;
  207. p++;
  208. }
  209. }
  210. s = (size == SIZE_def) ? dest : 0;
  211. wcs = (size == SIZE_l) ? dest : 0;
  212. int gotmatch = 0;
  213. if (width < 1) width = -1;
  214. i = 0;
  215. if (alloc) {
  216. k = t=='c' ? width+1U : 31;
  217. if (size == SIZE_l) {
  218. wcs = malloc(k*sizeof(wchar_t));
  219. if (!wcs) goto alloc_fail;
  220. } else {
  221. s = malloc(k);
  222. if (!s) goto alloc_fail;
  223. }
  224. }
  225. while (width) {
  226. if ((c=getwc(f))<0) break;
  227. if (in_set(set, c) == invert)
  228. break;
  229. if (wcs) {
  230. wcs[i++] = c;
  231. if (alloc && i==k) {
  232. k += k+1;
  233. wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t));
  234. if (!tmp) goto alloc_fail;
  235. wcs = tmp;
  236. }
  237. } else if (size != SIZE_l) {
  238. int l = wctomb(s?s+i:tmp, c);
  239. if (l<0) goto input_fail;
  240. i += l;
  241. if (alloc && i > k-4) {
  242. k += k+1;
  243. char *tmp = realloc(s, k);
  244. if (!tmp) goto alloc_fail;
  245. s = tmp;
  246. }
  247. }
  248. pos++;
  249. width-=(width>0);
  250. gotmatch=1;
  251. }
  252. if (width) {
  253. ungetwc(c, f);
  254. if (t == 'c' || !gotmatch) goto match_fail;
  255. }
  256. if (alloc) {
  257. if (size == SIZE_l) *(wchar_t **)dest = wcs;
  258. else *(char **)dest = s;
  259. }
  260. if (t != 'c') {
  261. if (wcs) wcs[i] = 0;
  262. if (s) s[i] = 0;
  263. }
  264. break;
  265. case 'd': case 'i': case 'o': case 'u': case 'x':
  266. case 'a': case 'e': case 'f': case 'g':
  267. case 'A': case 'E': case 'F': case 'G': case 'X':
  268. case 'p':
  269. if (width < 1) width = 0;
  270. snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln",
  271. 1+!dest, "%*", width, size_pfx[size+2], t);
  272. cnt = 0;
  273. if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1)
  274. goto input_fail;
  275. else if (!cnt)
  276. goto match_fail;
  277. pos += cnt;
  278. break;
  279. default:
  280. goto fmt_fail;
  281. }
  282. if (dest) matches++;
  283. }
  284. if (0) {
  285. fmt_fail:
  286. alloc_fail:
  287. input_fail:
  288. if (!matches) matches--;
  289. match_fail:
  290. if (alloc) {
  291. free(s);
  292. free(wcs);
  293. }
  294. }
  295. FUNLOCK(f);
  296. return matches;
  297. }
  298. weak_alias(vfwscanf,__isoc99_vfwscanf);