/* __scanf.c (9.4 KB) */
  #include <stdio.h>
  #include <stdlib.h>
  #include <stdarg.h>
  #include <stdint.h>
  #include <ctype.h>
  #include <wchar.h>
  #include <wctype.h>
  #include <limits.h>
  #include <string.h>
  #include <errno.h>
  #include <math.h>
  #include <float.h>
  #include "__scanf.h"
  13. static int read(rctx_t *r)
  14. {
  15. if (--r->w < 0) return r->w = -1;
  16. if (r->u) r->u = 0;
  17. else r->read(r);
  18. return r->c;
  19. }
  20. static void unread(rctx_t *r)
  21. {
  22. if (r->c < 0 || r->w < 0) return;
  23. r->w++;
  24. r->u = 1;
  25. }
  /* Length-modifier codes shared by the format parser and store_int(). */
  #define SIZE_hh -2
  #define SIZE_h -1
  #define SIZE_def 0
  #define SIZE_l 1
  #define SIZE_ll 2
  #define SIZE_L 3

  /*
   * Write an integer conversion result through dest.
   *
   * dest  destination object, or NULL to discard the value
   * size  one of the SIZE_* codes; selects the destination's integer type
   *       (any code without an integer type, e.g. SIZE_L, stores nothing)
   * neg   nonzero if the value must be negated before it is stored
   * i     magnitude of the converted value
   */
  static void store_int(void *dest, int size, int neg, unsigned long long i)
  {
      /* Negation is done in unsigned arithmetic, so it cannot overflow. */
      unsigned long long value = neg ? 0ULL - i : i;

      if (dest == NULL)
          return;

      if (size == SIZE_hh) {
          *(char *)dest = value;
      } else if (size == SIZE_h) {
          *(short *)dest = value;
      } else if (size == SIZE_def) {
          *(int *)dest = value;
      } else if (size == SIZE_l) {
          *(long *)dest = value;
      } else if (size == SIZE_ll) {
          *(long long *)dest = value;
      }
  }
  /*
   * Return the n-th pointer argument (1-based) of the argument list ap.
   *
   * The walk is done on a private copy, so the caller's ap is left
   * untouched.  Every argument is fetched as void *.  n == 0 yields the
   * first argument, the same as n == 1.
   */
  static void *arg_n(va_list ap, unsigned int n)
  {
      va_list cursor;
      void *result;

      va_copy(cursor, ap);

      /* Discard the n-1 arguments that precede the requested one. */
      while (n > 1) {
          (void)va_arg(cursor, void *);
          n--;
      }

      result = va_arg(cursor, void *);
      va_end(cursor);
      return result;
  }
  65. int __scanf(rctx_t *r, const wchar_t *fmt, va_list ap)
  66. {
  67. int mode=0;
  68. int width;
  69. int size;
  70. const wchar_t *p, *z;
  71. int c, l, t, m;
  72. long long dummy;
  73. char *s;
  74. wchar_t *wcs;
  75. mbstate_t st;
  76. int wide = r->wide;
  77. void *dest=NULL;
  78. int invert;
  79. unsigned long long i=0;
  80. int neg=0;
  81. int matches=0;
  82. long double f;
  83. int (*is_space)(int) = r->is_space;
  84. for (p=fmt; *p; ) {
  85. if (is_space(*p)) {
  86. do p++; while (is_space(*p));
  87. do r->w=1; while (is_space(read(r)));
  88. unread(r);
  89. continue;
  90. } else if (*p != '%' || p[1] == '%') {
  91. if (*p == '%') p++;
  92. r->w = 1;
  93. if ((c = read(r)) < 0)
  94. goto input_fail;
  95. if (*p++ != c)
  96. goto match_fail;
  97. continue;
  98. }
  99. p++;
  100. if (mode != 1) {
  101. for (z=p; isdigit(*z); z++);
  102. if (*z != '$' && *z != '*') {
  103. if (mode == 0) mode = 1;
  104. else goto fmt_fail;
  105. } else if (*z != '*') {
  106. int pos = 0;
  107. mode = 2;
  108. for (; p<z; p++) {
  109. pos = 10*pos + *p - '0';
  110. }
  111. p++;
  112. if (!pos) goto fmt_fail;
  113. dest = arg_n(ap, pos);
  114. }
  115. }
  116. if (*p == '*') {
  117. dest = NULL;
  118. p++;
  119. } else if (mode == 1) {
  120. dest = va_arg(ap, void *);
  121. }
  122. if (!*p) goto fmt_fail;
  123. width = 0;
  124. for (; isdigit(*p); p++) {
  125. width = 10*width + *p - '0';
  126. }
  127. size = 0;
  128. switch (*p++) {
  129. case 0:
  130. goto fmt_fail;
  131. case 'h':
  132. if (*p == 'h') p++, size = SIZE_hh;
  133. else size = SIZE_h;
  134. break;
  135. case 'l':
  136. if (*p == 'l') p++, size = SIZE_ll;
  137. else size = SIZE_l;
  138. break;
  139. case 'j':
  140. size = SIZE_ll;
  141. break;
  142. case 'z':
  143. case 't':
  144. size = SIZE_l;
  145. break;
  146. case 'L':
  147. size = SIZE_L;
  148. break;
  149. case 'd': case 'i': case 'o': case 'u': case 'x':
  150. case 'a': case 'e': case 'f': case 'g':
  151. case 'A': case 'E': case 'F': case 'G': case 'X':
  152. case 's': case 'c': case '[':
  153. case 'S': case 'C':
  154. case 'p': case 'n':
  155. p--;
  156. break;
  157. default:
  158. goto fmt_fail;
  159. }
  160. t = *p++;
  161. switch (t) {
  162. case 'C':
  163. case 'c':
  164. if (width < 1) width = 1;
  165. case 's':
  166. if (size == SIZE_l) t &= ~0x20;
  167. case 'd': case 'i': case 'o': case 'u': case 'x':
  168. case 'a': case 'e': case 'f': case 'g':
  169. case 'A': case 'E': case 'F': case 'G': case 'X':
  170. case '[': case 'S':
  171. case 'p': case 'n':
  172. if (width < 1) width = INT_MAX;
  173. break;
  174. default:
  175. goto fmt_fail;
  176. }
  177. r->w = width;
  178. if (t != 'n') {
  179. if (read(r) < 0) goto input_fail;
  180. unread(r);
  181. }
  182. switch (t) {
  183. case 'n':
  184. store_int(dest, size, 0, r->l - r->u);
  185. /* do not increment match count, etc! */
  186. continue;
  187. case 'C':
  188. wcs = dest ? dest : (void *)&dummy;
  189. st = (mbstate_t){ 0 };
  190. while ((c=read(r)) >= 0) {
  191. if (wide) {
  192. if (dest) *wcs++ = c;
  193. } else {
  194. char ch = c;
  195. switch (mbrtowc(wcs, &ch, 1, &st)) {
  196. case -1:
  197. goto enc_fail;
  198. case -2:
  199. break;
  200. default:
  201. if (dest) wcs++;
  202. }
  203. }
  204. }
  205. if (r->w > 0) goto match_fail;
  206. break;
  207. case 'c':
  208. s = dest ? dest : (void *)&dummy;
  209. while ((c=read(r)) >= 0) {
  210. if (wide) {
  211. if ((l=wctomb(s, c)) < 0)
  212. goto enc_fail;
  213. if (dest) s += l;
  214. } else {
  215. if (dest) *s++ = c;
  216. }
  217. }
  218. if (r->w > 0) goto match_fail;
  219. break;
  220. case '[':
  221. wcs = dest ? dest : (void *)&dummy;
  222. s = dest ? dest : (void *)&dummy;
  223. if (!wide && size == SIZE_l) st = (mbstate_t){ 0 };
  224. if (*p == '^') p++, invert = 1;
  225. else invert = 0;
  226. if (wide) {
  227. for (m=0; (c=read(r)) >= 0; m=1) {
  228. for (z=p; *z && *z != c && (*z != ']' || z==p); z++);
  229. if (!*z) goto fmt_fail;
  230. if (*z == c && (*z != ']' || z==p)) {
  231. if (invert) break;
  232. } else {
  233. if (!invert) break;
  234. }
  235. if (size == SIZE_l) {
  236. if (dest) *wcs++ = c;
  237. } else {
  238. if ((l=wctomb(s, c)) < 0)
  239. goto enc_fail;
  240. if (dest) s += l;
  241. }
  242. }
  243. for (p++; *p && *p != ']'; p++);
  244. p++;
  245. } else {
  246. unsigned char scanset[257];
  247. memset(scanset, invert, sizeof scanset);
  248. scanset[0] = 0;
  249. for (z=p; *z && (*z != ']' || z==p); z++)
  250. scanset[1+*z] = 1-invert;
  251. if (!*z) goto fmt_fail;
  252. p=z+1;
  253. c=0;
  254. for (m=0; scanset[(c=read(r))+1]; m=1) {
  255. if (size == SIZE_l) {
  256. char ch = c;
  257. switch (mbrtowc(wcs, &ch, 1, &st)) {
  258. case -1:
  259. goto enc_fail;
  260. case -2:
  261. break;
  262. default:
  263. if (dest) wcs++;
  264. }
  265. } else {
  266. if (dest) *s++ = c;
  267. }
  268. }
  269. }
  270. if (!m) goto match_fail;
  271. if (dest) {
  272. if (size == SIZE_l) *wcs++ = 0;
  273. else *s++ = 0;
  274. }
  275. break;
  276. default:
  277. /* read unlimited number of spaces, then reset width */
  278. do r->w = 1; while (is_space(c = read(r)));
  279. if (c < 0) goto input_fail;
  280. unread(r);
  281. r->w = width;
  282. }
  283. switch (t) {
  284. case 'p':
  285. case 'X':
  286. t = 'x';
  287. case 'd':
  288. case 'i':
  289. case 'o':
  290. case 'u':
  291. case 'x':
  292. i = m = neg = 0;
  293. if ((c=read(r)) == '-') neg=1;
  294. else if (c != '+') unread(r);
  295. switch (t) {
  296. case 'i':
  297. case 'x':
  298. if ((c=read(r)) != '0') {
  299. if (t == 'i') t = 'd';
  300. unread(r);
  301. break;
  302. }
  303. if (((c=read(r))|0x20) != 'x') {
  304. if (t == 'i') {
  305. t = 'o';
  306. /* lone 0 is valid octal */
  307. if ((unsigned)(c-'0') >= 8) {
  308. m = 1;
  309. goto int_finish;
  310. }
  311. }
  312. unread(r);
  313. break;
  314. }
  315. t = 'x';
  316. }
  317. }
  318. switch (t) {
  319. case 'd':
  320. case 'u':
  321. for (m=0; isdigit(c=read(r)); m=1)
  322. i = 10*i + c-'0';
  323. goto int_finish;
  324. case 'o':
  325. for (m=0; (unsigned)(c=read(r))-'0' < 8; m=1)
  326. i = (i<<3) + c-'0';
  327. goto int_finish;
  328. case 'x':
  329. for (m=0; ; m=1) {
  330. if (isdigit(c=read(r))) {
  331. i = (i<<4) + c-'0';
  332. } else if ((unsigned)(c|0x20)-'a' < 6) {
  333. i = (i<<4) + (c|0x20)-'a'+10;
  334. } else break;
  335. }
  336. int_finish:
  337. if (!m) goto match_fail;
  338. store_int(dest, size, neg, i);
  339. break;
  340. case 'a':
  341. case 'e':
  342. case 'f':
  343. case 'g':
  344. f = 0.0;
  345. neg = m = 0;
  346. if ((c=read(r)) == '-') neg=1;
  347. else if (c != '+') unread(r);
  348. /* FIXME: check for INF/NAN strings here */
  349. if (read(r)=='0' && (m=1, (read(r)|0x20) == 'x'))
  350. goto hexfloat;
  351. else unread(r);
  352. for (; isdigit(c=read(r)); m=1)
  353. f = 10.0 * f + (c-'0');
  354. if (c=='.') {
  355. double mag = 10.0;
  356. for (; isdigit(c=read(r)); mag*=10.0)
  357. f += (c-'0')/mag;
  358. }
  359. if ((c|0x20)=='e') {
  360. int ex=0, en=0;
  361. m = 0;
  362. if ((c=read(r))=='-') en=1;
  363. else if (c!='+') unread(r);
  364. for (; isdigit(c=read(r)); m=1)
  365. if (ex < LDBL_MAX_10_EXP)
  366. ex = 10 * ex + (c-'0');
  367. if (ex > LDBL_MAX_10_EXP)
  368. f = en ? 0 : INFINITY;
  369. else {
  370. if (en) while (ex--) f/=10.0;
  371. else while (ex--) f*=10.0;
  372. }
  373. }
  374. goto writefloat;
  375. hexfloat:
  376. m = 0;
  377. for (; isxdigit(c=read(r)); m=1)
  378. if (isdigit(c)) f = 16.0*f + (c-'0');
  379. else f = 16.0*f + ((c|32)-'a'+10);
  380. if (c=='.') {
  381. double mag = 1/16.0;
  382. for (; isxdigit(c=read(r)); mag*=1/16.0)
  383. if (isdigit(c)) f += (c-'0')*mag;
  384. else f += ((c|32)-'a'+10)*mag;
  385. }
  386. if ((c|0x20)=='p') {
  387. int ex=0, en=0;
  388. m = 0;
  389. if ((c=read(r))=='-') en=1;
  390. else if (c!='+') unread(r);
  391. for (; isdigit(c=read(r)); m=1)
  392. if (ex < LDBL_MAX_EXP)
  393. ex = 10 * ex + (c-'0');
  394. if (ex > LDBL_MAX_EXP)
  395. f = en ? 0 : INFINITY;
  396. else {
  397. if (en) while (ex--) f*=0.5;
  398. else while (ex--) f*=2.0;
  399. }
  400. }
  401. writefloat:
  402. if (!m) goto match_fail;
  403. if (neg) f *= -1.0;
  404. if (dest) switch (size) {
  405. case SIZE_def:
  406. *(float *)dest = f;
  407. break;
  408. case SIZE_l:
  409. *(double *)dest = f;
  410. break;
  411. case SIZE_L:
  412. *(long double *)dest = f;
  413. break;
  414. }
  415. break;
  416. case 'S':
  417. wcs = dest ? dest : (void *)&dummy;
  418. st = (mbstate_t){ 0 };
  419. while((c=read(r)) >= 0) {
  420. if (wide) {
  421. if (is_space(c)) break;
  422. if (dest) *wcs++ = c;
  423. } else {
  424. char ch = c;
  425. if (is_space(c)) break;
  426. switch (mbrtowc(wcs, &ch, 1, &st)) {
  427. case -1:
  428. goto enc_fail;
  429. case -2:
  430. break;
  431. default:
  432. if (dest) wcs++;
  433. }
  434. }
  435. }
  436. if (dest) *wcs++ = 0;
  437. break;
  438. case 's':
  439. s = dest ? dest : (void *)&dummy;
  440. while((c=read(r)) >= 0) {
  441. if (wide) {
  442. if (is_space(c)) break;
  443. if ((l=wctomb(s, c)) < 0)
  444. goto enc_fail;
  445. if (dest) s += l;
  446. } else {
  447. if (is_space(c)) break;
  448. if (dest) *s++ = c;
  449. }
  450. }
  451. if (dest) *s++ = 0;
  452. break;
  453. }
  454. /* unread will do nothing if field width was exhausted */
  455. unread(r);
  456. if (dest) matches++;
  457. }
  458. return matches;
  459. enc_fail:
  460. errno = EILSEQ;
  461. fmt_fail:
  462. input_fail:
  463. if (!matches) matches--;
  464. match_fail:
  465. unread(r);
  466. return matches;
  467. }