/* __scanf.c */
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdarg.h>
  4. #include <ctype.h>
  5. #include <wchar.h>
  6. #include <wctype.h>
  7. #include <limits.h>
  8. #include <string.h>
  9. #include <errno.h>
  10. #include <math.h>
  11. #include <float.h>
  12. #include "__scanf.h"
  13. static int read(rctx_t *r)
  14. {
  15. if (--r->w < 0) return r->w = -1;
  16. if (r->u) r->u = 0;
  17. else r->read(r);
  18. return r->c;
  19. }
  20. static void unread(rctx_t *r)
  21. {
  22. //if (r->u || r->w < 0) return;
  23. if (r->w < 0) return;
  24. r->w++;
  25. r->u = 1;
  26. }
  27. #define SIZE_hh -2
  28. #define SIZE_h -1
  29. #define SIZE_def 0
  30. #define SIZE_l 1
  31. #define SIZE_ll 2
  32. #define SIZE_L 3
  33. static void store_int(void *dest, int size, int neg, unsigned long long i)
  34. {
  35. if (!dest) return;
  36. if (neg) i = -i;
  37. switch (size) {
  38. case SIZE_hh:
  39. *(char *)dest = i;
  40. break;
  41. case SIZE_h:
  42. *(short *)dest = i;
  43. break;
  44. case SIZE_def:
  45. *(int *)dest = i;
  46. break;
  47. case SIZE_l:
  48. *(long *)dest = i;
  49. break;
  50. case SIZE_ll:
  51. *(long long *)dest = i;
  52. break;
  53. }
  54. }
  55. static void *arg_n(va_list ap, unsigned int n)
  56. {
  57. void *p;
  58. unsigned int i;
  59. va_list ap2;
  60. va_copy(ap2, ap);
  61. for (i=n; i>1; i--) va_arg(ap2, void *);
  62. p = va_arg(ap2, void *);
  63. va_end(ap2);
  64. return p;
  65. }
  66. int __scanf(rctx_t *r, const wchar_t *fmt, va_list ap)
  67. {
  68. int mode=0;
  69. int width;
  70. int size;
  71. const wchar_t *p, *z;
  72. int c, l, t, m;
  73. long long dummy;
  74. char *s;
  75. wchar_t *wcs;
  76. mbstate_t st;
  77. int wide = r->wide;
  78. void *dest=NULL;
  79. int invert;
  80. unsigned long long i=0;
  81. int neg=0;
  82. int matches=0;
  83. long double f;
  84. int (*is_space)(int) = r->is_space;
  85. for (p=fmt; *p; ) {
  86. if (is_space(*p)) {
  87. do p++; while (is_space(*p));
  88. do r->w=1; while (is_space(read(r)));
  89. unread(r);
  90. continue;
  91. } else if (*p != '%' || p[1] == '%') {
  92. if (*p == '%') p++;
  93. r->w = 1;
  94. if (*p++ != read(r))
  95. goto match_fail;
  96. continue;
  97. }
  98. p++;
  99. if (mode != 1) {
  100. for (z=p; isdigit(*z); z++);
  101. if (*z != '$' && *z != '*') {
  102. if (mode == 0) mode = 1;
  103. else goto fmt_fail;
  104. } else if (*z != '*') {
  105. int pos = 0;
  106. mode = 2;
  107. for (; p<z; p++) {
  108. pos = 10*pos + *p - '0';
  109. }
  110. p++;
  111. if (!pos) goto fmt_fail;
  112. dest = arg_n(ap, pos);
  113. }
  114. }
  115. if (*p == '*') {
  116. dest = NULL;
  117. p++;
  118. } else if (mode == 1) {
  119. dest = va_arg(ap, void *);
  120. }
  121. if (!*p) goto fmt_fail;
  122. width = 0;
  123. for (; isdigit(*p); p++) {
  124. width = 10*width + *p - '0';
  125. }
  126. size = 0;
  127. switch (*p++) {
  128. case 0:
  129. goto fmt_fail;
  130. case 'h':
  131. if (*p == 'h') p++, size = SIZE_hh;
  132. else size = SIZE_h;
  133. break;
  134. case 'l':
  135. if (*p == 'l') p++, size = SIZE_ll;
  136. else size = SIZE_l;
  137. break;
  138. case 'j':
  139. size = SIZE_ll;
  140. break;
  141. case 'z':
  142. case 't':
  143. size = SIZE_l;
  144. break;
  145. case 'L':
  146. size = SIZE_L;
  147. break;
  148. case 'd': case 'i': case 'o': case 'u': case 'x':
  149. case 'a': case 'e': case 'f': case 'g':
  150. case 'A': case 'E': case 'F': case 'G': case 'X':
  151. case 's': case 'c': case '[':
  152. case 'S': case 'C':
  153. case 'p': case 'n':
  154. p--;
  155. break;
  156. default:
  157. goto fmt_fail;
  158. }
  159. t = *p++;
  160. switch (t) {
  161. case 'C':
  162. case 'c':
  163. if (width < 1) width = 1;
  164. case 's':
  165. if (size == SIZE_l) t &= ~0x20;
  166. case 'd': case 'i': case 'o': case 'u': case 'x':
  167. case 'a': case 'e': case 'f': case 'g':
  168. case 'A': case 'E': case 'F': case 'G': case 'X':
  169. case '[': case 'S':
  170. case 'p': case 'n':
  171. if (width < 1) width = INT_MAX;
  172. break;
  173. default:
  174. goto fmt_fail;
  175. }
  176. r->w = width;
  177. if (t != 'n') {
  178. if (read(r) < 0) goto input_fail;
  179. unread(r);
  180. }
  181. switch (t) {
  182. case 'n':
  183. store_int(dest, size, 0, r->l - r->u);
  184. /* do not increment match count, etc! */
  185. continue;
  186. case 'C':
  187. wcs = dest ? dest : (void *)&dummy;
  188. st = (mbstate_t){ 0 };
  189. while ((c=read(r)) >= 0) {
  190. if (wide) {
  191. if (dest) *wcs++ = c;
  192. } else {
  193. char ch = c;
  194. switch (mbrtowc(wcs, &ch, 1, &st)) {
  195. case -1:
  196. goto enc_fail;
  197. case -2:
  198. break;
  199. default:
  200. if (dest) wcs++;
  201. }
  202. }
  203. }
  204. if (r->w > 0) goto match_fail;
  205. break;
  206. case 'c':
  207. s = dest ? dest : (void *)&dummy;
  208. while ((c=read(r)) >= 0) {
  209. if (wide) {
  210. if ((l=wctomb(s, c)) < 0)
  211. goto enc_fail;
  212. if (dest) s += l;
  213. } else {
  214. if (dest) *s++ = c;
  215. }
  216. }
  217. if (r->w > 0) goto match_fail;
  218. break;
  219. case '[':
  220. wcs = dest ? dest : (void *)&dummy;
  221. s = dest ? dest : (void *)&dummy;
  222. if (!wide && size == SIZE_l) st = (mbstate_t){ 0 };
  223. if (*p == '^') p++, invert = 1;
  224. else invert = 0;
  225. if (wide) {
  226. for (m=0; (c=read(r)) >= 0; m=1) {
  227. for (z=p; *z && *z != c && (*z != ']' || z==p); z++);
  228. if (!*z) goto fmt_fail;
  229. if (*z == c && (*z != ']' || z==p)) {
  230. if (invert) break;
  231. } else {
  232. if (!invert) break;
  233. }
  234. if (size == SIZE_l) {
  235. if (dest) *wcs++ = c;
  236. } else {
  237. if ((l=wctomb(s, c)) < 0)
  238. goto enc_fail;
  239. if (dest) s += l;
  240. }
  241. }
  242. for (p++; *p && *p != ']'; p++);
  243. p++;
  244. } else {
  245. unsigned char scanset[257];
  246. memset(scanset, invert, sizeof scanset);
  247. scanset[0] = 0;
  248. for (z=p; *z && (*z != ']' || z==p); z++)
  249. scanset[1+*z] = 1-invert;
  250. if (!*z) goto fmt_fail;
  251. p=z+1;
  252. c=0;
  253. for (m=0; scanset[(c=read(r))+1]; m=1) {
  254. if (size == SIZE_l) {
  255. char ch = c;
  256. switch (mbrtowc(wcs, &ch, 1, &st)) {
  257. case -1:
  258. goto enc_fail;
  259. case -2:
  260. break;
  261. default:
  262. if (dest) wcs++;
  263. }
  264. } else {
  265. if (dest) *s++ = c;
  266. }
  267. }
  268. }
  269. if (!m) goto match_fail;
  270. if (dest) {
  271. if (size == SIZE_l) *wcs++ = 0;
  272. else *s++ = 0;
  273. }
  274. break;
  275. default:
  276. /* read unlimited number of spaces, then reset width */
  277. do r->w = 1; while (is_space(c = read(r)));
  278. if (c < 0) goto input_fail;
  279. unread(r);
  280. r->w = width;
  281. }
  282. switch (t) {
  283. case 'p':
  284. case 'X':
  285. t = 'x';
  286. case 'd':
  287. case 'i':
  288. case 'o':
  289. case 'u':
  290. case 'x':
  291. i = m = neg = 0;
  292. if ((c=read(r)) == '-') neg=1;
  293. else if (c != '+') unread(r);
  294. switch (t) {
  295. case 'i':
  296. case 'x':
  297. if ((c=read(r)) != '0') {
  298. if (t == 'i') t = 'd';
  299. unread(r);
  300. break;
  301. }
  302. if (((c=read(r))|0x20) != 'x') {
  303. if (t == 'i') {
  304. t = 'o';
  305. /* lone 0 is valid octal */
  306. if ((unsigned)(c-'0') >= 8) {
  307. m = 1;
  308. goto int_finish;
  309. }
  310. }
  311. unread(r);
  312. break;
  313. }
  314. t = 'x';
  315. }
  316. }
  317. switch (t) {
  318. case 'd':
  319. case 'u':
  320. for (m=0; isdigit(c=read(r)); m=1)
  321. i = 10*i + c-'0';
  322. goto int_finish;
  323. case 'o':
  324. for (m=0; (unsigned)(c=read(r))-'0' < 8; m=1)
  325. i = (i<<3) + c-'0';
  326. goto int_finish;
  327. case 'x':
  328. for (m=0; ; m=1) {
  329. if (isdigit(c=read(r))) {
  330. i = (i<<4) + c-'0';
  331. } else if ((unsigned)(c|0x20)-'a' < 6) {
  332. i = (i<<4) + (c|0x20)-'a'+10;
  333. } else break;
  334. }
  335. int_finish:
  336. if (!m) goto match_fail;
  337. store_int(dest, size, neg, i);
  338. break;
  339. case 'a':
  340. case 'e':
  341. case 'f':
  342. case 'g':
  343. f = 0.0;
  344. neg = m = 0;
  345. if ((c=read(r)) == '-') neg=1;
  346. else if (c != '+') unread(r);
  347. /* FIXME: check for INF/NAN strings here */
  348. if (read(r)=='0' && (m=1, (read(r)|0x20) == 'x'))
  349. goto hexfloat;
  350. else unread(r);
  351. for (; isdigit(c=read(r)); m=1)
  352. f = 10.0 * f + (c-'0');
  353. if (c=='.') {
  354. double mag = 10.0;
  355. for (; isdigit(c=read(r)); mag*=10.0)
  356. f += (c-'0')/mag;
  357. }
  358. if ((c|0x20)=='e') {
  359. int ex=0, en=0;
  360. m = 0;
  361. if ((c=read(r))=='-') en=1;
  362. else if (c!='+') unread(r);
  363. for (; isdigit(c=read(r)); m=1)
  364. if (ex < LDBL_MAX_10_EXP)
  365. ex = 10 * ex + (c-'0');
  366. if (ex > LDBL_MAX_10_EXP)
  367. f = en ? 0 : INFINITY;
  368. else {
  369. if (en) while (ex--) f/=10.0;
  370. else while (ex--) f*=10.0;
  371. }
  372. }
  373. goto writefloat;
  374. hexfloat:
  375. m = 0;
  376. for (; isxdigit(c=read(r)); m=1)
  377. if (isdigit(c)) f = 16.0*f + (c-'0');
  378. else f = 16.0*f + ((c|32)-'a'+10);
  379. if (c=='.') {
  380. double mag = 1/16.0;
  381. for (; isxdigit(c=read(r)); mag*=1/16.0)
  382. if (isdigit(c)) f += (c-'0')*mag;
  383. else f += ((c|32)-'a'+10)*mag;
  384. }
  385. if ((c|0x20)=='p') {
  386. int ex=0, en=0;
  387. m = 0;
  388. if ((c=read(r))=='-') en=1;
  389. else if (c!='+') unread(r);
  390. for (; isdigit(c=read(r)); m=1)
  391. if (ex < LDBL_MAX_EXP)
  392. ex = 10 * ex + (c-'0');
  393. if (ex > LDBL_MAX_EXP)
  394. f = en ? 0 : INFINITY;
  395. else {
  396. if (en) while (ex--) f*=0.5;
  397. else while (ex--) f*=2.0;
  398. }
  399. }
  400. writefloat:
  401. if (!m) goto match_fail;
  402. if (neg) f *= -1.0;
  403. if (dest) switch (size) {
  404. case SIZE_def:
  405. *(float *)dest = f;
  406. break;
  407. case SIZE_l:
  408. *(double *)dest = f;
  409. break;
  410. case SIZE_L:
  411. *(long double *)dest = f;
  412. break;
  413. }
  414. break;
  415. case 'S':
  416. wcs = dest ? dest : (void *)&dummy;
  417. st = (mbstate_t){ 0 };
  418. while((c=read(r)) >= 0) {
  419. if (wide) {
  420. if (is_space(c)) break;
  421. if (dest) *wcs++ = c;
  422. } else {
  423. char ch = c;
  424. if (is_space(c)) break;
  425. switch (mbrtowc(wcs, &ch, 1, &st)) {
  426. case -1:
  427. goto enc_fail;
  428. case -2:
  429. break;
  430. default:
  431. if (dest) wcs++;
  432. }
  433. }
  434. }
  435. if (dest) *wcs++ = 0;
  436. break;
  437. case 's':
  438. s = dest ? dest : (void *)&dummy;
  439. while((c=read(r)) >= 0) {
  440. if (wide) {
  441. if (is_space(c)) break;
  442. if ((l=wctomb(s, c)) < 0)
  443. goto enc_fail;
  444. if (dest) s += l;
  445. } else {
  446. if (is_space(c)) break;
  447. if (dest) *s++ = c;
  448. }
  449. }
  450. if (dest) *s++ = 0;
  451. break;
  452. }
  453. /* unread will do nothing if field width was exhausted */
  454. unread(r);
  455. if (dest) matches++;
  456. }
  457. return matches;
  458. enc_fail:
  459. errno = EILSEQ;
  460. fmt_fail:
  461. input_fail:
  462. if (!matches) matches--;
  463. match_fail:
  464. unread(r);
  465. return matches;
  466. }