|
@@ -1,157 +1,299 @@
|
|
|
-#include <fnmatch.h>
|
|
|
-#include <wctype.h>
|
|
|
+/*
|
|
|
+ * An implementation of what I call the "Sea of Stars" algorithm for
|
|
|
+ * POSIX fnmatch(). The basic idea is that we factor the pattern into
|
|
|
+ * a head component (which we match first and can reject without ever
|
|
|
+ * measuring the length of the string), an optional tail component
|
|
|
+ * (which only exists if the pattern contains at least one star), and
|
|
|
+ * an optional "sea of stars", a set of star-separated components
|
|
|
+ * between the head and tail. After the head and tail matches have
|
|
|
+ * been removed from the input string, the components in the "sea of
|
|
|
+ * stars" are matched sequentially by searching for their first
|
|
|
+ * occurrence past the end of the previous match.
|
|
|
+ *
|
|
|
+ * - Rich Felker, April 2012
|
|
|
+ */
|
|
|
+
|
|
|
#include <string.h>
|
|
|
-#include <wchar.h>
|
|
|
+#include <fnmatch.h>
|
|
|
#include <stdlib.h>
|
|
|
-#include <limits.h>
|
|
|
+#include <wchar.h>
|
|
|
+#include <wctype.h>
|
|
|
|
|
|
-static int next(const char **s)
|
|
|
+#define END -1
|
|
|
+#define UNMATCHABLE -2
|
|
|
+#define BRACKET -3
|
|
|
+#define QUESTION -4
|
|
|
+#define STAR -5
|
|
|
+
|
|
|
+static int str_next(const char *str, size_t n, size_t *step)
|
|
|
{
|
|
|
- wchar_t c;
|
|
|
- int l = mbtowc(&c, *s, MB_LEN_MAX);
|
|
|
- /* hack to allow literal matches of invalid byte sequences */
|
|
|
- if (l < 0) return (unsigned char)*(*s)++ - 0x100;
|
|
|
- *s += l;
|
|
|
- return c;
|
|
|
+ if (!n) {
|
|
|
+ *step = 0;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ if (str[0] >= 128U) {
|
|
|
+ wchar_t wc;
|
|
|
+ int k = mbtowc(&wc, str, n);
|
|
|
+ if (k<0) {
|
|
|
+ *step = 1;
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+ *step = k;
|
|
|
+ return wc;
|
|
|
+ }
|
|
|
+ *step = 1;
|
|
|
+ return str[0];
|
|
|
}
|
|
|
|
|
|
-#define BRACKET_ERROR -0x100
|
|
|
-#define BRACKET_NOCHAR -0x101
|
|
|
-
|
|
|
-static int bracket_next(const char **s)
|
|
|
+static int pat_next(const char *pat, size_t m, size_t *step, int flags)
|
|
|
{
|
|
|
- int c;
|
|
|
- int type;
|
|
|
- if (**s == '[') {
|
|
|
- type = *(*s+1);
|
|
|
- if (type == '.' || type == '=') {
|
|
|
- *s += 2;
|
|
|
- c = next(s);
|
|
|
- if (c <= 0) return BRACKET_ERROR;
|
|
|
- if (**s == type && *(*s+1) == ']') {
|
|
|
- *s += 2;
|
|
|
- return c;
|
|
|
+ int esc = 0;
|
|
|
+ if (!m || !*pat) {
|
|
|
+ *step = 0;
|
|
|
+ return END;
|
|
|
+ }
|
|
|
+ *step = 1;
|
|
|
+ if (pat[0]=='\\' && !(flags & FNM_NOESCAPE)) {
|
|
|
+ *step = 2;
|
|
|
+ pat++;
|
|
|
+ esc = 1;
|
|
|
+ goto escaped;
|
|
|
+ }
|
|
|
+ if (pat[0]=='[') {
|
|
|
+ size_t k = 1;
|
|
|
+ if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
|
|
|
+ if (k<m) if (pat[k] == ']') k++;
|
|
|
+ for (; k<m && pat[k] && pat[k]!=']'; k++) {
|
|
|
+ if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
|
|
|
+ int z = pat[k+1];
|
|
|
+ k+=2;
|
|
|
+ if (k<m && pat[k]) k++;
|
|
|
+ while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
|
|
|
+ if (k==m || !pat[k]) break;
|
|
|
}
|
|
|
- for (; **s && (**s != type || *(*s+1) != ']'); (*s)++);
|
|
|
- if (!**s) return BRACKET_ERROR;
|
|
|
- *s += 2;
|
|
|
- return BRACKET_NOCHAR;
|
|
|
}
|
|
|
+ if (k==m || !pat[k]) {
|
|
|
+ *step = 1;
|
|
|
+ return '[';
|
|
|
+ }
|
|
|
+ *step = k+1;
|
|
|
+ return BRACKET;
|
|
|
+ }
|
|
|
+ if (pat[0] == '*')
|
|
|
+ return STAR;
|
|
|
+ if (pat[0] == '?')
|
|
|
+ return QUESTION;
|
|
|
+escaped:
|
|
|
+ if (pat[0] >= 128U) {
|
|
|
+ wchar_t wc;
|
|
|
+ int k = mbtowc(&wc, pat, m);
|
|
|
+ if (k<0) {
|
|
|
+ *step = 0;
|
|
|
+ return UNMATCHABLE;
|
|
|
+ }
|
|
|
+ *step = k + esc;
|
|
|
+ return wc;
|
|
|
}
|
|
|
- c = next(s);
|
|
|
- if (c <= 0) return BRACKET_ERROR;
|
|
|
- return c;
|
|
|
+ return pat[0];
|
|
|
}
|
|
|
|
|
|
-#define __FNM_CONT 0x8000
|
|
|
+static int match_bracket(const char *p, int k)
|
|
|
+{
|
|
|
+ wchar_t wc;
|
|
|
+ int inv = 0;
|
|
|
+ p++;
|
|
|
+ if (*p=='^' || *p=='!') {
|
|
|
+ inv = 1;
|
|
|
+ p++;
|
|
|
+ }
|
|
|
+ if (*p==']') {
|
|
|
+ if (k==']') return !inv;
|
|
|
+ p++;
|
|
|
+ } else if (*p=='-') {
|
|
|
+ if (k=='-') return !inv;
|
|
|
+ p++;
|
|
|
+ }
|
|
|
+ wc = p[-1];
|
|
|
+ for (; *p != ']'; p++) {
|
|
|
+ if (p[0]=='-' && p[1]!=']') {
|
|
|
+ wchar_t wc2;
|
|
|
+ int l = mbtowc(&wc2, p+1, 4);
|
|
|
+ if (l < 0) return 0;
|
|
|
+ if (wc<=wc2 && (unsigned)k-wc <= wc2-wc) return !inv;
|
|
|
+ p += l-1;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
|
|
|
+ const char *p0 = p+2;
|
|
|
+ int z = p[1];
|
|
|
+ p+=3;
|
|
|
+ while (p[-1]!=z || p[0]!=']') p++;
|
|
|
+ if (z == ':' && p-1-p0 < 16) {
|
|
|
+ char buf[16];
|
|
|
+ memcpy(buf, p0, p-1-p0);
|
|
|
+ buf[p-1-p0] = 0;
|
|
|
+ if (iswctype(k, wctype(buf))) return !inv;
|
|
|
+ }
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (*p < 128U) {
|
|
|
+ wc = (unsigned char)*p;
|
|
|
+ } else {
|
|
|
+ int l = mbtowc(&wc, p, 4);
|
|
|
+ if (l < 0) return 0;
|
|
|
+ p += l-1;
|
|
|
+ }
|
|
|
+ if (wc==k) return !inv;
|
|
|
+ }
|
|
|
+ return inv;
|
|
|
+}
|
|
|
|
|
|
-int fnmatch(const char *p, const char *s, int flags)
|
|
|
+static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
|
|
|
{
|
|
|
- int c, d, k;
|
|
|
- int not;
|
|
|
- int match;
|
|
|
- int first;
|
|
|
- int no_slash = (flags & FNM_PATHNAME) ? '/' : 0;
|
|
|
- int no_period = (flags & FNM_PERIOD) && !(flags & __FNM_CONT) ? '.' : 0x100;
|
|
|
- const char *p1;
|
|
|
-
|
|
|
- flags |= __FNM_CONT;
|
|
|
-
|
|
|
- while ((c = *p++)) {
|
|
|
- switch (c) {
|
|
|
- case '?':
|
|
|
- k = next(&s);
|
|
|
- if (!k || k == no_period || k == no_slash)
|
|
|
- return FNM_NOMATCH;
|
|
|
+ const char *p, *ptail, *endpat;
|
|
|
+ const char *s, *stail, *endstr;
|
|
|
+ size_t pinc, sinc, tailcnt=0;
|
|
|
+ int c, k;
|
|
|
+
|
|
|
+ if (flags & FNM_PERIOD) {
|
|
|
+ if (*str == '.' && *pat != '.')
|
|
|
+ return FNM_NOMATCH;
|
|
|
+ }
|
|
|
+ for (;;) {
|
|
|
+ switch ((c = pat_next(pat, m, &pinc, flags))) {
|
|
|
+ case UNMATCHABLE:
|
|
|
+ return FNM_NOMATCH;
|
|
|
+ case STAR:
|
|
|
+ pat++;
|
|
|
+ m--;
|
|
|
break;
|
|
|
- case '\\':
|
|
|
- if (!(flags & FNM_NOESCAPE)) {
|
|
|
- c = *p++;
|
|
|
- goto literal;
|
|
|
+ default:
|
|
|
+ k = str_next(str, n, &sinc);
|
|
|
+ if (k <= 0)
|
|
|
+ return (c==END) ? 0 : FNM_NOMATCH;
|
|
|
+ str += sinc;
|
|
|
+ n -= sinc;
|
|
|
+ if (c == BRACKET) {
|
|
|
+ if (!match_bracket(pat, k))
|
|
|
+ return FNM_NOMATCH;
|
|
|
+ } else if (c != QUESTION && k != c) {
|
|
|
+ return FNM_NOMATCH;
|
|
|
}
|
|
|
- if (*s++ != c) return FNM_NOMATCH;
|
|
|
+ pat+=pinc;
|
|
|
+ m-=pinc;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Compute real pat length if it was initially unknown/-1 */
|
|
|
+ m = strnlen(pat, m);
|
|
|
+ endpat = pat + m;
|
|
|
+
|
|
|
+ /* Find the last * in pat and count chars needed after it */
|
|
|
+ for (p=ptail=pat; p<endpat; p+=pinc) {
|
|
|
+ switch (pat_next(p, endpat-p, &pinc, flags)) {
|
|
|
+ case UNMATCHABLE:
|
|
|
+ return FNM_NOMATCH;
|
|
|
+ case STAR:
|
|
|
+ tailcnt=0;
|
|
|
+ ptail = p+1;
|
|
|
break;
|
|
|
- case '*':
|
|
|
- for (; *p == '*'; p++);
|
|
|
- if (*p && !*s) return FNM_NOMATCH;
|
|
|
- if (*s == no_period)
|
|
|
+ default:
|
|
|
+ tailcnt++;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Past this point we need not check for UNMATCHABLE in pat,
|
|
|
+ * because all of pat has already been parsed once. */
|
|
|
+
|
|
|
+ /* Compute real str length if it was initially unknown/-1 */
|
|
|
+ n = strnlen(str, n);
|
|
|
+ endstr = str + n;
|
|
|
+ if (n < tailcnt) return FNM_NOMATCH;
|
|
|
+
|
|
|
+ /* Find the final tailcnt chars of str, accounting for UTF-8.
|
|
|
+ * On illegal sequences we may get it wrong, but in that case
|
|
|
+ * we necessarily have a matching failure anyway. */
|
|
|
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
|
|
|
+ if (s[-1] < 128U) s--;
|
|
|
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
|
|
|
+ }
|
|
|
+ if (tailcnt) return FNM_NOMATCH;
|
|
|
+ stail = s;
|
|
|
+
|
|
|
+ /* Check that the pat and str tails match */
|
|
|
+ p = ptail;
|
|
|
+ for (;;) {
|
|
|
+ c = pat_next(p, endpat-p, &pinc, flags);
|
|
|
+ p += pinc;
|
|
|
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
|
|
|
+ if (c != END) return FNM_NOMATCH;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ s += sinc;
|
|
|
+ if (c == BRACKET) {
|
|
|
+ if (!match_bracket(p-pinc, k))
|
|
|
return FNM_NOMATCH;
|
|
|
- if (!*p && (!no_slash || !strchr(s, no_slash)))
|
|
|
- return 0;
|
|
|
- for (; *s; s++)
|
|
|
- if (!fnmatch(p, s, flags))
|
|
|
- return 0;
|
|
|
- else if (*s == no_slash)
|
|
|
- break;
|
|
|
+ } else if (c != QUESTION && k != c) {
|
|
|
return FNM_NOMATCH;
|
|
|
- case '[':
|
|
|
- p1 = p-1;
|
|
|
- not = (*p == '!' || *p == '^');
|
|
|
- if (not) p++;
|
|
|
- k = next(&s);
|
|
|
- if (!k || k == no_slash || k == no_period)
|
|
|
- return FNM_NOMATCH;
|
|
|
- match = 0;
|
|
|
- first = 1;
|
|
|
- for (;;) {
|
|
|
- if (!*p) goto literal_bracket;
|
|
|
- if (*p == ']' && !first) break;
|
|
|
- first = 0;
|
|
|
- if (*p == '[' && *(p+1) == ':') {
|
|
|
- const char *z;
|
|
|
- p += 2;
|
|
|
- for (z=p; *z && (*z != ':' || *(z+1) != ']'); z++);
|
|
|
- if (!*z || z-p > 32) { /* FIXME: symbolic const? */
|
|
|
- return FNM_NOMATCH;
|
|
|
- } else {
|
|
|
- char class[33];
|
|
|
- memcpy(class, p, z-p);
|
|
|
- class[z-p] = 0;
|
|
|
- if (iswctype(k, wctype(class)))
|
|
|
- match = 1;
|
|
|
- }
|
|
|
- p = z+2;
|
|
|
- continue;
|
|
|
- }
|
|
|
- c = bracket_next(&p);
|
|
|
- if (c == BRACKET_ERROR) {
|
|
|
-literal_bracket:
|
|
|
- match = (k=='[');
|
|
|
- p = p1;
|
|
|
- not = 0;
|
|
|
- break;
|
|
|
- }
|
|
|
- if (c == BRACKET_NOCHAR)
|
|
|
- continue;
|
|
|
- if (*p == '-' && *(p+1) != ']') {
|
|
|
- p++;
|
|
|
- d = bracket_next(&p);
|
|
|
- if (d == BRACKET_ERROR)
|
|
|
- goto literal_bracket;
|
|
|
- if (d == BRACKET_NOCHAR)
|
|
|
- continue;
|
|
|
- if (k >= c && k <= d)
|
|
|
- match = 1;
|
|
|
- continue;
|
|
|
- }
|
|
|
- if (k == c) match = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* We're all done with the tails now, so throw them out */
|
|
|
+ endstr = stail;
|
|
|
+ endpat = ptail;
|
|
|
+
|
|
|
+ /* Match pattern components until there are none left */
|
|
|
+ while (pat<endpat) {
|
|
|
+ p = pat;
|
|
|
+ s = str;
|
|
|
+ for (;;) {
|
|
|
+ c = pat_next(p, endpat-p, &pinc, flags);
|
|
|
+ p += pinc;
|
|
|
+ /* Encountering * completes/commits a component */
|
|
|
+ if (c == STAR) {
|
|
|
+ pat = p;
|
|
|
+ str = s;
|
|
|
+ break;
|
|
|
}
|
|
|
- p++;
|
|
|
- if (not == match)
|
|
|
+ k = str_next(s, endstr-s, &sinc);
|
|
|
+ if (!k)
|
|
|
return FNM_NOMATCH;
|
|
|
- break;
|
|
|
- default:
|
|
|
- literal:
|
|
|
- if (*s++ != c)
|
|
|
- return FNM_NOMATCH;
|
|
|
- if (c == no_slash && (flags & FNM_PERIOD)) {
|
|
|
- no_period = '.';
|
|
|
- continue;
|
|
|
+ if (c == BRACKET) {
|
|
|
+ if (!match_bracket(p-pinc, k))
|
|
|
+ break;
|
|
|
+ } else if (c != QUESTION && k != c) {
|
|
|
+ break;
|
|
|
}
|
|
|
- break;
|
|
|
+ s += sinc;
|
|
|
}
|
|
|
- no_period = 0x100;
|
|
|
+ if (c == STAR) continue;
|
|
|
+ /* If we failed, advance str, by 1 char if it's a valid
|
|
|
+ * char, or past all invalid bytes otherwise. */
|
|
|
+ k = str_next(str, endstr-str, &sinc);
|
|
|
+ if (k > 0) str += sinc;
|
|
|
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
|
|
|
}
|
|
|
- if (*s) return FNM_NOMATCH;
|
|
|
+
|
|
|
return 0;
|
|
|
}
|
|
|
+
|
|
|
+int fnmatch(const char *pat, const char *str, int flags)
|
|
|
+{
|
|
|
+ const char *s, *p;
|
|
|
+ size_t inc;
|
|
|
+ int c;
|
|
|
+ if (flags & FNM_PATHNAME) for (;;) {
|
|
|
+ for (s=str; *s && *s!='/'; s++);
|
|
|
+ for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
|
|
|
+ if (*s && *p!=*s) return FNM_NOMATCH;
|
|
|
+ if (fnmatch_internal(pat, p-pat, str, s-str, flags))
|
|
|
+ return FNM_NOMATCH;
|
|
|
+ if (!*s && c==END) return 0;
|
|
|
+ str = s+1;
|
|
|
+ pat = p+1;
|
|
|
+ }
|
|
|
+ return fnmatch_internal(pat, -1, str, -1, flags);
|
|
|
+}
|