Browse Source

implement non-default plural rules for ngettext translations

the new code in dcngettext was written by me, and the expression
evaluator by Szabolcs Nagy (nsz).
Rich Felker 10 years ago
parent
commit
73d2a3bfda
2 changed files with 243 additions and 8 deletions
  1. 46 8
      src/locale/dcngettext.c
  2. 197 0
      src/locale/pleval.c

+ 46 - 8
src/locale/dcngettext.c

@@ -4,6 +4,7 @@
 #include <errno.h>
 #include <limits.h>
 #include <sys/stat.h>
+#include <ctype.h>
 #include "locale_impl.h"
 #include "libc.h"
 #include "atomic.h"
@@ -95,6 +96,8 @@ struct msgcat {
 	struct msgcat *next;
 	const void *map;
 	size_t map_size;
+	void *plural_rule;
+	int nplurals;
 	char name[];
 };
 
@@ -107,6 +110,7 @@ weak_alias(dummy_gettextdomain, __gettextdomain);
 
 const unsigned char *__map_file(const char *, size_t *);
 int __munmap(void *, size_t);
+unsigned long __pleval(const char *, unsigned long);
 
 char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n, int category)
 {
@@ -190,19 +194,53 @@ notrans:
 	const char *trans = __mo_lookup(p->map, p->map_size, msgid1);
 	if (!trans) goto notrans;
 
-	/* FIXME: support alternate plural rules */
-	if (n != 1) {
-		size_t l = strlen(trans);
-		if (l+1 >= p->map_size - (trans - (char *)p->map))
-			goto notrans;
-		trans += l+1;
+	/* Non-plural-processing gettext forms pass a null pointer as
+	 * msgid2 to request that dcngettext suppress plural processing. */
+	if (!msgid2) return (char *)trans;
+
+	if (!p->plural_rule) {
+		const char *rule = "n!=1;";
+		unsigned long np = 2;
+		const char *r = __mo_lookup(p->map, p->map_size, "");
+		char *z;
+		while (r && strncmp(r, "Plural-Forms:", 13)) {
+			z = strchr(r, '\n');
+			r = z ? z+1 : 0;
+		}
+		if (r) {
+			r += 13;
+			while (isspace(*r)) r++;
+			if (!strncmp(r, "nplurals=", 9)) {
+				np = strtoul(r+9, &z, 10);
+				r = z;
+			}
+			while (*r && *r != ';') r++;
+			if (*r) {
+				r++;
+				while (isspace(*r)) r++;
+				if (!strncmp(r, "plural=", 7))
+					rule = r+7;
+			}
+		}
+		a_store(&p->nplurals, np);
+		a_cas_p(&p->plural_rule, 0, (void *)rule);
+	}
+	if (p->nplurals) {
+		unsigned long plural = __pleval(p->plural_rule, n);
+		if (plural > p->nplurals) goto notrans;
+		while (plural--) {
+			size_t l = strlen(trans);
+			if (l+1 >= p->map_size - (trans - (char *)p->map))
+				goto notrans;
+			trans += l+1;
+		}
 	}
 	return (char *)trans;
 }
 
 char *dcgettext(const char *domainname, const char *msgid, int category)
 {
-	return dcngettext(domainname, msgid, msgid, 1, category);
+	return dcngettext(domainname, msgid, 0, 1, category); /* null msgid2 asks dcngettext to suppress plural processing */
 }
 
 char *dngettext(const char *domainname, const char *msgid1, const char *msgid2, unsigned long int n)
@@ -212,5 +250,5 @@ char *dngettext(const char *domainname, const char *msgid1, const char *msgid2,
 
 char *dgettext(const char *domainname, const char *msgid)
 {
-	return dcngettext(domainname, msgid, msgid, 1, LC_MESSAGES);
+	return dcngettext(domainname, msgid, 0, 1, LC_MESSAGES); /* null msgid2 asks dcngettext to suppress plural processing */
 }

+ 197 - 0
src/locale/pleval.c

@@ -0,0 +1,197 @@
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+grammar:
+
+Start = Expr ';'
+Expr  = Or | Or '?' Expr ':' Expr
+Or    = And | Or '||' And
+And   = Eq | And '&&' Eq
+Eq    = Rel | Eq '==' Rel | Eq '!=' Rel
+Rel   = Add | Rel '<=' Add | Rel '>=' Add | Rel '<' Add | Rel '>' Add
+Add   = Mul | Add '+' Mul | Add '-' Mul
+Mul   = Term | Mul '*' Term | Mul '/' Term | Mul '%' Term
+Term  = '(' Expr ')' | '!' Term | decimal | 'n'
+
+internals:
+
+recursive descent expression evaluator with stack depth limit.
+eval* functions return the value of the subexpression and set
+the current string pointer to the next non-space char.
+*/
+
+struct st { /* evaluator state threaded through every eval* function */
+	const char *s; /* current read position in the expression string */
+	unsigned long n; /* value substituted when the term 'n' is evaluated */
+	int err; /* set to 1 on any parse or evaluation error; the eval* functions never reset it */
+};
+
+static const char *skipspace(const char *s) /* return the first position at or after s for which isspace() is false */
+{
+	while (isspace(*s)) s++; /* NOTE(review): *s is plain char; where char is signed a negative byte is outside the range ISO C defines for isspace() -- confirm the ctype implementation in use tolerates it, or cast to unsigned char */
+	return s;
+}
+
+static unsigned long evalconst(struct st *st) /* parse a decimal constant at st->s; returns its value and leaves st->s at the next non-space char */
+{
+	char *e;
+	unsigned long n;
+	n = strtoul(st->s, &e, 10);
+	if (!isdigit(*st->s) || e == st->s || n == -1) /* require a leading digit (strtoul alone would accept leading whitespace or a sign); n == -1 compares against ULONG_MAX, which strtoul returns on overflow */
+		st->err = 1;
+	st->s = skipspace(e);
+	return n;
+}
+
+static unsigned long evalexpr(struct st *st, int d);
+
+static unsigned long evalterm(struct st *st, int d) /* Term: '(' Expr ')' | '!' Term | decimal | 'n'; d is the remaining recursion budget */
+{
+	unsigned long a;
+	if (d <= 0) { /* budget exhausted: flag an error instead of recursing further */
+		st->err = 1;
+		return 0;
+	}
+	st->s = skipspace(st->s); /* terms may be preceded by whitespace, e.g. after an operator */
+	if (*st->s == '!') {
+		st->s++;
+		return !evalterm(st, d-1); /* logical not: yields 0 or 1 */
+	}
+	if (*st->s == '(') {
+		st->s++;
+		a = evalexpr(st, d-1);
+		if (*st->s != ')') { /* missing closing parenthesis */
+			st->err = 1;
+			return 0;
+		}
+		st->s = skipspace(st->s + 1);
+		return a;
+	}
+	if (*st->s == 'n') {
+		st->s = skipspace(st->s + 1);
+		return st->n;
+	}
+	return evalconst(st); /* anything else must be a decimal constant; evalconst sets st->err if it is not */
+}
+
+static unsigned long evalmul(struct st *st, int d) /* Mul: left-to-right chain of Terms joined by '*', '/' or '%' */
+{
+	unsigned long b, a = evalterm(st, d-1);
+	int op;
+	for (;;) {
+		op = *st->s;
+		if (op != '*' && op != '/' && op != '%') /* no further multiplicative operator: the chain is complete */
+			return a;
+		st->s++;
+		b = evalterm(st, d-1);
+		if (op == '*') {
+			a *= b; /* unsigned long arithmetic: wraps on overflow */
+		} else if (!b) { /* division or remainder by zero is reported as an error */
+			st->err = 1;
+			return 0;
+		} else if (op == '%') {
+			a %= b;
+		} else {
+			a /= b;
+		}
+	}
+}
+
+static unsigned long evaladd(struct st *st, int d) /* Add: left-to-right chain of Muls joined by '+' or '-' */
+{
+	unsigned long a = 0;
+	int add = 1; /* nonzero: add the next operand, zero: subtract it; the first operand is added to 0 */
+	for (;;) {
+		a += (add?1:-1) * evalmul(st, d-1); /* -1 converts to ULONG_MAX in the multiplication, so the product is the modular negation and the sum wraps like unsigned subtraction */
+		if (*st->s != '+' && *st->s != '-')
+			return a;
+		add = *st->s == '+';
+		st->s++;
+	}
+}
+
+static unsigned long evalrel(struct st *st, int d) /* Rel: left-to-right chain of Adds joined by '<', '<=', '>' or '>='; each comparison yields 0 or 1 */
+{
+	unsigned long b, a = evaladd(st, d-1);
+	int less, eq;
+	for (;;) {
+		if (*st->s != '<' && *st->s != '>')
+			return a;
+		less = st->s[0] == '<';
+		eq = st->s[1] == '='; /* '=' directly after the operator char selects the '<=' or '>=' form */
+		st->s += 1 + eq; /* consume one or two operator chars */
+		b = evaladd(st, d-1);
+		a = (less ? a < b : a > b) || (eq && a == b);
+	}
+}
+
+static unsigned long evaleq(struct st *st, int d) /* Eq: left-to-right chain of Rels joined by '==' or '!='; each comparison yields 0 or 1 */
+{
+	unsigned long a = evalrel(st, d-1);
+	int neg;
+	for (;;) {
+		if ((st->s[0] != '=' && st->s[0] != '!') || st->s[1] != '=') /* stop unless the next two chars are "==" or "!=" */
+			return a;
+		neg = st->s[0] == '!';
+		st->s += 2;
+		a = evalrel(st, d-1) == a;
+		a ^= neg; /* a is 0 or 1 here, so xor with neg inverts the equality result for "!=" */
+	}
+}
+
+static unsigned long evaland(struct st *st, int d) /* And: left-to-right chain of Eqs joined by '&&'; each '&&' yields 0 or 1 */
+{
+	unsigned long a = evaleq(st, d-1);
+	for (;;) {
+		if (st->s[0] != '&' || st->s[1] != '&')
+			return a;
+		st->s += 2;
+		a = evaleq(st, d-1) && a; /* the call is the left operand of &&, so the right-hand subexpression is always parsed and evaluated even when a is 0 */
+	}
+}
+
+static unsigned long evalor(struct st *st, int d) /* Or: left-to-right chain of Ands joined by '||'; each '||' yields 0 or 1 */
+{
+	unsigned long a = evaland(st, d-1);
+	for (;;) {
+		if (st->s[0] != '|' || st->s[1] != '|')
+			return a;
+		st->s += 2;
+		a = evaland(st, d-1) || a; /* the call is the left operand of ||, so the right-hand subexpression is always parsed and evaluated even when a is nonzero */
+	}
+}
+
+static unsigned long evalexpr(struct st *st, int d) /* Expr: an Or, optionally followed by '?' Expr ':' Expr; d is the remaining recursion budget */
+{
+	unsigned long a1, a2, a3;
+	if (d <= 0) { /* budget exhausted: flag an error instead of recursing further */
+		st->err = 1;
+		return 0;
+	}
+	a1 = evalor(st, d-1);
+	if (*st->s != '?') /* no conditional operator: the Or value is the result */
+		return a1;
+	st->s++;
+	a2 = evalexpr(st, d-1); /* both branches are always evaluated, which also moves st->s past the whole conditional */
+	if (*st->s != ':') { /* '?' without a matching ':' */
+		st->err = 1;
+		return 0;
+	}
+	st->s++;
+	a3 = evalexpr(st, d-1);
+	return a1 ? a2 : a3; /* NOTE(review): an error raised while evaluating the branch that is not selected (e.g. division by zero) still sets st->err -- confirm this is intended */
+}
+
+unsigned long __pleval(const char *s, unsigned long n) /* evaluate the plural expression s for the count n; returns the result, or (unsigned long)-1 on error */
+{
+	unsigned long a;
+	struct st st;
+	st.s = s;
+	st.n = n;
+	st.err = 0;
+	a = evalexpr(&st, 100); /* 100 is the initial recursion budget; every nested eval* call is passed one less */
+	if (st.err || *st.s != ';') /* fail on any flagged error, or if the expression is not terminated by ';' */
+		return -1;
+	return a;
+}