# Reconstructed from a whitespace-collapsed patch against Lib/gettext.py.
# In that patch this class replaces the eval()-based c2py() helper, and the
# 'plural-forms' header handling becomes ``self.plural = _Plural(plural)``.
import operator as op
import re


class _Plural:
    """Callable that evaluates a C plural-form expression from a PO header.

    ``_Plural(expression)`` tokenizes and parses *expression* (the text that
    follows ``plural=`` in a ``Plural-Forms`` header) exactly once.  The
    resulting object is called with the count ``n`` and returns the index of
    the plural form as an ``int``.  No Python code is ever evaluated: only
    decimal integers, the identifier ``n``, parentheses and the operators
    ``! * / % + - < <= > >= == != && || ?:`` are accepted.

    Where C and Python disagree, C wins:

    * comparisons do not chain (``2==2==2`` is ``(2==2)==2``, i.e. 0);
    * ``< <= > >=`` bind tighter than ``==`` and ``!=``;
    * ``&&`` and ``||`` are logical, yield 0 or 1 and short-circuit;
    * only the selected arm of ``?:`` is evaluated.

    ``/`` is integer (floor) division.

    Raises:
        ValueError: from the constructor if *expression* contains an invalid
            token, is not syntactically valid or is nested too deeply; from
            a call if *n* is not an ``int``.
        ZeroDivisionError: from a call if the expression divides by zero
            for the given *n*.
    """

    token_pattern = re.compile(r"""
        (?P<NUMBER>[1-9][0-9]*|0(?!\d|X|x))        | # decimal integer
        (?P<WHITESPACE>[ \t])                      | # space and horizontal tab
        (?P<NAME>n)                                | # only n is allowed
        (?P<PARENTHESIS>[()])                      |
        (?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # *, /, %, +, -, >, >=, <,
                                                     # <=, ==, !, !=, ?:, &&, ||
                                                     # unary and bitwise ops
                                                     # not allowed
        (?P<INVALID>.)                               # invalid token
        """, re.VERBOSE)

    # Sentinel appended after the last real token.
    _end = ('(end)', None)

    # Binding powers of the operators that need special parsing.
    _unary_bp = 90      # operand of prefix '!'
    _ternary_bp = 30    # '?:' binds weaker than every binary operator

    # The three builders below run only while the class body executes (to
    # fill ``_infix``) and are deleted afterwards.  Each takes the evaluators
    # of both operands and returns the evaluator of the combined expression.

    def _strict(fn):
        """Return a builder for an operator that evaluates both operands."""
        def build(lhs, rhs):
            return lambda n: fn(lhs(n), rhs(n))
        return build

    def _logical_and(lhs, rhs):
        # ``and`` skips rhs when lhs is zero; bool() normalizes to 0/1.
        return lambda n: bool(lhs(n) and rhs(n))

    def _logical_or(lhs, rhs):
        # ``or`` skips rhs when lhs is non-zero; bool() normalizes to 0/1.
        return lambda n: bool(lhs(n) or rhs(n))

    # symbol -> (left binding power, builder); higher powers bind tighter.
    # Relational operators sit above equality operators, as in C.
    _infix = {
        '*': (80, _strict(op.mul)),
        '/': (80, _strict(op.floordiv)),
        '%': (80, _strict(op.mod)),
        '+': (70, _strict(op.add)),
        '-': (70, _strict(op.sub)),
        '>': (60, _strict(op.gt)),
        '>=': (60, _strict(op.ge)),
        '<': (60, _strict(op.lt)),
        '<=': (60, _strict(op.le)),
        '==': (55, _strict(op.eq)),
        '!=': (55, _strict(op.ne)),
        '&&': (50, _logical_and),
        '||': (40, _logical_or),
    }

    del _strict, _logical_and, _logical_or

    @staticmethod
    def _conditional(condition, when_true, when_false):
        """Return the evaluator of ``condition ? when_true : when_false``."""
        return lambda n: when_true(n) if condition(n) else when_false(n)

    def __init__(self, expression):
        # Tokens are (id, value) pairs: id is '(number)', '(name)', '(end)'
        # or the operator/parenthesis text itself.
        self.tokens = []
        for mo in self.token_pattern.finditer(expression):
            kind = mo.lastgroup
            text = mo.group()
            if kind == 'WHITESPACE':
                continue
            if kind == 'NUMBER':
                self.tokens.append(('(number)', int(text)))
            elif kind == 'NAME':
                self.tokens.append(('(name)', text))
            elif kind in ('PARENTHESIS', 'OPERATOR'):
                self.tokens.append((text, text))
            else:
                raise ValueError('Invalid token in plural expression.')
        self.tokens.append(self._end)

        # Parse once, here, so that a malformed header is rejected when it
        # is read and so that calls never touch the parser state.
        self.iter_tokens = iter(self.tokens)
        self.token = next(self.iter_tokens)
        try:
            self._evaluate = self._parse()
            if self.token[0] != '(end)':
                raise SyntaxError
        except (SyntaxError, RecursionError):
            raise ValueError('Invalid plural form syntax.') from None

    def __call__(self, n):
        if not isinstance(n, int):
            # n should be a number extracted from plural form.
            # So use ValueError instead of TypeError.
            raise ValueError('Plural value must be an integer.')
        try:
            return int(self._evaluate(n))
        except RecursionError:
            # An absurdly long operator chain nests evaluators too deeply.
            raise ValueError('Invalid plural form syntax.') from None

    def _parse(self, rbp=0):
        """Parse a sub-expression and return its evaluator (``n -> value``).

        Only operators whose binding power exceeds *rbp* are consumed.
        Raises SyntaxError on malformed input.
        """
        left = self._parse_prefix()
        while True:
            symbol = self.token[0]
            if symbol == '?':
                if rbp >= self._ternary_bp:
                    break
                self._advance('?')
                when_true = self._parse()
                self._advance(':')
                # One less than its own power makes '?:' right-associative.
                when_false = self._parse(self._ternary_bp - 1)
                left = self._conditional(left, when_true, when_false)
                continue
            entry = self._infix.get(symbol)
            if entry is None or rbp >= entry[0]:
                break
            lbp, build = entry
            self._advance(symbol)
            # Parsing the right operand at the operator's own power makes
            # binary operators left-associative.
            left = build(left, self._parse(lbp))
        return left

    def _parse_prefix(self):
        """Parse a number, ``n``, ``!operand`` or a parenthesized expression."""
        symbol, value = self.token
        if symbol == '(number)':
            self._advance()
            return lambda n: value
        if symbol == '(name)':
            self._advance()
            return lambda n: n
        if symbol == '!':
            self._advance()
            operand = self._parse(self._unary_bp)
            return lambda n: not operand(n)
        if symbol == '(':
            self._advance()
            inner = self._parse()
            self._advance(')')
            return inner
        # Anything else (including '(end)') cannot start an expression.
        raise SyntaxError

    def _advance(self, expected=None):
        """Move to the next token, first checking the current one if asked."""
        if expected is not None and self.token[0] != expected:
            raise SyntaxError
        self.token = next(self.iter_tokens, self._end)
The gettext documentation diff -r 39f33c15243b Lib/test/test_gettext.py --- a/Lib/test/test_gettext.py Sat Oct 29 10:50:00 2016 +0300 +++ b/Lib/test/test_gettext.py Mon Nov 07 01:36:07 2016 +0800 @@ -307,63 +307,93 @@ def test_hu(self): eq = self.assertEqual - f = gettext.c2py('0') + f = gettext._Plural('0') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") def test_de(self): eq = self.assertEqual - f = gettext.c2py('n != 1') + f = gettext._Plural('n != 1') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "10111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") def test_fr(self): eq = self.assertEqual - f = gettext.c2py('n>1') + f = gettext._Plural('n>1') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") def test_gd(self): eq = self.assertEqual - f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2') + f = gettext._Plural('n==1 ? 0 : n==2 ? 1 : 2') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222") def test_gd2(self): eq = self.assertEqual # Tests the combination of parentheses and "?:" - f = gettext.c2py('n==1 ? 0 : (n==2 ? 1 : 2)') + f = gettext._Plural('n==1 ? 0 : (n==2 ? 
1 : 2)') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222") def test_lt(self): eq = self.assertEqual - f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2') + f = gettext._Plural('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20111111112222222222201111111120111111112011111111201111111120111111112011111111201111111120111111112011111111222222222220111111112011111111201111111120111111112011111111201111111120111111112011111111") def test_ru(self): eq = self.assertEqual - f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2') + f = gettext._Plural('n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222") def test_pl(self): eq = self.assertEqual - f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2') + f = gettext._Plural('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20111222222222222222221112222222111222222211122222221112222222111222222211122222221112222222111222222211122222222222222222111222222211122222221112222222111222222211122222221112222222111222222211122222") def test_sl(self): eq = self.assertEqual - f = gettext.c2py('n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3') + f = gettext._Plural('n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 
2 : 3') s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333") def test_security(self): raises = self.assertRaises # Test for a dangerous expression - raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)") + raises(ValueError, gettext._Plural, "os.chmod('/etc/passwd',0777)") + # issue28563 + raises(ValueError, gettext._Plural, '"(eval(foo) && ""') + raises(ValueError, gettext._Plural, 'f"{os.system(\'sh\')}"') + + def test_chained_comparsion(self): + # C doesn't chain comparison as Python so 2 == 2 == 2 gets different results + self.assertNotEqual(gettext._Plural('2==2==2')(1), 2==2==2) + self.assertNotEqual(gettext._Plural('2!=3!=1')(1), 2!=3!=1) + self.assertNotEqual(gettext._Plural('3>2>1')(1), 3>2>1) + + def test_invalid_syntax(self): + invalid_expressions = [ + 'x>1', '(n>1', 'n>1)', '42**42**42', '0123', '0xa', + 'n>0x1', '1.0', '+n', '-n', 'n()', 'n(1)', '1+' + ] + for expr in invalid_expressions: + with self.assertRaises(ValueError): + plural = gettext._Plural(expr) + plural(1) + + def test_nested_condition_operator(self): + self.assertEqual(gettext._Plural('n?1?2:3:4')(0), 4) + self.assertEqual(gettext._Plural('n?1?2:3:4')(1), 2) + self.assertEqual(gettext._Plural('n?1:3?4:5')(0), 4) + self.assertEqual(gettext._Plural('n?1:3?4:5')(1), 1) + + def test_plural_number(self): + self.assertRaises(ValueError, gettext._Plural('1'), '1') + self.assertRaises(ValueError, gettext._Plural('1'), []) + self.assertRaises(ValueError, gettext._Plural('1'), object()) class GNUTranslationParsingTest(GettextBaseTest): def test_plural_form_error_issue17898(self): @@ -441,7 +471,7 @@ class MiscTestCase(unittest.TestCase): def test__all__(self): - blacklist = {'c2py', 'ENOENT'} + blacklist = {'_Plural', 'ENOENT'} support.check__all__(self, gettext, blacklist=blacklist)