diff -r a2c9f06ada28 Lib/gettext.py --- a/Lib/gettext.py Sun Nov 06 18:47:03 2016 +0200 +++ b/Lib/gettext.py Mon Nov 07 19:07:34 2016 +0200 @@ -59,55 +59,104 @@ from errno import ENOENT _default_localedir = os.path.join(sys.base_prefix, 'share', 'locale') +_token_pattern = re.compile(r""" + (?P[ \t]+) | # spaces and horizontal tabs + (?P[0-9]+\b) | # decimal integer + (?Pn\b) | # only n is allowed + (?P[()]) | + (?P[-*/%+?:]|[>, + # <=, >=, ==, !=, &&, ||, + # ? : + # unary and bitwise ops + # not allowed + (?P\w+|.) # invalid token + """, re.VERBOSE|re.DOTALL) + +def _tokenize(plural): + for mo in re.finditer(_token_pattern, plural): + kind = mo.lastgroup + if kind == 'WHITESPACES': + continue + value = mo.group(kind) + if kind == 'INVALID': + raise ValueError('invalid token in plural form: %s' % value) + yield value + yield '' + +def _error(value): + if value: + return ValueError('unexpected token in plural form: %s' % value) + else: + return ValueError('unexpected end of plural form') + +_binary_ops = [ + ('||',), + ('&&',), + ('==', '!='), + ('<', '>', '<=', '>='), + ('+', '-'), + ('*', '/', '%'), +] +_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops} + +_op_map = {'||': 'or', '&&': 'and', '/': '//'} + +def _parse(tokens, priority=0): + result = '' + nexttok = next(tokens) + while nexttok == '!': + result += 'not ' + nexttok = next(tokens) + + if nexttok == '(': + sub, nexttok = _parse(tokens) + result = '%s(%s)' % (result, sub) + if nexttok != ')': + raise ValueError('unbalanced parenthesis in plural form') + elif nexttok == 'n': + result = '%s%s' % (result, nexttok) + else: + try: + value = int(nexttok, 10) + except ValueError: + raise _error(nexttok) from None + result = '%s%d' % (result, value) + nexttok = next(tokens) + + while nexttok in _binary_ops: + i = _binary_ops[nexttok] + if i < priority: + break + # Replace some C operators by their Python equivalents + op = _op_map.get(nexttok, nexttok) + right, nexttok = _parse(tokens, i + 1) + result = '(%s) %s (%s)' % (result, op, right) + + if nexttok == '?' and priority == 0: + if_true, nexttok = _parse(tokens) + if nexttok != ':': + raise _error(nexttok) + if_false, nexttok = _parse(tokens) + result = '(%s) if (%s) else (%s)' % (if_true, result, if_false) + + return result, nexttok def c2py(plural): """Gets a C expression as used in PO files for plural forms and returns a - Python lambda function that implements an equivalent expression. + Python function that implements an equivalent expression. """ - # Security check, allow only the "n" identifier - import token, tokenize - tokens = tokenize.generate_tokens(io.StringIO(plural).readline) - try: - danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n'] - except tokenize.TokenError: + + result, nexttok = _parse(_tokenize(plural)) + if nexttok: raise ValueError('plural forms expression error, maybe unbalanced parenthesis') - else: - if danger: - raise ValueError('plural forms expression could be dangerous') - - # Replace some C operators by their Python equivalents - plural = plural.replace('&&', ' and ') - plural = plural.replace('||', ' or ') - - expr = re.compile(r'\!([^=])') - plural = expr.sub(' not \\1', plural) - - # Regular expression and replacement function used to transform - # "a?b:c" to "b if a else c". - expr = re.compile(r'(.*?)\?(.*?):(.*)') - def repl(x): - return "(%s if %s else %s)" % (x.group(2), x.group(1), - expr.sub(repl, x.group(3))) - - # Code to transform the plural expression, taking care of parentheses - stack = [''] - for c in plural: - if c == '(': - stack.append('') - elif c == ')': - if len(stack) == 1: - # Actually, we never reach this code, because unbalanced - # parentheses get caught in the security check at the - # beginning. - raise ValueError('unbalanced parenthesis in plural form') - s = expr.sub(repl, stack.pop()) - stack[-1] += '(%s)' % s - else: - stack[-1] += c - plural = expr.sub(repl, stack.pop()) - - return eval('lambda n: int(%s)' % plural) - + ns = {} + exec('''if True: + def func(n): + if not isinstance(n, int): + raise ValueError('Plural value must be an integer.') + return int(%s) + ''' % result, ns) + return ns['func'] def _expand_lang(loc): diff -r a2c9f06ada28 Lib/test/test_gettext.py --- a/Lib/test/test_gettext.py Sun Nov 06 18:47:03 2016 +0200 +++ b/Lib/test/test_gettext.py Mon Nov 07 19:07:34 2016 +0200 @@ -305,7 +305,7 @@ class PluralFormsTestCase(GettextBaseTes x = t.ngettext('There is %s file', 'There are %s files', 2) eq(x, 'Hay %s ficheros') - def test_hu(self): + def test_ja(self): eq = self.assertEqual f = gettext.c2py('0') s = ''.join([ str(f(x)) for x in range(200) ]) @@ -323,6 +323,12 @@ class PluralFormsTestCase(GettextBaseTes s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") + def test_lv(self): + eq = self.assertEqual + f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2') + s = ''.join([ str(f(x)) for x in range(200) ]) + eq(s, "20111111111111111111101111111110111111111011111111101111111110111111111011111111101111111110111111111011111111111111111110111111111011111111101111111110111111111011111111101111111110111111111011111111") + def test_gd(self): eq = self.assertEqual f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2') @@ -336,6 +342,12 @@ class PluralFormsTestCase(GettextBaseTes s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222") + def test_ro(self): + eq = self.assertEqual + f = gettext.c2py('n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2') + s = ''.join([ str(f(x)) for x in range(200) ]) + eq(s, "10111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222") + def test_lt(self): eq = self.assertEqual f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2') @@ -348,6 +360,12 @@ class PluralFormsTestCase(GettextBaseTes s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222") + def test_cs(self): + eq = self.assertEqual + f = gettext.c2py('(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2') + s = ''.join([ str(f(x)) for x in range(200) ]) + eq(s, "20111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222") + def test_pl(self): eq = self.assertEqual f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2') @@ -360,10 +378,65 @@ class PluralFormsTestCase(GettextBaseTes s = ''.join([ str(f(x)) for x in range(200) ]) eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333") + def test_ar(self): + eq = self.assertEqual + f = gettext.c2py('n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5') + s = ''.join([ str(f(x)) for x in range(200) ]) + eq(s, "01233333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444445553333333344444444444444444444444444444444444444444444444444444444444444444444444444444444444444444") + def test_security(self): raises = self.assertRaises # Test for a dangerous expression raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)") + # issue28563 + raises(ValueError, gettext.c2py, '"(eval(foo) && ""') + raises(ValueError, gettext.c2py, 'f"{os.system(\'sh\')}"') + + def test_chained_comparsion(self): + # C doesn't chain comparison as Python so 2 == 2 == 2 gets different results + f = gettext.c2py('n == n == n') + self.assertEqual(''.join(str(f(x)) for x in range(3)), '010') + f = gettext.c2py('1 < n == n') + self.assertEqual(''.join(str(f(x)) for x in range(3)), '100') + f = gettext.c2py('n == n < 2') + self.assertEqual(''.join(str(f(x)) for x in range(3)), '010') + f = gettext.c2py('0 < n < 2') + self.assertEqual(''.join(str(f(x)) for x in range(3)), '111') + + def test_decimal_number(self): + self.assertEqual(gettext.c2py('0123')(1), 123) + + def test_invalid_syntax(self): + invalid_expressions = [ + 'x>1', '(n>1', 'n>1)', '42**42**42', '0xa', '1.0', '1e2', + 'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n', + ] + for expr in invalid_expressions: + with self.assertRaises(ValueError): + gettext.c2py(expr) + + def test_nested_condition_operator(self): + self.assertEqual(gettext.c2py('n?1?2:3:4')(0), 4) + self.assertEqual(gettext.c2py('n?1?2:3:4')(1), 2) + self.assertEqual(gettext.c2py('n?1:3?4:5')(0), 4) + self.assertEqual(gettext.c2py('n?1:3?4:5')(1), 1) + + def test_division(self): + f = gettext.c2py('2/n*3') + self.assertEqual(f(1), 6) + self.assertEqual(f(2), 3) + self.assertEqual(f(3), 0) + self.assertEqual(f(-1), -6) + self.assertRaises(ZeroDivisionError, f, 0) + + def test_plural_number(self): + f = gettext.c2py('1') + self.assertEqual(f(1), 1) + self.assertRaises(ValueError, f, 1.0) + self.assertRaises(ValueError, f, '1') + self.assertRaises(ValueError, f, []) + self.assertRaises(ValueError, f, object()) + class GNUTranslationParsingTest(GettextBaseTest): def test_plural_form_error_issue17898(self):