diff --git a/Lib/gettext.py b/Lib/gettext.py
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -48,6 +48,7 @@
 
 import locale, copy, io, os, re, struct, sys
 from errno import ENOENT
+import ast
 
 
 __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
@@ -58,34 +59,93 @@
 _default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
 
+class _PluralAST(ast.NodeVisitor):
+    """Whitelist validator for a plural form expression.
+
+    The expression, already translated from C to Python syntax, is parsed
+    with ast and every node is checked against allowed_classes.  The only
+    name accepted is "n" and the only literals accepted are integers;
+    anything else raises ValueError.
+    """
+
+    allowed_classes = (
+        # name, number
+        ast.Name, ast.Num,
+        # ops; UnaryOp is needed for "not" and for unary + and -
+        ast.BoolOp, ast.BinOp, ast.UnaryOp,
+        # cmp, if else, and, or
+        ast.Compare, ast.IfExp, ast.And, ast.Or,
+        # ==, !=, >, >=, <, <=
+        ast.Eq, ast.NotEq, ast.Gt, ast.GtE, ast.Lt, ast.LtE,
+        # %, not, unary +, unary -
+        ast.Mod, ast.Not, ast.UAdd, ast.USub,
+        # not seen in the wild +, -, <<, >>, &, |, ^
+        #ast.Add, ast.Sub, ast.LShift, ast.RShift,
+        #ast.BitAnd, ast.BitOr, ast.BitXor
+    )
+
+    # safe guard against deeply nested rule. The most complex example has 53.
+    max_visits = 200
+
+    def __init__(self, plural):
+        # ast.parse() in 'eval' mode treats leading blanks as an unexpected
+        # indent; both "plural= n != 1" and the " not " substitution made
+        # by c2py() produce them, so strip before parsing.
+        self.plural = plural.strip()
+        self.visits = 0
+
+    def check(self):
+        """Parse the expression and validate every node in it.
+
+        Raises SyntaxError when the text is not a Python expression and
+        ValueError when it contains anything that is not whitelisted.
+        """
+        expr = ast.parse(self.plural, mode='eval')
+        return self.generic_visit(expr)
+
+    def visit(self, node):
+        """Reject nodes that are not whitelisted, then dispatch as usual."""
+        if not isinstance(node, self.allowed_classes):
+            raise ValueError('%r not allowed in plural form at %s.\n%s' %
+                             (type(node).__name__,
+                              getattr(node, 'col_offset', '?'),
+                              self.plural))
+        self.visits += 1
+        if self.visits > self.max_visits:
+            raise ValueError('Expression %r is too complex' % self.plural)
+        return ast.NodeVisitor.visit(self, node)
+
+    def visit_Name(self, node):
+        """Accept only the variable "n"."""
+        if node.id != 'n':
+            raise ValueError('Variable name %r not allowed' % node.id)
+        return node
+
+    def visit_Num(self, node):
+        """Accept only integer literals."""
+        if not isinstance(node.n, int):
+            raise ValueError('%r at offset %i' % (node.n, node.col_offset))
+        return node
+
+_RE_NOT = re.compile(r'\!([^=])')
+_RE_TERNARY = re.compile(r'(.*?)\?(.*?):(.*)')
+
 
 def c2py(plural):
     """Gets a C expression as used in PO files for plural forms and returns a
     Python lambda function that implements an equivalent expression.
     """
-    # Security check, allow only the "n" identifier
-    import token, tokenize
-    tokens = tokenize.generate_tokens(io.StringIO(plural).readline)
-    try:
-        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
-    except tokenize.TokenError:
-        raise ValueError('plural forms expression error, maybe unbalanced parenthesis')
-    else:
-        if danger:
-            raise ValueError('plural forms expression could be dangerous')
-
     # Replace some C operators by their Python equivalents
     plural = plural.replace('&&', ' and ')
     plural = plural.replace('||', ' or ')
 
-    expr = re.compile(r'\!([^=])')
-    plural = expr.sub(' not \\1', plural)
+    # "!n" to "not n"
+    plural = _RE_NOT.sub(' not \\1', plural)
 
     # Regular expression and replacement function used to transform
     # "a?b:c" to "b if a else c".
-    expr = re.compile(r'(.*?)\?(.*?):(.*)')
     def repl(x):
         return "(%s if %s else %s)" % (x.group(2), x.group(1),
-                                       expr.sub(repl, x.group(3)))
+                                       _RE_TERNARY.sub(repl, x.group(3)))
 
     # Code to transform the plural expression, taking care of parentheses
     stack = ['']
@@ -94,20 +154,23 @@
             stack.append('')
         elif c == ')':
             if len(stack) == 1:
-                # Actually, we never reach this code, because unbalanced
-                # parentheses get caught in the security check at the
-                # beginning.
                 raise ValueError('unbalanced parenthesis in plural form')
-            s = expr.sub(repl, stack.pop())
+            s = _RE_TERNARY.sub(repl, stack.pop())
             stack[-1] += '(%s)' % s
         else:
             stack[-1] += c
-    plural = expr.sub(repl, stack.pop())
-
+    if len(stack) != 1:
+        # An unclosed "(" would otherwise silently discard everything
+        # that precedes it, because only the last segment is popped.
+        raise ValueError('unbalanced parenthesis in plural form')
+    plural = _RE_TERNARY.sub(repl, stack.pop())
+    try:
+        _PluralAST(plural).check()
+    except SyntaxError as e:
+        raise ValueError('invalid plural form expression %r' % plural) from e
     return eval('lambda n: int(%s)' % plural)
 
 
-
 def _expand_lang(loc):
     loc = locale.normalize(loc)
     COMPONENT_CODESET = 1 << 0
diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py
--- a/Lib/test/test_gettext.py
+++ b/Lib/test/test_gettext.py
@@ -356,6 +356,44 @@
         self.assertEqual(t.__class__, DummyGNUTranslations)
 
 
+class GettextPluralTest(GettextBaseTest):
+    # Examples from http://www.gnu.org/software/gettext/manual/gettext.html
+    plural_formulas = [
+        'n>1',
+        'n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2',
+        'n==1 ? 0 : n==2 ? 1 : 2',
+        'n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2',
+        'n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2',
+        'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2',
+        '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2',
+        'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2',
+        'n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3',
+        # Not from the manual: leading blank and C negation must be accepted
+        ' n != 1',
+        '!(n==1)',
+    ]
+
+    broken_rules = [
+        'x>1',
+        '(n>1',
+        'n>1)',
+        'n+1',
+        '42**42**42',
+        'int(n)',
+        'os.chmod("/etc/passwd", 0o777)',
+        '"egg"',
+        '1.0',
+    ]
+
+    def test_plural_formula(self):
+        for pf in self.plural_formulas:
+            func = gettext.c2py(pf)
+            for i in range(100):
+                func(i)
+        for pf in self.broken_rules:
+            self.assertRaises(ValueError, gettext.c2py, pf)
+
+
 def test_main():
     support.run_unittest(__name__)
 