From 47c21cfd786acb10e4682c7d9d4bd1db756024eb Mon Sep 17 00:00:00 2001
From: Sergey B Kirpichev
Date: Sun, 30 May 2021 07:17:21 +0300
Subject: [PATCH] bpo-44267: fix parsing Decimal's with underscores

An underscore in a numeric string is accepted only directly between
two digits.  The same rule is enforced by the regular expression in
Lib/_pydecimal.py and by numeric_as_ascii() in
Modules/_decimal/_decimal.c.
---
Review notes (text in this section is ignored by git-am):

 * Line breaks and the regex group names (sign, int, frac, exp, signal,
   diag) were restored; '(?P[-+])' is not a valid pattern.  Whitespace
   of the context lines is reconstructed, so applying may need
   'git apply --ignore-whitespace'.  The author e-mail is missing from
   the From: header and must be re-added before using git-am.
 * _pydecimal.py: after the placement check the text is matched again
   without underscores, so the match groups stay digit-only exactly as
   they were when underscores were stripped before parsing.
 * _decimal.c: the check was "not leading, not trailing, not doubled",
   which let "1_.5", "1e_5" or "I_nf" through while the regex rejected
   them.  It now requires a digit on both sides of every underscore.
 * Hunk headers and the diffstat were recomputed by hand; the index
   ids are those of the original submission.

 Lib/_pydecimal.py           | 28 +++++++++++++++++-----------
 Lib/test/test_decimal.py    | 11 +++++++++++
 Modules/_decimal/_decimal.c | 24 ++++++++++++++++++++----
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py
index ff23322ed5..d8e94a727d 100644
--- a/Lib/_pydecimal.py
+++ b/Lib/_pydecimal.py
@@ -547,7 +547,13 @@ def __new__(cls, value="0", context=None):
         # From a string
         # REs insist on real strings, so we can too.
         if isinstance(value, str):
-            m = _parser(value.strip().replace("_", ""))
+            stripped = value.strip()
+            m = _parser(stripped)
+            if m is not None and "_" in stripped:
+                # The match above only validated underscore placement;
+                # match again without underscores so that the groups
+                # stay digit-only, as they were before this change.
+                m = _parser(stripped.replace("_", ""))
             if m is None:
                 if context is None:
                     context = getcontext()
@@ -6114,20 +6120,20 @@ def _convert_for_comparison(self, other, equality_op=False):
 # lookahead expression '(?=\d|\.\d)' checks this.
 
 import re
-_parser = re.compile(r"""        # A numeric string consists of:
+_parser = re.compile(r"""               # A numeric string consists of:
 #    \s*
-    (?P<sign>[-+])?              # an optional sign, followed by either...
+    (?P<sign>[-+])?                     # an optional sign, followed by either...
     (
-        (?=\d|\.\d)              # ...a number (with at least one digit)
-        (?P<int>\d*)             # having a (possibly empty) integer part
-        (\.(?P<frac>\d*))?       # followed by an optional fractional part
-        (E(?P<exp>[-+]?\d+))?    # followed by an optional exponent, or...
+        (?=\d|\.\d)                     # ...a number (with at least one digit)
+        (?P<int>\d*|\d+(_\d+)*)         # having a (possibly empty) integer part
+        (\.(?P<frac>\d*|\d+(_\d+)*))?   # followed by an optional fractional part
+        (E(?P<exp>[-+]?\d+(_\d+)*))?    # followed by an optional exponent, or...
     |
-        Inf(inity)?              # ...an infinity, or...
+        Inf(inity)?                     # ...an infinity, or...
     |
-        (?P<signal>s)?           # ...an (optionally signaling)
-        NaN                      # NaN
-        (?P<diag>\d*)            # with (possibly empty) diagnostic info.
+        (?P<signal>s)?                  # ...an (optionally signaling)
+        NaN                             # NaN
+        (?P<diag>\d*|\d+(_\d+)*)        # with (possibly empty) diagnostic info.
     )
 #    \s*
     \Z
diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py
index 179a9ea704..1720ec8517 100644
--- a/Lib/test/test_decimal.py
+++ b/Lib/test/test_decimal.py
@@ -583,6 +583,17 @@ def test_explicit_from_string(self):
         # underscores don't prevent errors
         self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")
 
+        # multiple underscores
+        self.assertRaises(InvalidOperation, Decimal, "1.2__3")
+
+        # underscores in wrong places
+        for s in ("_1.23", "1.23_", "1_.23", "1._23", "-_1", "1_e5",
+                  "1e_5", "1e+_5", "NaN_1", "I_nf"):
+            self.assertRaises(InvalidOperation, Decimal, s)
+
+        # underscores are not counted as fraction digits
+        self.assertEqual(str(Decimal("1.0_1")), "1.01")
+
     @cpython_only
     @requires_legacy_unicode_capi
     @warnings_helper.ignore_warnings(category=DeprecationWarning)
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c
index 9b89fa40c9..5a21634992 100644
--- a/Modules/_decimal/_decimal.c
+++ b/Modules/_decimal/_decimal.c
@@ -1900,7 +1900,7 @@ numeric_as_ascii(const PyObject *u, int strip_ws, int ignore_underscores)
 {
     enum PyUnicode_Kind kind;
     const void *data;
-    Py_UCS4 ch;
+    Py_UCS4 ch, prev_ch = 0;
     char *res, *cp;
     Py_ssize_t j, len;
     int d;
@@ -1932,8 +1932,19 @@ numeric_as_ascii(const PyObject *u, int strip_ws, int ignore_underscores)
     for (; j < len; j++) {
         ch = PyUnicode_READ(kind, data, j);
         if (ignore_underscores && ch == '_') {
+            /* an underscore must directly follow a digit ... */
+            if (Py_UNICODE_TODECIMAL(prev_ch) < 0) {
+                goto err_invalid;
+            }
+            prev_ch = ch;
             continue;
         }
+        /* ... and must be directly followed by another digit */
+        if (ignore_underscores && prev_ch == '_' &&
+            Py_UNICODE_TODECIMAL(ch) < 0) {
+            goto err_invalid;
+        }
+        prev_ch = ch;
         if (0 < ch && ch <= 127) {
             *cp++ = ch;
             continue;
@@ -1945,13 +1956,18 @@ numeric_as_ascii(const PyObject *u, int strip_ws, int ignore_underscores)
         d = Py_UNICODE_TODECIMAL(ch);
         if (d < 0) {
-            /* empty string triggers ConversionSyntax */
-            *res = '\0';
-            return res;
+            goto err_invalid;
         }
         *cp++ = '0' + d;
     }
+    if (prev_ch == '_') {
+        goto err_invalid;
+    }
     *cp = '\0';
     return res;
+err_invalid:
+    /* empty string triggers ConversionSyntax */
+    *res = '\0';
+    return res;
 }
 
 /* Return a new PyDecObject or a subtype from a C string. Use the context
-- 
2.30.2