diff -r 36df19f9e94b Lib/doctest.py --- a/Lib/doctest.py Sun Jul 03 17:23:22 2011 -0500 +++ b/Lib/doctest.py Sun Jul 03 19:13:52 2011 -0400 @@ -101,7 +101,8 @@ import sys import traceback import unittest -from io import StringIO +import tokenize +from io import StringIO, BytesIO from collections import namedtuple TestResults = namedtuple('TestResults', 'failed attempted') @@ -658,15 +659,11 @@ return source, options, want, exc_msg - # This regular expression looks for option directives in the - # source code of an example. Option directives are comments - # starting with "doctest:". Warning: this may give false - # positives for string-literals that contain the string - # "#doctest:". Eliminating these false positives would require - # actually parsing the string; but we limit them by ignoring any - # line containing "#doctest:" that is *followed* by a quote mark. - _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$', - re.MULTILINE) + # This regular expression checks if a comment is an option + # directive. Option directives are comments + # starting with "doctest:". 
+ _OPTION_DIRECTIVE_RE = re.compile(r'^#\s*doctest:\s*([^\'"]*)$') + + def _find_options(self, source, name, lineno): """ @@ -678,7 +675,13 @@ """ options = {} - # (note: with the current regexp, this will match at most once:) - for m in self._OPTION_DIRECTIVE_RE.finditer(source): + # (note: each comment token is matched against the regexp separately:) + readline = BytesIO(source.encode('utf-8')).readline + for token in tokenize._tokenize(readline, 'utf-8'): + if token.type != tokenize.COMMENT: + continue + m = self._OPTION_DIRECTIVE_RE.match(token.string) + if m is None: + continue option_strings = m.group(1).replace(',', ' ').split() for option in option_strings: if (option[0] not in '+-' or diff -r 36df19f9e94b Lib/test/test_doctest.py --- a/Lib/test/test_doctest.py Sun Jul 03 17:23:22 2011 -0500 +++ b/Lib/test/test_doctest.py Sun Jul 03 19:13:52 2011 -0400 @@ -1487,6 +1487,28 @@ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] TestResults(failed=2, attempted=3) +It used to be the case that option directives were parsed out of the source +text with a regular expression, which also matched text inside string literals. +That is now corrected, and only real comments count as directives. + +In older versions of doctest, the example below would fail, because +DONT_ACCEPT_BLANKLINE would be set, even though it wasn't a real directive. + + >>> def printsomething(f): print('') + >>> @printsomething + ... def foo(): + ... '''This is a pointless function with a comment that isn't a comment + ... + ... Example of a doctest containing a doctest! This is probably + ... dangerous. + ... + ... >>> print('') # doctest: +DONT_ACCEPT_BLANKLINE + ... + ... + ... ''' + ... + + Multiple options may be modified by a single option directive. They may be separated by whitespace, commas, or both: diff -r 36df19f9e94b Lib/test/test_doctest4.txt --- a/Lib/test/test_doctest4.txt Sun Jul 03 17:23:22 2011 -0500 +++ b/Lib/test/test_doctest4.txt Sun Jul 03 19:13:52 2011 -0400 @@ -9,3 +9,11 @@ >>> 'bąr' 'b\u0105r' + +Encoding declarations should be ignored, too: + + >>> # coding=utf-32 ... 
x = 'bąr'; print(repr(x)) + 'b\u0105r' + >>> print(x is not None) + True \ No newline at end of file