diff -r d2c464d2ee82 Doc/library/doctest.rst --- a/Doc/library/doctest.rst Sat Jul 03 15:57:30 2010 +0200 +++ b/Doc/library/doctest.rst Thu Aug 19 07:03:46 2010 -0400 @@ -1570,11 +1570,11 @@ :ref:`doctest-options` for more information about option flags. - .. method:: output_difference(example, got, optionflags) + .. method:: output_difference(want, got, optionflags) - Return a string describing the differences between the expected output for a - given example (*example*) and the actual output (*got*). *optionflags* is the - set of option flags used to compare *want* and *got*. + Return a string describing the differences between the expected output + (*want*) and the actual output (*got*). *optionflags* is the set of + option flags used to compare *want* and *got*. .. _doctest-debugging: diff -r d2c464d2ee82 Lib/doctest.py --- a/Lib/doctest.py Sat Jul 03 15:57:30 2010 +0200 +++ b/Lib/doctest.py Thu Aug 19 07:03:46 2010 -0400 @@ -264,15 +264,178 @@ if hasattr(self, "softspace"): del self.softspace +class MatchBlock (object): + def __init__(self, want, got, match): + self.want = want + self.got = got + self.match = match + +def _split_normalize_whitespace_blocks(string): + r""" + >>> list(_split_normalize_whitespace_blocks('abc def\tghi\n')) + [('', 'abc'), (' ', 'def'), ('\t', 'ghi'), ('\n', '')] + """ + regexp = re.compile('(\s*)(\S*)') + block = startpos = 0 + while startpos < len(string) and block != '': + sep,block = regexp.match(string[startpos:]).groups() + yield (sep, block) + startpos += len(sep) + len(block) + +# Worst-case linear-time normalize whitespace matching. +def _normalize_whitespace_matches(want, got): + r""" + >>> want = 'abc def\tghi\njkl' + >>> got = '\tabc DEF\ndef ghi\tJKL mno pqr' + >>> matches = _normalize_whitespace_matches(want, got) + >>> for m in matches: + ...
print (m.want, m.got, m.match) # doctest: +REPORT_UDIFF + ('abc', '\tabc', True) + ('', ' DEF', False) + (' def', '\ndef', True) + ('\tghi', ' ghi', True) + ('\njkl', '', False) + ('', '\tJKL mno pqr', False) + """ + ws = list(_split_normalize_whitespace_blocks(want)) + if len(ws) < 2: + yield MatchBlock(want, got, want.split() == got.split()) + return + gs = _split_normalize_whitespace_blocks(got) + failed_g = [] + for sep,w in ws: + g = None + while g != w: + try: + gsep,g = gs.next() + except StopIteration: + break + if g != w: + failed_g.extend([gsep, g]) + if g == w: + if failed_g: + yield MatchBlock(want='', got=''.join(failed_g), match=False) + failed_g = [] + yield MatchBlock(want=sep + w, got=gsep + g, match=True) + else: + yield MatchBlock(want=sep + w, got='', match=False) + for gsep,g in gs: + failed_g.extend([gsep, g]) + if failed_g: + yield MatchBlock(want='', got=''.join(failed_g), match=False) + +def _normalize_whitespace_equal(a, b): + return ' '.join(a.split()) == ' '.join(b.split()) + +def _equal(a, b): + return a == b + +def _normalize_whitespace_find(string, prefix, start=None, end=None, + from_right=False): + r""" + >>> _normalize_whitespace_find('abc\t def\n ghi def\t', ' def') + (True, '\t def', 3, 8) + >>> _normalize_whitespace_find('abc\t def\n ghi\ndef\t', ' def', + ... from_right=True) + (True, '\ndef', 13, 17) + """ + if start == None: + start = 0 + if end == None: + end = len(string) + blocks = prefix.split() + if len(blocks) > 0 and prefix[-1].isspace(): + blocks.append('') + if len(prefix) > 0 and prefix[0].isspace(): + blocks.insert(0, '') + regexp = re.compile('\A(.*%s)(%s)(.*%s)\Z' + % ('' if from_right else '?', + '\s+'.join([re.escape(b) for b in blocks]), + '?' 
if from_right else ''), + flags=re.MULTILINE | re.DOTALL) + match = regexp.match(string[start:end]) + if match == None: + return (False, '', start, start) + groups = list(match.groups()) + if from_right == True: # greedy .* absorbs match's initial spaces + before = groups[0] + groups[0] = groups[0].rstrip() + groups[1] = before[len(groups[0]):] + groups[1] + startpos = start+len(groups[0]) + matchlen = len(groups[1]) + endpos = end - len(groups[2]) + return (True, groups[1], startpos, endpos) + +def _find(string, prefix, start=None, end=None): + if start == None: + start = 0 + if end == None: + end = len(string) + i = string.find(prefix, start, end) + if i >= 0: + return (True, prefix, i, i+len(prefix)) + return (False, '', start, start) + +def _normalize_whitespace_startswith(string, prefix, start=None, end=None): + r""" + >>> _normalize_whitespace_startswith('\t def\n ghi', ' def') + (True, '\t def') + """ + if start == None: + start = 0 + if end == None: + end = len(string) + found,match,startpos,endpos = _normalize_whitespace_find( + string=string, prefix=prefix, start=start, end=end) + if not found or startpos != start: + return (False, match) + return (found, match) + +def _startswith(string, prefix, start=None, end=None): + found = string.startswith(prefix, start, end) + return (found, prefix if found else '') + +def _normalize_whitespace_endswith(string, prefix, start=None, end=None): + r""" + >>> _normalize_whitespace_endswith('abc\t def\n ghi', '\tghi') + (True, '\n ghi') + """ + if start == None: + start = 0 + if end == None: + end = len(string) + found,match,startpos,endpos = _normalize_whitespace_find( + string=string, prefix=prefix, start=start, end=end, from_right=True) + if not found or endpos != end: + return (False, match) + return (found, match) + +def _endswith(string, prefix, start=None, end=None): + found = string.endswith(prefix, start, end) + return (found, prefix if found else '') + # Worst-case linear-time ellipsis matching. 
-def _ellipsis_match(want, got): +def _ellipsis_matches(want, got, normalize_whitespace=False): """ - Essentially the only subtle case: - >>> _ellipsis_match('aa...aa', 'aaa') - False + >>> matches = _ellipsis_matches('aa...aa', 'aaa') + >>> [(m.want, m.got, m.match) for m in matches] + [('aa', 'aa', True), ('...aa', 'a', False)] """ + if normalize_whitespace: + equal = _normalize_whitespace_equal + find = _normalize_whitespace_find + startswith = _normalize_whitespace_startswith + endswith = _normalize_whitespace_endswith + else: + equal = _equal + find = _find + startswith = _startswith + endswith = _endswith + if ELLIPSIS_MARKER not in want: - return want == got + match = equal(want, got) + yield MatchBlock(want=want, got=got, match=match) + return # Find "the real" strings. ws = want.split(ELLIPSIS_MARKER) @@ -282,23 +445,31 @@ startpos, endpos = 0, len(got) w = ws[0] if w: # starts with exact match - if got.startswith(w): - startpos = len(w) - del ws[0] - else: - return False + found,match = startswith(got, w, startpos, endpos) + m = MatchBlock(want=w, got=match, match=found) + startpos += len(match) + yield m + del ws[0] + tail_m = None w = ws[-1] if w: # ends with exact match - if got.endswith(w): - endpos -= len(w) - del ws[-1] - else: - return False - - if startpos > endpos: - # Exact end matches required more characters than we have, as in - # _ellipsis_match('aa...aa', 'aaa') - return False + found,match = endswith(got, w, startpos, endpos) + tail_m = MatchBlock(want=ELLIPSIS_MARKER + w, + got=match, + match=found) + if tail_m.match: + endpos -= len(match) + if startpos > endpos: + # Exact end matches required more characters than we have, as in + # _ellipsis_matches('aa...aa', 'aaa') + tail_m.match = False + tail_m.got = got[startpos:] + for w in ws[:-1]: + yield MatchBlock(want=ELLIPSIS_MARKER + w, + got='', match=False) + yield tail_m + return + del ws[-1] # For the rest, we only need to find the leftmost non-overlapping # match for each piece. 
If there's no overall match that way alone, @@ -307,11 +478,27 @@ # w may be '' at times, if there are consecutive ellipses, or # due to an ellipsis at the start or end of `want`. That's OK. # Search for an empty string succeeds, and doesn't change startpos. - startpos = got.find(w, startpos, endpos) - if startpos < 0: + old_startpos = startpos + found,match,startpos,match_endpos = find(got, w, startpos, endpos) + m = MatchBlock(want=ELLIPSIS_MARKER + w, + got=got[old_startpos:match_endpos], + match=found) + startpos = match_endpos + yield m + + if tail_m != None: + tail_m.got = got[startpos:] + yield tail_m + +def _ellipsis_match(want, got): + """ + Essentially the only subtle case: + >>> _ellipsis_match('aa...aa', 'aaa') + False + """ + for match in _ellipsis_matches(want, got): + if match.match == False: return False - startpos += len(w) - return True def _comment_line(line): @@ -1158,7 +1345,8 @@ Report that the given example failed. """ out(self._failure_header(test, example) + - self._checker.output_difference(example, got, self.optionflags)) + self._checker.output_difference( + example.want, got, self.optionflags)) def report_unexpected_exception(self, out, test, example, exc_info): """ @@ -1529,14 +1717,6 @@ REPORT_NDIFF): return False - # If expected output uses ellipsis, a meaningful fancy diff is - # too hard ... or maybe not. In two real-life failures Tim saw, - # a diff was a major help anyway, so this is commented out. - # [todo] _ellipsis_match() knows which pieces do and don't match, - # and could be the basis for a kick-ass diff in this case. - ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want: - ## return False - # ndiff does intraline difference marking, so can be useful even # for 1-line differences. if optionflags & REPORT_NDIFF: @@ -1545,14 +1725,44 @@ # The other diff types need at least a few lines to be helpful. 
return want.count('\n') > 2 and got.count('\n') > 2 - def output_difference(self, example, got, optionflags): + def _diff_splitlines(self, want, got, optionflags): + if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want: + want_lines = [] + got_lines = [] + matches = _ellipsis_matches( + want, got, + normalize_whitespace=optionflags & NORMALIZE_WHITESPACE) + last_matched = None + for match in matches: + want_lines.extend(match.want.splitlines(True)) + if match.match and last_matched: + got_lines.extend(match.want.splitlines(True)) + else: + got_lines.extend(match.got.splitlines(True)) + last_matched = match.match + elif optionflags & NORMALIZE_WHITESPACE: + ws = [] + gs = [] + for match in _normalize_whitespace_matches(want, got): + ws.append(match.want) + if match.match: + gs.append(match.want) + else: + gs.append(match.got) + want_lines = ''.join(ws).splitlines(True) # True == keep line ends + got_lines = ''.join(gs).splitlines(True) + else: + want_lines = want.splitlines(True) # True == keep line ends + got_lines = got.splitlines(True) + return (want_lines, got_lines) + + def output_difference(self, want, got, optionflags): """ Return a string describing the differences between the - expected output for a given example (`example`) and the actual - output (`got`). `optionflags` is the set of option flags used - to compare `want` and `got`. + expected output (`want`) and the actual output (`got`). + `optionflags` is the set of option flags used to compare + `want` and `got`. """ - want = example.want # If <BLANKLINE>s are being used, then replace blank lines # with <BLANKLINE> in the actual output string. if not (optionflags & DONT_ACCEPT_BLANKLINE): @@ -1561,8 +1771,8 @@ # Check if we should use diff. if self._do_a_fancy_diff(want, got, optionflags): # Split want & got into lines.
- want_lines = want.splitlines(True) # True == keep line ends - got_lines = got.splitlines(True) + want_lines,got_lines = self._diff_splitlines( + want, got, optionflags) # Use difflib to find their differences. if optionflags & REPORT_UDIFF: diff = difflib.unified_diff(want_lines, got_lines, n=2) diff -r d2c464d2ee82 Lib/test/test_doctest.py --- a/Lib/test/test_doctest.py Sat Jul 03 15:57:30 2010 +0200 +++ b/Lib/test/test_doctest.py Thu Aug 19 07:03:46 2010 -0400 @@ -1263,6 +1263,108 @@ ? + ++ ^ TestResults(failed=1, attempted=1) +REPORT_*DIFF output can be complicated if ELLIPSIS and/or +NORMALIZE_WHITESPACE flags are also set. The output should only show +the nonmatching sections taking the comparison flags into account. + +REPORT_UDIFF and ELLIPSIS: + + >>> def f(x): + ... r''' + ... >>> print '\n'.join(['abc def ghi','bcd efg hij', 'cde fgh ijk']) + ... abc... ghi + ... BCD... hij + ... cde... ijk + ... ''' + + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_UDIFF | doctest.ELLIPSIS + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print '\n'.join(['abc def ghi','bcd efg hij', 'cde fgh ijk']) + Differences (unified diff with -expected +actual): + @@ -1,6 +1,5 @@ + abc + -... ghi + -BCD + -... hij + + def ghi + +bcd efg hij + cde + ... ijk + TestResults(failed=1, attempted=1) + +REPORT_UDIFF and NORMALIZE_WHITESPACE: + + >>> def f(x): + ... r''' + ... >>> print '\n'.join(['abc def ghi','bcd efg hij', 'cde fgh ijk']) + ... abc + ... def + ... ghi + ... hij + ... cde + ... fgh + ... ijk + ... ''' + + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = doctest.REPORT_UDIFF | doctest.NORMALIZE_WHITESPACE + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... 
# doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print '\n'.join(['abc def ghi','bcd efg hij', 'cde fgh ijk']) + Differences (unified diff with -expected +actual): + @@ -2,4 +2,5 @@ + def + ghi + +bcd efg + hij + cde + TestResults(failed=1, attempted=1) + +REPORT_UDIFF, ELLIPSIS, and NORMALIZE_WHITESPACE: + + >>> def f(x): + ... r''' + ... >>> print '\n'.join(['abc def ghi','bcd efg hij', 'cde fgh ijk']) + ... abc... + ... ghi + ... efg... + ... hij + ... cde... + ... ijk + ... ''' + + >>> test = doctest.DocTestFinder().find(f)[0] + >>> flags = (doctest.REPORT_UDIFF + ... | doctest.ELLIPSIS + ... | doctest.NORMALIZE_WHITESPACE) + >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test) + ... # doctest: +ELLIPSIS + ********************************************************************** + File ..., line 3, in f + Failed example: + print '\n'.join(['abc def ghi','bcd efg hij', 'cde fgh ijk']) + Differences (unified diff with -expected +actual): + @@ -1,8 +1,5 @@ + abc + -... + - ghi + -efg + -... + - hij + + def ghi + +bcd efg hij + cde + ... + TestResults(failed=1, attempted=1) + The REPORT_ONLY_FIRST_FAILURE supresses result output after the first failing example: