*** ..\latest\diff.py Thu Jul 15 12:59:40 2004
--- ..\patched\diff.py Thu Jul 15 15:27:10 2004
***************
*** 1,8 ****
! """ Command line interface to difflib.py providing diffs in three formats:
* ndiff: lists every line and highlights interline changes.
! * context: highlights clusters of changes in a before/after format
* unified: highlights clusters of changes in an inline format.
"""
--- 1,9 ----
! """ Command line interface to difflib.py providing diffs in four formats:
* ndiff: lists every line and highlights interline changes.
! * context: highlights clusters of changes in a before/after format.
* unified: highlights clusters of changes in an inline format.
+ * html: generates side by side comparison with change highlights.
"""
***************
*** 12,17 ****
--- 13,19 ----
parser = optparse.OptionParser(usage)
parser.add_option("-c", action="store_true", default=False, help='Produce a context format diff (default)')
parser.add_option("-u", action="store_true", default=False, help='Produce a unified format diff')
+ parser.add_option("-m", action="store_true", default=False, help='Produce HTML side by side diff (can use -c and -l in conjunction)')
parser.add_option("-n", action="store_true", default=False, help='Produce a ndiff format diff')
parser.add_option("-l", "--lines", type="int", default=3, help='Set number of context lines (default 3)')
(options, args) = parser.parse_args()
***************
*** 34,39 ****
--- 36,43 ----
diff = difflib.unified_diff(fromlines, tolines, fromfile, tofile, fromdate, todate, n=n)
elif options.n:
diff = difflib.ndiff(fromlines, tolines)
+ elif options.m:
+ diff = difflib.HtmlDiff().make_file(fromlines,tolines,fromfile,tofile,context=options.c,numlines=n)
else:
diff = difflib.context_diff(fromlines, tolines, fromfile, tofile, fromdate, todate, n=n)
*** ..\latest\difflib.py Thu Jul 15 12:56:52 2004
--- ..\patched\difflib.py Sun Jul 25 12:41:16 2004
***************
*** 23,33 ****
Class Differ:
For producing human-readable deltas from sequences of lines of text.
"""
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
! 'unified_diff']
import heapq
--- 23,36 ----
Class Differ:
For producing human-readable deltas from sequences of lines of text.
+
+ Class HtmlDiff:
+ For producing HTML side by side comparison with change highlights.
"""
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
! 'unified_diff', 'HtmlDiff']
import heapq
***************
*** 1101,1108 ****
return ch in ws
- del re
-
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
tofiledate='', n=3, lineterm='\n'):
--- 1104,1109 ----
***************
*** 1276,1281 ****
--- 1277,1849 ----
+ emu
"""
return Differ(linejunk, charjunk).compare(a, b)
+
+ def _mdiff(fromlines, tolines, chgfmt, linefmt, context=None, sep=None,
+ linejunk=None, charjunk=IS_CHARACTER_JUNK):
+ """Returns generator yielding marked up from/to side by side difference lines.
+
+ Arguments:
+ fromlines -- text lines which will be iterated over and compared to tolines
+ tolines -- text lines which will be iterated over and compared to fromlines
+ chgfmt -- function to markup add/delete/change differences in text lines
+ (see example below)
+ linefmt -- function to format line of text for display (see example below)
+ context -- number of context lines to display on each side of difference,
+ if None or less than 1, then all from/to text lines will be
+ generated.
+ sep -- separator string to use between context differences.
+ linejunk -- passed on to ndiff (see ndiff documentation)
+ charjunk -- passed on to ndiff (see ndiff documentation)
+
+ This function returns an iterator which returns a tuple of a "from"
+ line, a corresponding "to" line and a boolean indicating if either the
+ "from" or "to" line contains a difference.
+
+ This function/iterator was originally developed to generate side by side
+ file difference for making HTML pages. The function requires functions to
+ be passed in as arguments to allow it to be configurable to generate any
+ type of markup such as HTML or XHTML. The function supports generating a
+ full file difference report or just contextual differences.
+
+ See HtmlDiff class for an example usage of this function. Note, this
+ function utilizes the ndiff function to generate the side by side
+ difference markup. Optional ndiff arguments may be passed to this function
+ and they in turn will be passed to ndiff.
+ """
+ import re
+
+ # adjust number of context lines to include the line with the change
+ if context:
+ context += 1
+
+ # regular expression for finding intraline change indices
+ change_re = re.compile('(\++|\-+|\^+)')
+
+ # regular expression to find hidden markers
+ marker_re = re.compile('\0([+-^])(.*?)\1',re.DOTALL)
+
+ # create the difference iterator to generate the differences
+ diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk)
+
+ def _make_line(lines, format_key, side, num_lines=[0,0]):
+ """Returns line of text with user's change markup and line formatting.
+
+ lines -- list of lines from the ndiff generator to produce a line of
+ text from. When producing the line of text to return, the
+ lines used are removed from this list.
+ format_key -- '+' return first line in list with "add" markup around
+ the entire line.
+ '-' return first line in list with "delete" markup around
+ the entire line.
+ '?' return first line in list with add/delete/change
+ intraline markup (indices obtained from second line)
+ None return first line in list with no markup
+ side -- index into the num_lines list (0=from,1=to)
+ num_lines -- from/to current line number. This is NOT intended to be a
+ passed parameter. It is present as a keyword argument to
+ maintain memory of the current line numbers between calls
+ of this function.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ num_lines[side] += 1
+ # Handle case where no user markup is to be added, just return line of
+ # text with user's line format to allow for usage of the line number.
+ if format_key is None:
+ return linefmt(side,num_lines[side],lines.pop(0)[2:])
+ # Handle case of intraline changes
+ if format_key == '?':
+ text, markers = lines.pop(0), lines.pop(0)
+ # find intraline changes (store change type and indices in tuples)
+ sub_info = []
+ def record_sub_info(match_object,sub_info=sub_info):
+ sub_info.append([match_object.group(1)[0],match_object.span()])
+ return match_object.group(1)
+ change_re.sub(record_sub_info,markers)
+ # process each tuple inserting our special marks that won't be
+ # noticed by an xml/html escaper.
+ for key,(begin,end) in sub_info[::-1]:
+ text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:]
+ text = text[2:]
+ # Handle case of add/delete entire line
+ else:
+ text = lines.pop(0)[2:]
+ # if line of text is just a newline, insert a space so there is
+ # something for the user to highlight and see.
+ if len(text) <= 1:
+ text = ' '+text
+ # insert marks that won't be noticed by an xml/html escaper.
+ text = '\0' + format_key + text + '\1'
+ # Return line of text, first allow user's line formatter to do its
+ # thing (such as adding the line number) then replace the special
+ # marks with the user's change markup.
+ line_num = num_lines[side]
+ replacer = lambda m : chgfmt(side,line_num,m.group(2),m.group(1))
+ return marker_re.sub(replacer,linefmt(side,line_num,text))
+
+ def _line_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from a
+ differencing iterator, processes them and yields them. When it can
+ it yields both a "from" and a "to" line, otherwise it will yield one
+ or the other. Processing includes formatting the line with the user's
+ line formatter (for adding line numbering) and formatting differences
+ using the user's change format function. In addition to yielding the
+ lines of from/to text, a boolean flag is yielded to indicate if the
+ text line(s) have differences in them.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ lines = []
+ num_blanks_pending, num_blanks_to_yield = 0, 0
+ while True:
+ # Load up next 4 lines so we can look ahead, create strings which
+ # are a concatenation of the first character of each of the 4 lines
+ # so we can do some very readable comparisons.
+ while len(lines) < 4:
+ try:
+ lines.append(diff_lines_iterator.next())
+ except StopIteration:
+ lines.append('X')
+ s = ''.join([line[0] for line in lines])
+ if s.startswith('X'):
+ # When no more lines, pump out any remaining blank lines so the
+ # corresponding add/delete lines get a matching blank line so
+ # all line pairs get yielded at the next level.
+ num_blanks_to_yield = num_blanks_pending
+ elif s.startswith('-?+?'):
+ # simple intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('--++'):
+ # in delete block, add block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the delete line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith('--?+') or s.startswith('--+') or \
+ s.startswith('- '):
+ # in delete block and see an intraline change or unchanged line
+ # coming: yield the delete line and then blanks
+ from_line,to_line = _make_line(lines,'-',0), None
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0
+ elif s.startswith('-+?'):
+ # intraline change
+ yield _make_line(lines,None,0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('-?+'):
+ # intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,None,1), True
+ continue
+ elif s.startswith('-'):
+ # delete FROM line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith('+--'):
+ # in add block, delete block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith('+ ') or s.startswith('+-'):
+ # will be leaving an add block: yield blanks then add line
+ from_line, to_line = None, _make_line(lines,'+',1)
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0
+ elif s.startswith('+'):
+ # inside an add block, yield the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(' '):
+ # unchanged text, yield it to both sides
+ yield _make_line(lines[:],None,0),_make_line(lines,None,1),False
+ continue
+ # Catch up on the blank lines so when we yield the next from/to
+ # pair, they are lined up.
+ while(num_blanks_to_yield < 0):
+ num_blanks_to_yield += 1
+ yield None,linefmt(1,None,'\n'),True
+ while(num_blanks_to_yield > 0):
+ num_blanks_to_yield -= 1
+ yield linefmt(0,None,'\n'),None,True
+ if s.startswith('X'):
+ raise StopIteration
+ else:
+ yield from_line,to_line,True
+
+ def _line_pair_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from the line
+ iterator. Its difference from that iterator is that this function
+ always yields a pair of from/to text lines (with the change
+ indication). If necessary it will collect single from/to lines
+ until it has a matching from/to pair to yield.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ line_iterator = _line_iterator()
+ fromlines,tolines=[],[]
+ while True:
+ # Collecting lines of text until we have a from/to pair
+ while (len(fromlines)==0 or len(tolines)==0):
+ from_line, to_line, found_diff =line_iterator.next()
+ if from_line is not None:
+ fromlines.append((from_line,found_diff))
+ if to_line is not None:
+ tolines.append((to_line,found_diff))
+ # Once we have a pair, remove them from the collection and yield it
+ from_line, fromDiff = fromlines.pop(0)
+ to_line, to_diff = tolines.pop(0)
+ yield (from_line,to_line,fromDiff or to_diff)
+
+ # Handle case where user does not want context differencing, just yield
+ # them up without doing anything else with them.
+ line_pair_iterator = _line_pair_iterator()
+ if context is None or context <= 0:
+ while True:
+ yield line_pair_iterator.next()
+ # Handle case where user wants context differencing. We must do some
+ # storage of lines until we know for sure that they are to be yielded.
+ else:
+ lines_to_write = 0
+ insert_separator = False
+ while True:
+ # Store lines up until we find a difference, note use of a
+ # circular queue because we only need to keep around what
+ # we need for context.
+ index, contextLines = 0, [None]*(context)
+ found_diff = False
+ while(found_diff is False):
+ from_line, to_line, found_diff = line_pair_iterator.next()
+ i = index % context
+ contextLines[i] = (from_line, to_line, found_diff)
+ index += 1
+ # Yield lines that we have collected so far, but first yield
+ # the user's separator.
+ if insert_separator:
+ yield sep, sep, None
+ else:
+ insert_separator = True
+ if index > context:
+ lines_to_write = context
+ else:
+ lines_to_write = index
+ index = 0
+ while(lines_to_write):
+ i = index % context
+ index += 1
+ yield contextLines[i]
+ lines_to_write -= 1
+ # Now yield the context lines after the change
+ lines_to_write = context-1
+ while(lines_to_write):
+ from_line, to_line, found_diff = line_pair_iterator.next()
+ # If another change within the context, extend the context
+ if found_diff:
+ lines_to_write = context
+ else:
+ lines_to_write -= 1
+ yield from_line, to_line, found_diff
+
+
+ _file_template = """
+
+
+
+
+
+
+ %(title)s
+
+
+
+
+ %(header)s
+ %(table)s%(legend)s
+
+
+ """
+
+ _styles = """
+ table.diff {font-family:Courier; border:medium;}
+ .diff_header {background-color:#e0e0e0}
+ td.diff_header {text-align:right}
+ .diff_next {background-color:#c0c0c0}
+ .diff_add {background-color:#aaffaa}
+ .diff_chg {background-color:#ffff77}
+ .diff_sub {background-color:#ffaaaa}"""
+
+ _table_template = """
+
+
+
+ %(header_row)s
+
+ %(data_rows)s
+
"""
+
+ _legend = """
+
+ Legends |
+
+ Colors |
+ Added |
+ Changed |
+ Deleted |
+ |
+
+ Links |
+ (f)irst change |
+ (n)ext change |
+ (t)op |
+ |
+
"""
+
+ class HtmlDiff(object):
+ """For producing HTML side by side comparison with change highlights.
+
+ This class can be used to create an HTML table (or a complete HTML file
+ containing the table) showing a side by side, line by line comparison
+ of text with inter-line and intra-line change highlights. The table can
+ be generated in either full or contextual difference mode. Additional
+ control of the format of the generated difference table can be achieved
+ by subclassing and overriding the appropriate template or method.
+
+ The following templates and methods are intended for subclass overriding:
+
+ file_template -- controls HTML file format
+ styles -- style specifications for change highlights
+ table_template -- controls difference table format
+ linenum_template -- controls format of line number column
+ legend -- legend table content
+
+ format_line -- method to markup each line
+ format_change -- method to provide change highlight markup
+
+ The following methods are provided for HTML generation:
+
+ make_table -- generates HTML for a single side by side table
+ make_file -- generates complete HTML file with a single side by side table
+
+ See tools/scripts/diff.py for an example usage of this class.
+ """
+
+ file_template = _file_template
+ styles = _styles
+ linenum_template = "%d"
+ table_template = _table_template
+ legend = _legend
+
+ def __init__(self, linejunk=None,charjunk=IS_CHARACTER_JUNK):
+ """HtmlDiff instance initializer
+
+ Arguments:
+ linejunk -- passed on to ndiff (see ndiff documentation)
+ charjunk -- passed on to ndiff (see ndiff documentation)
+ """
+ self._default_prefix = 0
+ self._linejunk = linejunk
+ self._charjunk = charjunk
+
+ def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
+ numlines=5,fromprefix=None,toprefix=None,summary='',title='',
+ header=''):
+ """Returns HTML file of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences
+ numlines -- number of context lines (needed for full differences to
+ place the "next" anchor a few lines ahead of the next change)
+ summary -- summary attribute of table string
+ fromprefix -- from line anchor name prefix
+ toprefix -- to line anchor name prefix
+ title -- window title string
+ header -- header HTML string to be placed above table
+
+ fromprefix, toprefix are used to generate unique anchors for each line
+ in the table so that specific lines may be hyperlinked to. If no
+ arguments are specified, unique prefixes are automatically generated
+ for each table.
+ """
+
+ return self.file_template % dict(
+ styles = self.styles,
+ legend = self.legend,
+ title = title,
+ header = header,
+ table = self.make_table(fromlines,tolines,fromdesc,todesc,
+ context=context,numlines=numlines,
+ summary=summary,fromprefix=fromprefix,
+ toprefix=toprefix))
+
+ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
+ numlines=5,fromprefix=None,toprefix=None,summary=''):
+ """Returns HTML table of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences
+ numlines -- number of context lines (needed for full differences to
+ place the "next" anchor a few lines ahead of the next change)
+ summary -- summary attribute of table
+ fromprefix -- from line anchor name prefix
+ toprefix -- to line anchor name prefix
+
+ fromprefix, toprefix are used to generate unique anchors for each line
+ in the table so that specific lines may be hyperlinked to. If no
+ arguments are specified, unique prefixes are automatically generated
+ for each table.
+ """
+
+ if context:
+ context = numlines
+ else:
+ context = 0
+ # if no prefix specified, generate a unique one (so multiple tables
+ # can exist on the same HTML page without conflicts).
+ if fromprefix is None:
+ fromprefix = "from%d_" % self._default_prefix
+ if toprefix is None:
+ toprefix = "to%d_" % self._default_prefix
+ self._default_prefix += 1
+ # store prefixes so line format method has access
+ self._prefix = [fromprefix,toprefix]
+ # collect up from/to lines in string, difference flags in a list
+ from_text, to_text, diff_flags = [],[],[]
+ diffs = _mdiff(fromlines,tolines,self.format_change,self.format_line,
+ context,None,linejunk=self._linejunk,
+ charjunk=self._charjunk)
+ for from_line, to_line, found_diff in diffs:
+ from_text.append(from_line)
+ to_text.append(to_line)
+ diff_flags.append(found_diff)
+ # process change flags, generating middle column of next anchors/links
+ next_id = ['']*len(diff_flags)
+ next_href = ['']*len(diff_flags)
+ num_chg, in_change = 0, False
+ last = 0
+ for i,flag in enumerate(diff_flags):
+ if flag:
+ if not in_change:
+ in_change = True
+ last = i
+ # at the beginning of a change, drop an anchor a few lines
+ # (the context lines) before the change for the previous
+ # link
+ i = max([0,i-numlines])
+ next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg)
+ # at the beginning of a change, drop a link to the next
+ # change
+ num_chg += 1
+ next_href[last] = 'n' % (
+ toprefix,num_chg)
+ else:
+ in_change = False
+ # check for cases where there is no content to avoid exceptions
+ if not diff_flags:
+ diff_flags = [False]
+ next_id = ['']
+ next_href = ['']
+ last = 0
+ if context:
+ from_text = [' No Differences Found | ']
+ to_text = from_text
+ else:
+ from_text = to_text = [' Empty File | ']
+ # if not a change on first line, drop a link
+ if not diff_flags[0]:
+ next_href[0] = 'f' % toprefix
+ # redo the last link to link to the top
+ next_href[last] = 't' % (toprefix)
+ import cStringIO
+ s = cStringIO.StringIO()
+ for i in range(len(diff_flags)):
+ if diff_flags[i] is None:
+ # mdiff yields None on separator lines
+ s.write(' \n \n')
+ else:
+ fmt = ' %s | %s' + \
+ '%s | %s
\n'
+ s.write( fmt % (next_id[i],next_href[i],from_text[i],
+ next_href[i],to_text[i]))
+ if fromdesc or todesc:
+ header_row = '%s%s%s%s
' % (
+ '
| ',
+ '' % fromdesc,
+ '
| ',
+ '' % todesc)
+ else:
+ header_row = ''
+ return self.table_template % dict(
+ summary=summary,
+ data_rows=s.getvalue(),
+ header_row=header_row,
+ prefix=toprefix)
+
+ def format_line(self,side,linenum,text):
+ """Returns marked up "from" or "to" text line
+
+ mdiff() will call this function with the following arguments:
+
+ side -- 0 or 1 indicating "from" or "to" text
+ linenum -- line number (used for line number column)
+ text -- line text to be marked up
+ """
+ try:
+ linenum = self.linenum_template % linenum
+ id = ' id="%s%s"' % (self._prefix[side],linenum)
+ except TypeError:
+ # handle blank lines where linenum is None
+ linenum = ''
+ id = ''
+ # replace those things that would get confused with HTML symbols
+ text = text.replace("&", "&")
+ text = text.replace(">", ">")
+ text = text.replace("<", "<")
+ # replace spaces with non-breakable space so they don't get compressed
+ # or line wrapped
+ text = text.replace(' ',' ').rstrip()
+ fmt = '%s | '
+ return fmt % (id,linenum,text)
+
+ def format_change(self,side,linenum,text,type):
+ """Returns HTML highlighted text
+
+ mdiff() will call this function with the following arguments:
+ side -- 0 or 1 indicating "from" or "to" text
+ linenum -- line number that contains the text (used for creating
+ "next" links)
+ text -- text to be highlighted
+ type -- +/-/^ indicating type of change
+ """
+ if type == '+':
+ return '%s' % text
+ elif type == '-':
+ return '%s' % text
+ # must be '^':
+ return '%s' % text
+
+ del re
def restore(delta, which):
r"""
*** ..\latest\test_difflib.py Thu Jul 15 11:10:44 2004
--- ..\patched\test_difflib.py Sun Jul 25 12:51:05 2004
***************
*** 19,24 ****
diff_gen = difflib.unified_diff([], [])
self.assertRaises(StopIteration, diff_gen.next)
Doctests = doctest.DocTestSuite(difflib)
! test_support.run_unittest(TestSFbugs, Doctests)
--- 19,70 ----
diff_gen = difflib.unified_diff([], [])
self.assertRaises(StopIteration, diff_gen.next)
+ patch914575_from = '''
+ 1. Beautiful is beTTer than ugly.
+ 2. Explicit is better than implicit.
+ 3. Simple is better than complex.
+ 4. Complex is better than complicated.
+ '''
+
+ patch914575_to = '''
+ 1. Beautiful is better than ugly.
+ 3. Simple is better than complex.
+ 4. Complicated is better than complex.
+ 5. Flat is better than nested.
+ '''
+
+ class TestSFpatches(unittest.TestCase):
+
+ def test_html_diff(self):
+ # Check SF patch 914575 for generating HTML differences
+ a = patch914575_from + '123\n'*10
+ a = a * 3
+ b = patch914575_to + '123\n'*10
+ b = b * 3
+ i = difflib.HtmlDiff()
+ full = i.make_file(a.splitlines(True),b.splitlines(True),'from','to',
+ context=False,numlines=5,summary='summary',
+ title='Side by Side Difference Expectations',
+ fromprefix='fromX',toprefix='toX',
+ header='Full
')
+ a = a.splitlines()
+ b = b.splitlines()
+ tables = '\n'.join(
+ ['Context
', i.make_table(a,b,'from','to',context=True),
+ 'Same Context
', i.make_table(a,a,context=True),
+ 'Same Full
', i.make_table(a,a,context=False),
+ 'Empty Context
',i.make_table([],[],context=True),
+ 'Empty Full
', i.make_table([],[],context=False)])
+ actual = full.replace('