..\latest\libdifflib.tex			..\patched\libdifflib.tex
1	\section{\module{difflib} ---	f	1	\section{\module{difflib} ---
2	Helpers for computing deltas}		2	Helpers for computing deltas}
3			3
4	\declaremodule{standard}{difflib}		4	\declaremodule{standard}{difflib}
5	\modulesynopsis{Helpers for computing differences between objects.}		5	\modulesynopsis{Helpers for computing differences between objects.}
6	\moduleauthor{Tim Peters}{tim_one@users.sourceforge.net}		6	\moduleauthor{Tim Peters}{tim_one@users.sourceforge.net}
7	\sectionauthor{Tim Peters}{tim_one@users.sourceforge.net}		7	\sectionauthor{Tim Peters}{tim_one@users.sourceforge.net}
8	% LaTeXification by Fred L. Drake, Jr. <fdrake@acm.org>.		8	% LaTeXification by Fred L. Drake, Jr. <fdrake@acm.org>.
9			9
10	\versionadded{2.1}		10	\versionadded{2.1}
11			11
12			12
13	\begin{classdesc*}{SequenceMatcher}		13	\begin{classdesc*}{SequenceMatcher}
14	This is a flexible class for comparing pairs of sequences of any		14	This is a flexible class for comparing pairs of sequences of any
15	type, so long as the sequence elements are hashable. The basic		15	type, so long as the sequence elements are hashable. The basic
16	algorithm predates, and is a little fancier than, an algorithm		16	algorithm predates, and is a little fancier than, an algorithm
17	published in the late 1980's by Ratcliff and Obershelp under the		17	published in the late 1980's by Ratcliff and Obershelp under the
18	hyperbolic name ``gestalt pattern matching.'' The idea is to find		18	hyperbolic name ``gestalt pattern matching.'' The idea is to find
19	the longest contiguous matching subsequence that contains no		19	the longest contiguous matching subsequence that contains no
20	``junk'' elements (the Ratcliff and Obershelp algorithm doesn't		20	``junk'' elements (the Ratcliff and Obershelp algorithm doesn't
21	address junk). The same idea is then applied recursively to the		21	address junk). The same idea is then applied recursively to the
22	pieces of the sequences to the left and to the right of the matching		22	pieces of the sequences to the left and to the right of the matching
23	subsequence. This does not yield minimal edit sequences, but does		23	subsequence. This does not yield minimal edit sequences, but does
24	tend to yield matches that ``look right'' to people.		24	tend to yield matches that ``look right'' to people.
25			25
26	\strong{Timing:} The basic Ratcliff-Obershelp algorithm is cubic		26	\strong{Timing:} The basic Ratcliff-Obershelp algorithm is cubic
27	time in the worst case and quadratic time in the expected case.		27	time in the worst case and quadratic time in the expected case.
28	\class{SequenceMatcher} is quadratic time for the worst case and has		28	\class{SequenceMatcher} is quadratic time for the worst case and has
29	expected-case behavior dependent in a complicated way on how many		29	expected-case behavior dependent in a complicated way on how many
30	elements the sequences have in common; best case time is linear.		30	elements the sequences have in common; best case time is linear.
31	\end{classdesc*}		31	\end{classdesc*}
32			32
33	\begin{classdesc*}{Differ}		33	\begin{classdesc*}{Differ}
34	This is a class for comparing sequences of lines of text, and		34	This is a class for comparing sequences of lines of text, and
35	producing human-readable differences or deltas. Differ uses		35	producing human-readable differences or deltas. Differ uses
36	\class{SequenceMatcher} both to compare sequences of lines, and to		36	\class{SequenceMatcher} both to compare sequences of lines, and to
37	compare sequences of characters within similar (near-matching)		37	compare sequences of characters within similar (near-matching)
38	lines.		38	lines.
39			39
40	Each line of a \class{Differ} delta begins with a two-letter code:		40	Each line of a \class{Differ} delta begins with a two-letter code:
41			41
42	\begin{tableii}{l\|l}{code}{Code}{Meaning}		42	\begin{tableii}{l\|l}{code}{Code}{Meaning}
43	\lineii{'- '}{line unique to sequence 1}		43	\lineii{'- '}{line unique to sequence 1}
44	\lineii{'+ '}{line unique to sequence 2}		44	\lineii{'+ '}{line unique to sequence 2}
45	\lineii{' '}{line common to both sequences}		45	\lineii{' '}{line common to both sequences}
46	\lineii{'? '}{line not present in either input sequence}		46	\lineii{'? '}{line not present in either input sequence}
47	\end{tableii}		47	\end{tableii}
48			48
49	Lines beginning with `\code{?~}' attempt to guide the eye to		49	Lines beginning with `\code{?~}' attempt to guide the eye to
50	intraline differences, and were not present in either input		50	intraline differences, and were not present in either input
51	sequence. These lines can be confusing if the sequences contain tab		51	sequence. These lines can be confusing if the sequences contain tab
52	characters.		52	characters.
53	\end{classdesc*}		53	\end{classdesc*}
54			54
		n	55	\begin{classdesc*}{HtmlDiff}
			56
			57	This class can be used to create an HTML table (or a complete HTML file
			58	containing the table) showing a side by side, line by line comparison
			59	of text with inter-line and intra-line changes highlighted. The table can
			60	be generated showing either the full files or just contextual differences.
			61
			62	This class may be subclassed to override certain methods and members to
			63	customize the HTML output. In general this is not necessary but for special
			64	applications where it is, please refer to the class documentation string in
			65	the \module{difflib} module. Note, this class utilizes the
			66	\function{ndiff()} function. Its optional keyword arguments for filtering
			67	are supported by the \code{__init__()} method of this class.
			68
			69	The following methods are public:
			70
			71	\begin{funcdesc}{make_file}{fromlines, tolines
			72	\optional{, fromdesc
			73	\optional{, todesc
			74	\optional{, context
			75	\optional{, numlines
			76	\optional{, title
			77	\optional{, header
			78	\optional{, summary}}}}}}}}
			79	Compares \var{fromlines} and \var{tolines} (lists of strings) and returns
			80	a string which is a complete HTML file containing a table showing line by
			81	line differences with inter-line and intra-line changes highlighted.
			82
			83	\var{fromdesc} and \var{todesc} are optional keyword arguments to specify
			84	from/to file column header strings (both default to an empty string).
			85
			86	\var{context} and \var{numlines} are both optional keyword arguments.
			87	Set \var{context} to \code{True} when contextual differences are to be
			88	shown, else the default is \code{False} to show the full files.
			89	\var{numlines} defaults to \code{5}. When \var{context} is \code{True}
			90	\var{numlines} controls the number of context lines which surround the
			91	difference highlights. When \var{context} is \code{False} \var{numlines}
			92	controls the number of lines which are shown before a difference
			93	highlight when using the ``next'' hyperlinks (setting to zero would cause
			94	the ``next'' hyperlinks to place the next difference highlight at the top of
			95	the browser without any leading context).
			96
			97	\var{title}, \var{header}, and \var{summary} are all optional keyword
			98	arguments and default to an empty string. Use \var{title} to specify the
			99	window title string, \var{header} to specify the HTML markup string to be
			100	placed above the table and \var{summary} to specify the table's 'summary'
			101	attribute string.
			102	\end{funcdesc}
			103
			104	\begin{funcdesc}{make_table}{fromlines, tolines
			105	\optional{, fromdesc
			106	\optional{, todesc
			107	\optional{, context
			108	\optional{, numlines
			109	\optional{, summary}}}}}}
			110	Compares \var{fromlines} and \var{tolines} (lists of strings) and returns
			111	a string which is a complete HTML table showing line by line differences
			112	with inter-line and intra-line changes highlighted.
			113
			114	The arguments of this method are a subset of those for the
			115	\code{make_file} method. Please refer to the \code{make_file} method
			116	documentation.
			117	\end{funcdesc}
			118
			119	\file{Tools/scripts/ndiff.py} is a command-line front-end to this class
			120	and contains a good example of its use.
			121	\end{classdesc*}
			122
55	\begin{funcdesc}{context_diff}{a, b\optional{, fromfile\optional{, tofile		123	\begin{funcdesc}{context_diff}{a, b\optional{, fromfile\optional{, tofile
56	\optional{, fromfiledate\optional{, tofiledate\optional{, n		124	\optional{, fromfiledate\optional{, tofiledate\optional{, n
57	\optional{, lineterm}}}}}}}		125	\optional{, lineterm}}}}}}}
58	Compare \var{a} and \var{b} (lists of strings); return a		126	Compare \var{a} and \var{b} (lists of strings); return a
59	delta (a generator generating the delta lines) in context diff		127	delta (a generator generating the delta lines) in context diff
60	format.		128	format.
61			129
62	Context diffs are a compact way of showing just the lines that have		130	Context diffs are a compact way of showing just the lines that have
63	changed plus a few lines of context. The changes are shown in a		131	changed plus a few lines of context. The changes are shown in a
64	before/after style. The number of context lines is set by \var{n}		132	before/after style. The number of context lines is set by \var{n}
65	which defaults to three.		133	which defaults to three.
66			134
67	By default, the diff control lines (those with \code{***} or \code{---})		135	By default, the diff control lines (those with \code{***} or \code{---})
68	are created with a trailing newline. This is helpful so that inputs created		136	are created with a trailing newline. This is helpful so that inputs created
69	from \function{file.readlines()} result in diffs that are suitable for use		137	from \function{file.readlines()} result in diffs that are suitable for use
70	with \function{file.writelines()} since both the inputs and outputs have		138	with \function{file.writelines()} since both the inputs and outputs have
71	trailing newlines.		139	trailing newlines.
72			140
73	For inputs that do not have trailing newlines, set the \var{lineterm}		141	For inputs that do not have trailing newlines, set the \var{lineterm}
74	argument to \code{""} so that the output will be uniformly newline free.		142	argument to \code{""} so that the output will be uniformly newline free.
75			143
76	The context diff format normally has a header for filenames and		144	The context diff format normally has a header for filenames and
77	modification times. Any or all of these may be specified using strings for		145	modification times. Any or all of these may be specified using strings for
78	\var{fromfile}, \var{tofile}, \var{fromfiledate}, and \var{tofiledate}.		146	\var{fromfile}, \var{tofile}, \var{fromfiledate}, and \var{tofiledate}.
79	The modification times are normally expressed in the format returned by		147	The modification times are normally expressed in the format returned by
80	\function{time.ctime()}. If not specified, the strings default to blanks.		148	\function{time.ctime()}. If not specified, the strings default to blanks.
81			149
82	\file{Tools/scripts/diff.py} is a command-line front-end for this		150	\file{Tools/scripts/diff.py} is a command-line front-end for this
83	function.		151	function.
84			152
85	\versionadded{2.3}		153	\versionadded{2.3}
86	\end{funcdesc}		154	\end{funcdesc}
87			155
88	\begin{funcdesc}{get_close_matches}{word, possibilities\optional{,		156	\begin{funcdesc}{get_close_matches}{word, possibilities\optional{,
89	n\optional{, cutoff}}}		157	n\optional{, cutoff}}}
90	Return a list of the best ``good enough'' matches. \var{word} is a		158	Return a list of the best ``good enough'' matches. \var{word} is a
91	sequence for which close matches are desired (typically a string),		159	sequence for which close matches are desired (typically a string),
92	and \var{possibilities} is a list of sequences against which to		160	and \var{possibilities} is a list of sequences against which to
93	match \var{word} (typically a list of strings).		161	match \var{word} (typically a list of strings).
94			162
95	Optional argument \var{n} (default \code{3}) is the maximum number		163	Optional argument \var{n} (default \code{3}) is the maximum number
96	of close matches to return; \var{n} must be greater than \code{0}.		164	of close matches to return; \var{n} must be greater than \code{0}.
97			165
98	Optional argument \var{cutoff} (default \code{0.6}) is a float in		166	Optional argument \var{cutoff} (default \code{0.6}) is a float in
99	the range [0, 1]. Possibilities that don't score at least that		167	the range [0, 1]. Possibilities that don't score at least that
100	similar to \var{word} are ignored.		168	similar to \var{word} are ignored.
101			169
102	The best (no more than \var{n}) matches among the possibilities are		170	The best (no more than \var{n}) matches among the possibilities are
103	returned in a list, sorted by similarity score, most similar first.		171	returned in a list, sorted by similarity score, most similar first.
104			172
105	\begin{verbatim}		173	\begin{verbatim}
106	>>> get_close_matches('appel', ['ape', 'apple', 'peach', 'puppy'])		174	>>> get_close_matches('appel', ['ape', 'apple', 'peach', 'puppy'])
107	['apple', 'ape']		175	['apple', 'ape']
108	>>> import keyword		176	>>> import keyword
109	>>> get_close_matches('wheel', keyword.kwlist)		177	>>> get_close_matches('wheel', keyword.kwlist)
110	['while']		178	['while']
111	>>> get_close_matches('apple', keyword.kwlist)		179	>>> get_close_matches('apple', keyword.kwlist)
112	[]		180	[]
113	>>> get_close_matches('accept', keyword.kwlist)		181	>>> get_close_matches('accept', keyword.kwlist)
114	['except']		182	['except']
115	\end{verbatim}		183	\end{verbatim}
		t	184	\end{funcdesc}
			185
			186	\begin{funcdesc}{mdiff}{fromlines, tolines, chgfmt, linefmt
			187	\optional{, context
			188	\optional{, sep
			189	\optional{, linejunk
			190	\optional{, charjunk}}}}}
			191	Compare \var{fromlines} and \var{tolines} (lists of strings); return a
			192	generator generating marked up differences to be used for creating side
			193	by side differences. Note, most users should use the \class{HtmlDiff} class
			194	to produce HTML side by side differences.
			195
			196	\var{chgfmt} and \var{linefmt} are for filter functions to mark up intra-line
			197	differences and complete lines respectively.
			198
			199	Optional keyword parameter \var{context} specifies number of lines of
			200	context (default is \code{None} for full differences) while \var{sep}
			201	specifies the separator string between contextual differences (default
			202	is \code{None}).
			203
			204	Optional keyword parameters \var{linejunk} and \var{charjunk} are
			205	for filter functions (or \code{None}) and their description can be found
			206	in the \function{ndiff()} documentation (\function{mdiff()} utilizes
			207	\function{ndiff()} and passes these parameters to it).
			208
			209	This function was originally developed for use by the \class{HtmlDiff} class for
			210	generating side by side HTML differences but can be used for generating
			211	side by side difference representations for any markup language. See
			212	\class{HtmlDiff} for an example usage of this function.
116	\end{funcdesc}		213	\end{funcdesc}
117			214
118	\begin{funcdesc}{ndiff}{a, b\optional{, linejunk\optional{,		215	\begin{funcdesc}{ndiff}{a, b\optional{, linejunk\optional{,
119	charjunk}}}		216	charjunk}}}
120	Compare \var{a} and \var{b} (lists of strings); return a		217	Compare \var{a} and \var{b} (lists of strings); return a
121	\class{Differ}-style delta (a generator generating the delta lines).		218	\class{Differ}-style delta (a generator generating the delta lines).
122			219
123	Optional keyword parameters \var{linejunk} and \var{charjunk} are		220	Optional keyword parameters \var{linejunk} and \var{charjunk} are
124	for filter functions (or \code{None}):		221	for filter functions (or \code{None}):
125			222
126	\var{linejunk}: A function that accepts a single string		223	\var{linejunk}: A function that accepts a single string
127	argument, and returns true if the string is junk, or false if not.		224	argument, and returns true if the string is junk, or false if not.
128	The default is (\code{None}), starting with Python 2.3. Before then,		225	The default is (\code{None}), starting with Python 2.3. Before then,
129	the default was the module-level function		226	the default was the module-level function
130	\function{IS_LINE_JUNK()}, which filters out lines without visible		227	\function{IS_LINE_JUNK()}, which filters out lines without visible
131	characters, except for at most one pound character (\character{\#}).		228	characters, except for at most one pound character (\character{\#}).
132	As of Python 2.3, the underlying \class{SequenceMatcher} class		229	As of Python 2.3, the underlying \class{SequenceMatcher} class
133	does a dynamic analysis of which lines are so frequent as to		230	does a dynamic analysis of which lines are so frequent as to
134	constitute noise, and this usually works better than the pre-2.3		231	constitute noise, and this usually works better than the pre-2.3
135	default.		232	default.
136			233
137	\var{charjunk}: A function that accepts a character (a string of		234	\var{charjunk}: A function that accepts a character (a string of
137	length 1), and returns true if the character is junk, or false if not.		235	length 1), and returns true if the character is junk, or false if not.
139	The default is module-level function \function{IS_CHARACTER_JUNK()},		236	The default is module-level function \function{IS_CHARACTER_JUNK()},
140	which filters out whitespace characters (a blank or tab; note: bad		237	which filters out whitespace characters (a blank or tab; note: bad
141	idea to include newline in this!).		238	idea to include newline in this!).
142			239
143	\file{Tools/scripts/ndiff.py} is a command-line front-end to this		240	\file{Tools/scripts/ndiff.py} is a command-line front-end to this
144	function.		241	function.
145			242
146	\begin{verbatim}		243	\begin{verbatim}
147	>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),		244	>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
148	... 'ore\ntree\nemu\n'.splitlines(1))		245	... 'ore\ntree\nemu\n'.splitlines(1))
149	>>> print ''.join(diff),		246	>>> print ''.join(diff),
150	- one		247	- one
151	? ^		248	? ^
152	+ ore		249	+ ore
153	? ^		250	? ^
154	- two		251	- two
155	- three		252	- three
156	? -		253	? -
157	+ tree		254	+ tree
158	+ emu		255	+ emu
159	\end{verbatim}		256	\end{verbatim}
160	\end{funcdesc}		257	\end{funcdesc}
161			258
162	\begin{funcdesc}{restore}{sequence, which}		259	\begin{funcdesc}{restore}{sequence, which}
163	Return one of the two sequences that generated a delta.		260	Return one of the two sequences that generated a delta.
164			261
165	Given a \var{sequence} produced by \method{Differ.compare()} or		262	Given a \var{sequence} produced by \method{Differ.compare()} or
166	\function{ndiff()}, extract lines originating from file 1 or 2		263	\function{ndiff()}, extract lines originating from file 1 or 2
167	(parameter \var{which}), stripping off line prefixes.		264	(parameter \var{which}), stripping off line prefixes.
168			265
169	Example:		266	Example:
170			267
171	\begin{verbatim}		268	\begin{verbatim}
172	>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),		269	>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
173	... 'ore\ntree\nemu\n'.splitlines(1))		270	... 'ore\ntree\nemu\n'.splitlines(1))
174	>>> diff = list(diff) # materialize the generated delta into a list		271	>>> diff = list(diff) # materialize the generated delta into a list
175	>>> print ''.join(restore(diff, 1)),		272	>>> print ''.join(restore(diff, 1)),
176	one		273	one
177	two		274	two
178	three		275	three
179	>>> print ''.join(restore(diff, 2)),		276	>>> print ''.join(restore(diff, 2)),
180	ore		277	ore
181	tree		278	tree
182	emu		279	emu
183	\end{verbatim}		280	\end{verbatim}
184			281
185	\end{funcdesc}		282	\end{funcdesc}
186			283
187	\begin{funcdesc}{unified_diff}{a, b\optional{, fromfile\optional{, tofile		284	\begin{funcdesc}{unified_diff}{a, b\optional{, fromfile\optional{, tofile
188	\optional{, fromfiledate\optional{, tofiledate\optional{, n		285	\optional{, fromfiledate\optional{, tofiledate\optional{, n
189	\optional{, lineterm}}}}}}}		286	\optional{, lineterm}}}}}}}
190	Compare \var{a} and \var{b} (lists of strings); return a		287	Compare \var{a} and \var{b} (lists of strings); return a
191	delta (a generator generating the delta lines) in unified diff		288	delta (a generator generating the delta lines) in unified diff
192	format.		289	format.
193			290
194	Unified diffs are a compact way of showing just the lines that have		291	Unified diffs are a compact way of showing just the lines that have
195	changed plus a few lines of context. The changes are shown in an		292	changed plus a few lines of context. The changes are shown in an
196	inline style (instead of separate before/after blocks). The number		293	inline style (instead of separate before/after blocks). The number
197	of context lines is set by \var{n} which defaults to three.		294	of context lines is set by \var{n} which defaults to three.
198			295
199	By default, the diff control lines (those with \code{---}, \code{+++},		296	By default, the diff control lines (those with \code{---}, \code{+++},
200	or \code{@@}) are created with a trailing newline. This is helpful so		297	or \code{@@}) are created with a trailing newline. This is helpful so
201	that inputs created from \function{file.readlines()} result in diffs		298	that inputs created from \function{file.readlines()} result in diffs
202	that are suitable for use with \function{file.writelines()} since both		299	that are suitable for use with \function{file.writelines()} since both
203	the inputs and outputs have trailing newlines.		300	the inputs and outputs have trailing newlines.
204			301
205	For inputs that do not have trailing newlines, set the \var{lineterm}		302	For inputs that do not have trailing newlines, set the \var{lineterm}
206	argument to \code{""} so that the output will be uniformly newline free.		303	argument to \code{""} so that the output will be uniformly newline free.
207			304
208	The unified diff format normally has a header for filenames and		305	The unified diff format normally has a header for filenames and
209	modification times. Any or all of these may be specified using strings for		306	modification times. Any or all of these may be specified using strings for
210	\var{fromfile}, \var{tofile}, \var{fromfiledate}, and \var{tofiledate}.		307	\var{fromfile}, \var{tofile}, \var{fromfiledate}, and \var{tofiledate}.
211	The modification times are normally expressed in the format returned by		308	The modification times are normally expressed in the format returned by
212	\function{time.ctime()}. If not specified, the strings default to blanks.		309	\function{time.ctime()}. If not specified, the strings default to blanks.
213			310
214	\file{Tools/scripts/diff.py} is a command-line front-end for this		311	\file{Tools/scripts/diff.py} is a command-line front-end for this
215	function.		312	function.
216			313
217	\versionadded{2.3}		314	\versionadded{2.3}
218	\end{funcdesc}		315	\end{funcdesc}
219			316
220	\begin{funcdesc}{IS_LINE_JUNK}{line}		317	\begin{funcdesc}{IS_LINE_JUNK}{line}
221	Return true for ignorable lines. The line \var{line} is ignorable		318	Return true for ignorable lines. The line \var{line} is ignorable
222	if \var{line} is blank or contains a single \character{\#},		319	if \var{line} is blank or contains a single \character{\#},
223	otherwise it is not ignorable. Used as a default for parameter		320	otherwise it is not ignorable. Used as a default for parameter
224	\var{linejunk} in \function{ndiff()} before Python 2.3.		321	\var{linejunk} in \function{ndiff()} before Python 2.3.
225	\end{funcdesc}		322	\end{funcdesc}
226			323
227			324
228	\begin{funcdesc}{IS_CHARACTER_JUNK}{ch}		325	\begin{funcdesc}{IS_CHARACTER_JUNK}{ch}
229	Return true for ignorable characters. The character \var{ch} is		326	Return true for ignorable characters. The character \var{ch} is
230	ignorable if \var{ch} is a space or tab, otherwise it is not		327	ignorable if \var{ch} is a space or tab, otherwise it is not
231	ignorable. Used as a default for parameter \var{charjunk} in		328	ignorable. Used as a default for parameter \var{charjunk} in
232	\function{ndiff()}.		329	\function{ndiff()}.
233	\end{funcdesc}		330	\end{funcdesc}
234			331
235			332
236	\begin{seealso}		333	\begin{seealso}
237	\seetitle[http://www.ddj.com/documents/s=1103/ddj8807c/]		334	\seetitle[http://www.ddj.com/documents/s=1103/ddj8807c/]
238	{Pattern Matching: The Gestalt Approach}{Discussion of a		335	{Pattern Matching: The Gestalt Approach}{Discussion of a
239	similar algorithm by John W. Ratcliff and D. E. Metzener.		336	similar algorithm by John W. Ratcliff and D. E. Metzener.
240	This was published in		337	This was published in
241	\citetitle[http://www.ddj.com/]{Dr. Dobb's Journal} in		338	\citetitle[http://www.ddj.com/]{Dr. Dobb's Journal} in
242	July, 1988.}		339	July, 1988.}
243	\end{seealso}		340	\end{seealso}
244			341
245			342
246	\subsection{SequenceMatcher Objects \label{sequence-matcher}}		343	\subsection{SequenceMatcher Objects \label{sequence-matcher}}
247			344
248	The \class{SequenceMatcher} class has this constructor:		345	The \class{SequenceMatcher} class has this constructor:
249			346
250	\begin{classdesc}{SequenceMatcher}{\optional{isjunk\optional{,		347	\begin{classdesc}{SequenceMatcher}{\optional{isjunk\optional{,
251	a\optional{, b}}}}		348	a\optional{, b}}}}
252	Optional argument \var{isjunk} must be \code{None} (the default) or		349	Optional argument \var{isjunk} must be \code{None} (the default) or
253	a one-argument function that takes a sequence element and returns		350	a one-argument function that takes a sequence element and returns
254	true if and only if the element is ``junk'' and should be ignored.		351	true if and only if the element is ``junk'' and should be ignored.
255	Passing \code{None} for \var{isjunk} is equivalent to passing		352	Passing \code{None} for \var{isjunk} is equivalent to passing
256	\code{lambda x: 0}; in other words, no elements are ignored. For		353	\code{lambda x: 0}; in other words, no elements are ignored. For
257	example, pass:		354	example, pass:
258			355
259	\begin{verbatim}		356	\begin{verbatim}
260	lambda x: x in " \t"		357	lambda x: x in " \t"
261	\end{verbatim}		358	\end{verbatim}
262			359
263	if you're comparing lines as sequences of characters, and don't want		360	if you're comparing lines as sequences of characters, and don't want
264	to synch up on blanks or hard tabs.		361	to synch up on blanks or hard tabs.
265			362
266	The optional arguments \var{a} and \var{b} are sequences to be		363	The optional arguments \var{a} and \var{b} are sequences to be
267	compared; both default to empty strings. The elements of both		364	compared; both default to empty strings. The elements of both
268	sequences must be hashable.		365	sequences must be hashable.
269	\end{classdesc}		366	\end{classdesc}
270			367
271			368
272	\class{SequenceMatcher} objects have the following methods:		369	\class{SequenceMatcher} objects have the following methods:
273			370
274	\begin{methoddesc}{set_seqs}{a, b}		371	\begin{methoddesc}{set_seqs}{a, b}
275	Set the two sequences to be compared.		372	Set the two sequences to be compared.
276	\end{methoddesc}		373	\end{methoddesc}
277			374
278	\class{SequenceMatcher} computes and caches detailed information about		375	\class{SequenceMatcher} computes and caches detailed information about
279	the second sequence, so if you want to compare one sequence against		376	the second sequence, so if you want to compare one sequence against
280	many sequences, use \method{set_seq2()} to set the commonly used		377	many sequences, use \method{set_seq2()} to set the commonly used
281	sequence once and call \method{set_seq1()} repeatedly, once for each		378	sequence once and call \method{set_seq1()} repeatedly, once for each
282	of the other sequences.		379	of the other sequences.
283			380
284	\begin{methoddesc}{set_seq1}{a}		381	\begin{methoddesc}{set_seq1}{a}
285	Set the first sequence to be compared. The second sequence to be		382	Set the first sequence to be compared. The second sequence to be
286	compared is not changed.		383	compared is not changed.
287	\end{methoddesc}		384	\end{methoddesc}
288			385
289	\begin{methoddesc}{set_seq2}{b}		386	\begin{methoddesc}{set_seq2}{b}
290	Set the second sequence to be compared. The first sequence to be		387	Set the second sequence to be compared. The first sequence to be
291	compared is not changed.		388	compared is not changed.
292	\end{methoddesc}		389	\end{methoddesc}
293			390
294	\begin{methoddesc}{find_longest_match}{alo, ahi, blo, bhi}		391	\begin{methoddesc}{find_longest_match}{alo, ahi, blo, bhi}
295	Find longest matching block in \code{\var{a}[\var{alo}:\var{ahi}]}		392	Find longest matching block in \code{\var{a}[\var{alo}:\var{ahi}]}
296	and \code{\var{b}[\var{blo}:\var{bhi}]}.		393	and \code{\var{b}[\var{blo}:\var{bhi}]}.
297			394
298	If \var{isjunk} was omitted or \code{None},		395	If \var{isjunk} was omitted or \code{None},
299	\method{find_longest_match()} returns \code{(\var{i}, \var{j},		396	\method{find_longest_match()} returns \code{(\var{i}, \var{j},
300	\var{k})} such that \code{\var{a}[\var{i}:\var{i}+\var{k}]} is equal		397	\var{k})} such that \code{\var{a}[\var{i}:\var{i}+\var{k}]} is equal
301	to \code{\var{b}[\var{j}:\var{j}+\var{k}]}, where		398	to \code{\var{b}[\var{j}:\var{j}+\var{k}]}, where
302	\code{\var{alo} <= \var{i} <= \var{i}+\var{k} <= \var{ahi}} and		399	\code{\var{alo} <= \var{i} <= \var{i}+\var{k} <= \var{ahi}} and
303	\code{\var{blo} <= \var{j} <= \var{j}+\var{k} <= \var{bhi}}.		400	\code{\var{blo} <= \var{j} <= \var{j}+\var{k} <= \var{bhi}}.
304	For all \code{(\var{i'}, \var{j'}, \var{k'})} meeting those		401	For all \code{(\var{i'}, \var{j'}, \var{k'})} meeting those
305	conditions, the additional conditions		402	conditions, the additional conditions
306	\code{\var{k} >= \var{k'}},		403	\code{\var{k} >= \var{k'}},
307	\code{\var{i} <= \var{i'}},		404	\code{\var{i} <= \var{i'}},
308	and if \code{\var{i} == \var{i'}}, \code{\var{j} <= \var{j'}}		405	and if \code{\var{i} == \var{i'}}, \code{\var{j} <= \var{j'}}
309	are also met.		406	are also met.
310	In other words, of all maximal matching blocks, return one that		407	In other words, of all maximal matching blocks, return one that
311	starts earliest in \var{a}, and of all those maximal matching blocks		408	starts earliest in \var{a}, and of all those maximal matching blocks
312	that start earliest in \var{a}, return the one that starts earliest		409	that start earliest in \var{a}, return the one that starts earliest
313	in \var{b}.		410	in \var{b}.
314			411
315	\begin{verbatim}		412	\begin{verbatim}
316	>>> s = SequenceMatcher(None, " abcd", "abcd abcd")		413	>>> s = SequenceMatcher(None, " abcd", "abcd abcd")
317	>>> s.find_longest_match(0, 5, 0, 9)		414	>>> s.find_longest_match(0, 5, 0, 9)
318	(0, 4, 5)		415	(0, 4, 5)
319	\end{verbatim}		416	\end{verbatim}
320			417
321	If \var{isjunk} was provided, first the longest matching block is		418	If \var{isjunk} was provided, first the longest matching block is
322	determined as above, but with the additional restriction that no		419	determined as above, but with the additional restriction that no
323	junk element appears in the block. Then that block is extended as		420	junk element appears in the block. Then that block is extended as
324	far as possible by matching (only) junk elements on both sides.		421	far as possible by matching (only) junk elements on both sides.
325	So the resulting block never matches on junk except as identical		422	So the resulting block never matches on junk except as identical
326	junk happens to be adjacent to an interesting match.		423	junk happens to be adjacent to an interesting match.
327			424
328	Here's the same example as before, but considering blanks to be junk.		425	Here's the same example as before, but considering blanks to be junk.
329	That prevents \code{' abcd'} from matching the \code{' abcd'} at the		426	That prevents \code{' abcd'} from matching the \code{' abcd'} at the
330	tail end of the second sequence directly. Instead only the		427	tail end of the second sequence directly. Instead only the
331	\code{'abcd'} can match, and matches the leftmost \code{'abcd'} in		428	\code{'abcd'} can match, and matches the leftmost \code{'abcd'} in
332	the second sequence:		429	the second sequence:
333			430
334	\begin{verbatim}		431	\begin{verbatim}
335	>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")		432	>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
336	>>> s.find_longest_match(0, 5, 0, 9)		433	>>> s.find_longest_match(0, 5, 0, 9)
337	(1, 0, 4)		434	(1, 0, 4)
338	\end{verbatim}		435	\end{verbatim}
339			436
340	If no blocks match, this returns \code{(\var{alo}, \var{blo}, 0)}.		437	If no blocks match, this returns \code{(\var{alo}, \var{blo}, 0)}.
341	\end{methoddesc}		438	\end{methoddesc}
342			439
343	\begin{methoddesc}{get_matching_blocks}{}		440	\begin{methoddesc}{get_matching_blocks}{}
344	Return list of triples describing matching subsequences.		441	Return list of triples describing matching subsequences.
345	Each triple is of the form \code{(\var{i}, \var{j}, \var{n})}, and		442	Each triple is of the form \code{(\var{i}, \var{j}, \var{n})}, and
346	means that \code{\var{a}[\var{i}:\var{i}+\var{n}] ==		443	means that \code{\var{a}[\var{i}:\var{i}+\var{n}] ==
347	\var{b}[\var{j}:\var{j}+\var{n}]}. The triples are monotonically		444	\var{b}[\var{j}:\var{j}+\var{n}]}. The triples are monotonically
348	increasing in \var{i} and \var{j}.		445	increasing in \var{i} and \var{j}.
349			446
350	The last triple is a dummy, and has the value \code{(len(\var{a}),		447	The last triple is a dummy, and has the value \code{(len(\var{a}),
351	len(\var{b}), 0)}. It is the only triple with \code{\var{n} == 0}.		448	len(\var{b}), 0)}. It is the only triple with \code{\var{n} == 0}.
352	% Explain why a dummy is used!		449	% Explain why a dummy is used!
353			450
354	\begin{verbatim}		451	\begin{verbatim}
355	>>> s = SequenceMatcher(None, "abxcd", "abcd")		452	>>> s = SequenceMatcher(None, "abxcd", "abcd")
356	>>> s.get_matching_blocks()		453	>>> s.get_matching_blocks()
357	[(0, 0, 2), (3, 2, 2), (5, 4, 0)]		454	[(0, 0, 2), (3, 2, 2), (5, 4, 0)]
358	\end{verbatim}		455	\end{verbatim}
359	\end{methoddesc}		456	\end{methoddesc}
360			457
361	\begin{methoddesc}{get_opcodes}{}		458	\begin{methoddesc}{get_opcodes}{}
362	Return list of 5-tuples describing how to turn \var{a} into \var{b}.		459	Return list of 5-tuples describing how to turn \var{a} into \var{b}.
363	Each tuple is of the form \code{(\var{tag}, \var{i1}, \var{i2},		460	Each tuple is of the form \code{(\var{tag}, \var{i1}, \var{i2},
364	\var{j1}, \var{j2})}. The first tuple has \code{\var{i1} ==		461	\var{j1}, \var{j2})}. The first tuple has \code{\var{i1} ==
365	\var{j1} == 0}, and remaining tuples have \var{i1} equal to the		462	\var{j1} == 0}, and remaining tuples have \var{i1} equal to the
366	\var{i2} from the preceding tuple, and, likewise, \var{j1} equal to		463	\var{i2} from the preceding tuple, and, likewise, \var{j1} equal to
367	the previous \var{j2}.		464	the previous \var{j2}.
368			465
369	The \var{tag} values are strings, with these meanings:		466	The \var{tag} values are strings, with these meanings:
370			467
371	\begin{tableii}{l|l}{code}{Value}{Meaning}		468	\begin{tableii}{l|l}{code}{Value}{Meaning}
372	\lineii{'replace'}{\code{\var{a}[\var{i1}:\var{i2}]} should be		469	\lineii{'replace'}{\code{\var{a}[\var{i1}:\var{i2}]} should be
373	replaced by \code{\var{b}[\var{j1}:\var{j2}]}.}		470	replaced by \code{\var{b}[\var{j1}:\var{j2}]}.}
374	\lineii{'delete'}{\code{\var{a}[\var{i1}:\var{i2}]} should be		471	\lineii{'delete'}{\code{\var{a}[\var{i1}:\var{i2}]} should be
375	deleted. Note that \code{\var{j1} == \var{j2}} in		472	deleted. Note that \code{\var{j1} == \var{j2}} in
376	this case.}		473	this case.}
377	\lineii{'insert'}{\code{\var{b}[\var{j1}:\var{j2}]} should be		474	\lineii{'insert'}{\code{\var{b}[\var{j1}:\var{j2}]} should be
378	inserted at \code{\var{a}[\var{i1}:\var{i1}]}.		475	inserted at \code{\var{a}[\var{i1}:\var{i1}]}.
379	Note that \code{\var{i1} == \var{i2}} in this		476	Note that \code{\var{i1} == \var{i2}} in this
380	case.}		477	case.}
381	\lineii{'equal'}{\code{\var{a}[\var{i1}:\var{i2}] ==		478	\lineii{'equal'}{\code{\var{a}[\var{i1}:\var{i2}] ==
382	\var{b}[\var{j1}:\var{j2}]} (the sub-sequences are		479	\var{b}[\var{j1}:\var{j2}]} (the sub-sequences are
383	equal).}		480	equal).}
384	\end{tableii}		481	\end{tableii}
385			482
386	For example:		483	For example:
387			484
388	\begin{verbatim}		485	\begin{verbatim}
389	>>> a = "qabxcd"		486	>>> a = "qabxcd"
390	>>> b = "abycdf"		487	>>> b = "abycdf"
391	>>> s = SequenceMatcher(None, a, b)		488	>>> s = SequenceMatcher(None, a, b)
392	>>> for tag, i1, i2, j1, j2 in s.get_opcodes():		489	>>> for tag, i1, i2, j1, j2 in s.get_opcodes():
393	...     print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %		490	...     print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
394	...            (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))		491	...            (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))
395	delete a[0:1] (q) b[0:0] ()		492	delete a[0:1] (q) b[0:0] ()
396	equal a[1:3] (ab) b[0:2] (ab)		493	equal a[1:3] (ab) b[0:2] (ab)
397	replace a[3:4] (x) b[2:3] (y)		494	replace a[3:4] (x) b[2:3] (y)
398	equal a[4:6] (cd) b[3:5] (cd)		495	equal a[4:6] (cd) b[3:5] (cd)
399	insert a[6:6] () b[5:6] (f)		496	insert a[6:6] () b[5:6] (f)
400	\end{verbatim}		497	\end{verbatim}
401	\end{methoddesc}		498	\end{methoddesc}
402			499
403	\begin{methoddesc}{get_grouped_opcodes}{\optional{n}}		500	\begin{methoddesc}{get_grouped_opcodes}{\optional{n}}
404	Return a generator of groups with up to \var{n} lines of context.		501	Return a generator of groups with up to \var{n} lines of context.
405			502
406	Starting with the groups returned by \method{get_opcodes()},		503	Starting with the groups returned by \method{get_opcodes()},
407	this method splits out smaller change clusters and eliminates		504	this method splits out smaller change clusters and eliminates
408	intervening ranges which have no changes.		505	intervening ranges which have no changes.
409			506
410	The groups are returned in the same format as \method{get_opcodes()}.		507	The groups are returned in the same format as \method{get_opcodes()}.
411	\versionadded{2.3}		508	\versionadded{2.3}
412	\end{methoddesc}		509	\end{methoddesc}
413			510
414	\begin{methoddesc}{ratio}{}		511	\begin{methoddesc}{ratio}{}
415	Return a measure of the sequences' similarity as a float in the		512	Return a measure of the sequences' similarity as a float in the
416	range [0, 1].		513	range [0, 1].
417			514
418	Where T is the total number of elements in both sequences, and M is		515	Where T is the total number of elements in both sequences, and M is
419	the number of matches, this is 2.0*M / T. Note that this is		516	the number of matches, this is 2.0*M / T. Note that this is
420	\code{1.0} if the sequences are identical, and \code{0.0} if they		517	\code{1.0} if the sequences are identical, and \code{0.0} if they
421	have nothing in common.		518	have nothing in common.
422			519
423	This is expensive to compute if \method{get_matching_blocks()} or		520	This is expensive to compute if \method{get_matching_blocks()} or
424	\method{get_opcodes()} hasn't already been called, in which case you		521	\method{get_opcodes()} hasn't already been called, in which case you
425	may want to try \method{quick_ratio()} or		522	may want to try \method{quick_ratio()} or
426	\method{real_quick_ratio()} first to get an upper bound.		523	\method{real_quick_ratio()} first to get an upper bound.
427	\end{methoddesc}		524	\end{methoddesc}
428			525
429	\begin{methoddesc}{quick_ratio}{}		526	\begin{methoddesc}{quick_ratio}{}
430	Return an upper bound on \method{ratio()} relatively quickly.		527	Return an upper bound on \method{ratio()} relatively quickly.
431			528
432	This isn't defined beyond that it is an upper bound on		529	This isn't defined beyond that it is an upper bound on
433	\method{ratio()}, and is faster to compute.		530	\method{ratio()}, and is faster to compute.
434	\end{methoddesc}		531	\end{methoddesc}
435			532
436	\begin{methoddesc}{real_quick_ratio}{}		533	\begin{methoddesc}{real_quick_ratio}{}
437	Return an upper bound on \method{ratio()} very quickly.		534	Return an upper bound on \method{ratio()} very quickly.
438			535
439	This isn't defined beyond that it is an upper bound on		536	This isn't defined beyond that it is an upper bound on
440	\method{ratio()}, and is faster to compute than either		537	\method{ratio()}, and is faster to compute than either
441	\method{ratio()} or \method{quick_ratio()}.		538	\method{ratio()} or \method{quick_ratio()}.
442	\end{methoddesc}		539	\end{methoddesc}
443			540
444	The three methods that return the ratio of matching to total characters		541	The three methods that return the ratio of matching to total characters
445	can give different results due to differing levels of approximation,		542	can give different results due to differing levels of approximation,
446	although \method{quick_ratio()} and \method{real_quick_ratio()} are always		543	although \method{quick_ratio()} and \method{real_quick_ratio()} are always
447	at least as large as \method{ratio()}:		544	at least as large as \method{ratio()}:
448			545
449	\begin{verbatim}		546	\begin{verbatim}
450	>>> s = SequenceMatcher(None, "abcd", "bcde")		547	>>> s = SequenceMatcher(None, "abcd", "bcde")
451	>>> s.ratio()		548	>>> s.ratio()
452	0.75		549	0.75
453	>>> s.quick_ratio()		550	>>> s.quick_ratio()
454	0.75		551	0.75
455	>>> s.real_quick_ratio()		552	>>> s.real_quick_ratio()
456	1.0		553	1.0
457	\end{verbatim}		554	\end{verbatim}
458			555
459			556
460	\subsection{SequenceMatcher Examples \label{sequencematcher-examples}}		557	\subsection{SequenceMatcher Examples \label{sequencematcher-examples}}
461			558
462			559
463	This example compares two strings, considering blanks to be ``junk:''		560	This example compares two strings, considering blanks to be ``junk:''
464			561
465	\begin{verbatim}		562	\begin{verbatim}
466	>>> s = SequenceMatcher(lambda x: x == " ",		563	>>> s = SequenceMatcher(lambda x: x == " ",
467	... "private Thread currentThread;",		564	... "private Thread currentThread;",
468	... "private volatile Thread currentThread;")		565	... "private volatile Thread currentThread;")
469	\end{verbatim}		566	\end{verbatim}
470			567
471	\method{ratio()} returns a float in [0, 1], measuring the similarity		568	\method{ratio()} returns a float in [0, 1], measuring the similarity
472	of the sequences. As a rule of thumb, a \method{ratio()} value over		569	of the sequences. As a rule of thumb, a \method{ratio()} value over
473	0.6 means the sequences are close matches:		570	0.6 means the sequences are close matches:
474			571
475	\begin{verbatim}		572	\begin{verbatim}
476	>>> print round(s.ratio(), 3)		573	>>> print round(s.ratio(), 3)
477	0.866		574	0.866
478	\end{verbatim}		575	\end{verbatim}
479			576
480	If you're only interested in where the sequences match,		577	If you're only interested in where the sequences match,
481	\method{get_matching_blocks()} is handy:		578	\method{get_matching_blocks()} is handy:
482			579
483	\begin{verbatim}		580	\begin{verbatim}
484	>>> for block in s.get_matching_blocks():		581	>>> for block in s.get_matching_blocks():
485	...     print "a[%d] and b[%d] match for %d elements" % block		582	...     print "a[%d] and b[%d] match for %d elements" % block
486	a[0] and b[0] match for 8 elements		583	a[0] and b[0] match for 8 elements
487	a[8] and b[17] match for 6 elements		584	a[8] and b[17] match for 6 elements
488	a[14] and b[23] match for 15 elements		585	a[14] and b[23] match for 15 elements
489	a[29] and b[38] match for 0 elements		586	a[29] and b[38] match for 0 elements
490	\end{verbatim}		587	\end{verbatim}
491			588
492	Note that the last tuple returned by \method{get_matching_blocks()} is		589	Note that the last tuple returned by \method{get_matching_blocks()} is
493	always a dummy, \code{(len(\var{a}), len(\var{b}), 0)}, and this is		590	always a dummy, \code{(len(\var{a}), len(\var{b}), 0)}, and this is
494	the only case in which the last tuple element (number of elements		591	the only case in which the last tuple element (number of elements
495	matched) is \code{0}.		592	matched) is \code{0}.
496			593
497	If you want to know how to change the first sequence into the second,		594	If you want to know how to change the first sequence into the second,
498	use \method{get_opcodes()}:		595	use \method{get_opcodes()}:
499			596
500	\begin{verbatim}		597	\begin{verbatim}
501	>>> for opcode in s.get_opcodes():		598	>>> for opcode in s.get_opcodes():
502	...     print "%6s a[%d:%d] b[%d:%d]" % opcode		599	...     print "%6s a[%d:%d] b[%d:%d]" % opcode
503	equal a[0:8] b[0:8]		600	equal a[0:8] b[0:8]
504	insert a[8:8] b[8:17]		601	insert a[8:8] b[8:17]
505	equal a[8:14] b[17:23]		602	equal a[8:14] b[17:23]
506	equal a[14:29] b[23:38]		603	equal a[14:29] b[23:38]
507	\end{verbatim}		604	\end{verbatim}
508			605
509	See also the function \function{get_close_matches()} in this module,		606	See also the function \function{get_close_matches()} in this module,
510	which shows how simple code building on \class{SequenceMatcher} can be		607	which shows how simple code building on \class{SequenceMatcher} can be
511	used to do useful work.		608	used to do useful work.
512			609
513			610
514	\subsection{Differ Objects \label{differ-objects}}		611	\subsection{Differ Objects \label{differ-objects}}
515			612
516	Note that \class{Differ}-generated deltas make no claim to be		613	Note that \class{Differ}-generated deltas make no claim to be
517	\strong{minimal} diffs. To the contrary, minimal diffs are often		614	\strong{minimal} diffs. To the contrary, minimal diffs are often
518	counter-intuitive, because they synch up anywhere possible, sometimes		615	counter-intuitive, because they synch up anywhere possible, sometimes
519	accidental matches 100 pages apart. Restricting synch points to		616	accidental matches 100 pages apart. Restricting synch points to
520	contiguous matches preserves some notion of locality, at the		617	contiguous matches preserves some notion of locality, at the
521	occasional cost of producing a longer diff.		618	occasional cost of producing a longer diff.
522			619
523	The \class{Differ} class has this constructor:		620	The \class{Differ} class has this constructor:
524			621
525	\begin{classdesc}{Differ}{\optional{linejunk\optional{, charjunk}}}		622	\begin{classdesc}{Differ}{\optional{linejunk\optional{, charjunk}}}
526	Optional keyword parameters \var{linejunk} and \var{charjunk} are		623	Optional keyword parameters \var{linejunk} and \var{charjunk} are
527	for filter functions (or \code{None}):		624	for filter functions (or \code{None}):
528			625
529	\var{linejunk}: A function that accepts a single string		626	\var{linejunk}: A function that accepts a single string
530	argument, and returns true if the string is junk. The default is		627	argument, and returns true if the string is junk. The default is
531	\code{None}, meaning that no line is considered junk.		628	\code{None}, meaning that no line is considered junk.
532			629
533	\var{charjunk}: A function that accepts a single character argument		630	\var{charjunk}: A function that accepts a single character argument
534	(a string of length 1), and returns true if the character is junk.		631	(a string of length 1), and returns true if the character is junk.
535	The default is \code{None}, meaning that no character is		632	The default is \code{None}, meaning that no character is
536	considered junk.		633	considered junk.
537	\end{classdesc}		634	\end{classdesc}
538			635
539	\class{Differ} objects are used (deltas generated) via a single		636	\class{Differ} objects are used (deltas generated) via a single
540	method:		637	method:
541			638
542	\begin{methoddesc}{compare}{a, b}		639	\begin{methoddesc}{compare}{a, b}
543	Compare two sequences of lines, and generate the delta (a sequence		640	Compare two sequences of lines, and generate the delta (a sequence
544	of lines).		641	of lines).
545			642
546	Each sequence must contain individual single-line strings ending		643	Each sequence must contain individual single-line strings ending
547	with newlines. Such sequences can be obtained from the		644	with newlines. Such sequences can be obtained from the
548	\method{readlines()} method of file-like objects. The delta generated		645	\method{readlines()} method of file-like objects. The delta generated
549	also consists of newline-terminated strings, ready to be printed as-is		646	also consists of newline-terminated strings, ready to be printed as-is
550	via the \method{writelines()} method of a file-like object.		647	via the \method{writelines()} method of a file-like object.
551	\end{methoddesc}		648	\end{methoddesc}
552			649
553			650
554	\subsection{Differ Example \label{differ-examples}}		651	\subsection{Differ Example \label{differ-examples}}
555			652
556	This example compares two texts. First we set up the texts, sequences		653	This example compares two texts. First we set up the texts, sequences
557	of individual single-line strings ending with newlines (such sequences		654	of individual single-line strings ending with newlines (such sequences
558	can also be obtained from the \method{readlines()} method of file-like		655	can also be obtained from the \method{readlines()} method of file-like
559	objects):		656	objects):
560			657
561	\begin{verbatim}		658	\begin{verbatim}
562	>>> text1 = ''' 1. Beautiful is better than ugly.		659	>>> text1 = ''' 1. Beautiful is better than ugly.
563	... 2. Explicit is better than implicit.		660	... 2. Explicit is better than implicit.
564	... 3. Simple is better than complex.		661	... 3. Simple is better than complex.
565	... 4. Complex is better than complicated.		662	... 4. Complex is better than complicated.
566	... '''.splitlines(1)		663	... '''.splitlines(1)
567	>>> len(text1)		664	>>> len(text1)
568	4		665	4
569	>>> text1[0][-1]		666	>>> text1[0][-1]
570	'\n'		667	'\n'
571	>>> text2 = ''' 1. Beautiful is better than ugly.		668	>>> text2 = ''' 1. Beautiful is better than ugly.
572	... 3. Simple is better than complex.		669	... 3. Simple is better than complex.
573	... 4. Complicated is better than complex.		670	... 4. Complicated is better than complex.
574	... 5. Flat is better than nested.		671	... 5. Flat is better than nested.
575	... '''.splitlines(1)		672	... '''.splitlines(1)
576	\end{verbatim}		673	\end{verbatim}
577			674
578	Next we instantiate a Differ object:		675	Next we instantiate a Differ object:
579			676
580	\begin{verbatim}		677	\begin{verbatim}
581	>>> d = Differ()		678	>>> d = Differ()
582	\end{verbatim}		679	\end{verbatim}
583			680
584	Note that when instantiating a \class{Differ} object we may pass		681	Note that when instantiating a \class{Differ} object we may pass
585	functions to filter out line and character ``junk.'' See the		682	functions to filter out line and character ``junk.'' See the
586	\method{Differ()} constructor for details.		683	\method{Differ()} constructor for details.
587			684
588	Finally, we compare the two:		685	Finally, we compare the two:
589			686
590	\begin{verbatim}		687	\begin{verbatim}
591	>>> result = list(d.compare(text1, text2))		688	>>> result = list(d.compare(text1, text2))
592	\end{verbatim}		689	\end{verbatim}
593			690
594	\code{result} is a list of strings, so let's pretty-print it:		691	\code{result} is a list of strings, so let's pretty-print it:
595			692
596	\begin{verbatim}		693	\begin{verbatim}
597	>>> from pprint import pprint		694	>>> from pprint import pprint
598	>>> pprint(result)		695	>>> pprint(result)
599	[' 1. Beautiful is better than ugly.\n',		696	[' 1. Beautiful is better than ugly.\n',
600	'- 2. Explicit is better than implicit.\n',		697	'- 2. Explicit is better than implicit.\n',
601	'- 3. Simple is better than complex.\n',		698	'- 3. Simple is better than complex.\n',
602	'+ 3. Simple is better than complex.\n',		699	'+ 3. Simple is better than complex.\n',
603	'? ++ \n',		700	'? ++ \n',
604	'- 4. Complex is better than complicated.\n',		701	'- 4. Complex is better than complicated.\n',
605	'? ^ ---- ^ \n',		702	'? ^ ---- ^ \n',
606	'+ 4. Complicated is better than complex.\n',		703	'+ 4. Complicated is better than complex.\n',
607	'? ++++ ^ ^ \n',		704	'? ++++ ^ ^ \n',
608	'+ 5. Flat is better than nested.\n']		705	'+ 5. Flat is better than nested.\n']
609	\end{verbatim}		706	\end{verbatim}
610			707
611	As a single multi-line string it looks like this:		708	As a single multi-line string it looks like this:
612			709
613	\begin{verbatim}		710	\begin{verbatim}
614	>>> import sys		711	>>> import sys
615	>>> sys.stdout.writelines(result)		712	>>> sys.stdout.writelines(result)
616	1. Beautiful is better than ugly.		713	1. Beautiful is better than ugly.
617	- 2. Explicit is better than implicit.		714	- 2. Explicit is better than implicit.
618	- 3. Simple is better than complex.		715	- 3. Simple is better than complex.
619	+ 3. Simple is better than complex.		716	+ 3. Simple is better than complex.
620	? ++		717	? ++
621	- 4. Complex is better than complicated.		718	- 4. Complex is better than complicated.
622	? ^ ---- ^		719	? ^ ---- ^
623	+ 4. Complicated is better than complex.		720	+ 4. Complicated is better than complex.
624	? ++++ ^ ^		721	? ++++ ^ ^
625	+ 5. Flat is better than nested.		722	+ 5. Flat is better than nested.
626	\end{verbatim}		723	\end{verbatim}

Legends

Colors
Added
Changed
Deleted

Links
(f)irst change
(n)ext change
(t)op