import enum
import subprocess
import textwrap
import xml.etree.ElementTree


def raw_log():
    """Fetch the Mercurial log in XML format.

    Runs ``hg log`` verbosely with the ``xml`` style against the repository
    at ``../default``, restricted to the ``default`` branch and the revision
    range ``tip:v3.3.0``, and returns the command's output as text.

    Every commit is a ``logentry`` element and all of them are wrapped in a
    single root element.  The parts of an entry that ``LogEntry`` reads look
    like this (attribute values elided; any other child elements, such as
    the author ``Georg Brandl`` or the date ``2012-09-29T09:51:42+02:00``,
    are ignored by the parser)::

        <logentry revision="...">
          <parent revision="..."/>
          <msg>merge with 3.3</msg>
          <paths>
            <path action="...">.hgtags</path>
            <path action="...">Lib/test/test_sys.py</path>
          </paths>
        </logentry>

    Raises:
        subprocess.CalledProcessError: if ``hg`` exits with a non-zero status.
    """
    command = [
        'hg', '--repository', '../default', 'log', '-v', '--style', 'xml',
        '--branch', 'default', '-r', 'tip:v3.3.0',
    ]
    # Pass the argument list directly: no shell is needed, so there is no
    # quoting to get wrong and nothing for a shell to reinterpret.
    return subprocess.check_output(command, universal_newlines=True)


@enum.unique
class EntryCategory(enum.Enum):
    """Categories that a log entry may fall under.

    The values of the enum are to be compatible with str.startswith(), i.e.
    each value is either one path prefix or a tuple of path prefixes.

    Declaration order matters: ``LogEntry.category()`` returns the first
    member with a matching prefix, so the more specific prefixes
    (``Lib/idlelib``, ``Lib/test``) must stay ahead of the general ``Lib``.
    """

    Core_and_Builtins = 'Grammar', 'Objects', 'Parser', 'Python'
    C_API = 'Modules'
    IDLE = 'Lib/idlelib'
    Tests = 'Lib/test'
    Library = 'Lib'
    Build = 'Include', 'configure.ac', 'Makefile.pre.in', 'setup.py'
    Windows = 'PC', 'PCbuild'
    Documentation = 'Doc'
    Tools = 'Tools'

    def __str__(self):
        """Output a human-readable version of the enum's name."""
        return self.name.replace('_', ' ')


class LogEntry:
    """Object representation of a log entry.

    Attributes set by ``__init__``:

    * ``revision`` -- the ``revision`` attribute of the entry element.
    * ``parents`` -- list of ``revision`` attributes of ``parent`` children.
    * ``paths`` -- dict mapping each touched path to its ``action`` attribute.
    * ``message`` -- the commit message (``''`` when absent or empty).
    """

    def __init__(self, etree_object):
        """Create log entry from etree object.

        Raises:
            KeyError: if the entry lacks a ``revision`` attribute, or a
                ``parent``/``path`` child lacks the attribute read from it.
        """
        self.revision = etree_object.attrib['revision']
        self.parents = []
        self.paths = {}
        # Default to an empty message so the message-based helpers below keep
        # working for entries that have no ``msg`` element at all.
        self.message = ''
        for child_node in etree_object:
            if child_node.tag == 'parent':
                self.parents.append(child_node.attrib['revision'])
            elif child_node.tag == 'paths':
                for path in child_node:
                    self.paths[path.text] = path.attrib['action']
            elif child_node.tag == 'msg':
                # An empty element has ``text`` of None; normalise to ''.
                self.message = child_node.text or ''

    @classmethod
    def process_logs(cls, xml_log):
        """Take in raw XML log output and return a sequence of log entries.

        Every direct child of the root element becomes one entry, in
        document order.
        """
        root = xml.etree.ElementTree.fromstring(xml_log)
        return [cls(entry) for entry in root]

    def __repr__(self):
        """Return a short debugging representation naming the revision."""
        return '<LogEntry {}>'.format(self.revision)

    def __str__(self):
        """Return a multi-line, human-readable dump of the entry.

        The lines are: revision (with parents, if any), the paths dict, the
        category, the issue number (if any) and the summary.
        """
        rev_line = 'Revision ' + self.revision
        if self.parents:
            rev_line += ' ({})'.format(self.parents)
        rev_line += ':'
        metadata = [rev_line]
        metadata.append(str(self.paths))
        classification = self.category()
        if not classification:
            classification = 'N/A'
        metadata.append('Classification: ' + str(classification))
        issue = self.issue_number()
        if issue:
            metadata.append('Issue #' + issue)
        metadata.append(self.summary())
        return '\n '.join(metadata)

    @property
    def from_merge(self):
        """Is the revision the result of a merge?

        Approximated as "the entry lists at least one ``parent`` element".
        """
        return bool(self.parents)

    def interesting_paths(self):
        """Return the "interesting" paths.

        "Interesting" is defined by a path:

        * Not in Misc/
        * Not a doc file (i.e. HTML or reST)

        The resulting set is computed once and cached on the instance.
        """
        try:
            return self._interesting_paths
        except AttributeError:
            filtered_paths = {
                path for path in self.paths
                if not path.startswith('Misc')
                and not path.endswith(('.html', '.rst'))
            }
            self._interesting_paths = filtered_paths
            return filtered_paths

    @property
    def has_details(self):
        """Does the message have a body after a blank line?"""
        return '\n\n' in self.message

    @property
    def newsworthy(self):
        """Heuristic to determine what warrants mentioning.

        An entry is newsworthy when either:

        * it is not from a merge and names an issue number, or
        * it touches at least one interesting path and has details.

        Always returns a bool.
        """
        names_issue = not self.from_merge and bool(self.issue_number())
        inferred_interesting = (bool(self.interesting_paths())
                                and self.has_details)
        return names_issue or inferred_interesting

    def issue_number(self):
        """Find the issue number (if any).

        Only the first ``#`` in the message is considered.  Returns the run
        of digits that immediately follows it as a string (possibly ``''``
        when no digit follows), or None when the message has no ``#``.  The
        result is cached on the instance.
        """
        try:
            return self._issue
        except AttributeError:
            pass
        number_sign = self.message.find('#')
        if number_sign == -1:
            issue = None
        else:
            end = number_sign + 1
            while end < len(self.message) and self.message[end].isdigit():
                end += 1
            issue = self.message[number_sign + 1:end]
        self._issue = issue
        return issue

    def summary(self):
        """Create the summary line in the commit message.

        The summary is taken as the text up to the first blank line.
        Newlines are removed to allow for later line wrapping. The issue
        number (if a prefix to the summary line) is also stripped to control
        formatting.  A trailing period is added when missing.

        The text after the blank line is stored as ``self._details`` and the
        summary itself is cached on the instance.
        """
        try:
            return self._summary
        except AttributeError:
            pass
        summary, _, self._details = self.message.partition('\n\n')
        summary = summary.replace('\n', ' ')
        issue_number = self.issue_number()
        if issue_number:
            formatted_number = '#{}: '.format(issue_number)
            number_index = summary.find(formatted_number)
            if number_index != -1:
                # Drop everything up to and including the "#NNN: " marker.
                summary = summary[number_index + len(formatted_number):]
        if not summary.endswith('.'):
            summary += '.'
        self._summary = summary
        return summary

    def category(self):
        """Attempt to classify what the commit influences (e.g. IDLE, build).

        Returns the first ``EntryCategory`` member (in declaration order)
        whose prefix(es) match the start of any touched path, or None when
        nothing matches.  The result is cached on the instance.
        """
        try:
            return self._category
        except AttributeError:
            pass
        category = None
        for possible_category in EntryCategory:
            prefixes = possible_category.value
            if any(path.startswith(prefixes) for path in self.paths):
                category = possible_category
                break
        self._category = category
        return category


def text_format(log_entries):
    """Render log entries as plain text grouped under category headings.

    Categories appear in ``EntryCategory`` declaration order, each as an
    underlined title followed by one ``- `` bullet per entry (prefixed with
    ``Issue #N: `` when the entry names an issue).  Sections and bullets are
    separated by blank lines.

    Entries whose ``category()`` is None are not included in the output.
    """
    groupings = {}
    for entry in log_entries:
        groupings.setdefault(entry.category(), []).append(entry)

    output = []
    for category in EntryCategory:
        if category not in groupings:
            continue
        category_title = str(category)
        output.append(category_title + '\n' + '-' * len(category_title))
        for entry in groupings[category]:
            issue_output = ['- ']
            issue_number = entry.issue_number()
            if issue_number:
                issue_output.append('Issue #{}: '.format(issue_number))
            issue_output.append('\n '.join(textwrap.wrap(entry.summary())))
            output.append(''.join(issue_output))
    return '\n\n'.join(output)


if __name__ == '__main__':
    import sys

    # Usage: pass the path of a file holding the XML log as the sole argument.
    with open(sys.argv[1]) as file:
        log_entries = LogEntry.process_logs(file.read())
    worthy = [entry for entry in log_entries if entry.newsworthy]
    print(text_format(worthy))