Index: Doc/lib/libminiconf.tex =================================================================== --- Doc/lib/libminiconf.tex (revision 0) +++ Doc/lib/libminiconf.tex (revision 0) @@ -0,0 +1,173 @@ +\section{\module{miniconf} --- Plain text Python built-in objects persistence} + +\declaremodule{standard}{miniconf} + +\modulesynopsis{Convert selected built-in Python objects to textual +representation and back.} + +\module{miniconf} provides data persistence of a subset of Python built-in +objects as plain text. Objects are loaded from, and dumped to valid Python +source composed of assignment statements of the form +\code{identifier = value}. Supported objects are: + +\begin{itemize} +\item dictionaries (\code{dict}) +\item lists and tuples (\code{list} and \code{tuple}) +\item integers and floats (\code{int} and \code{float}) +\item plain and unicode strings (\code{str} and \code{unicode}) +\item booleans (\code{bool}) +\item \code{None} +\end{itemize} + +Arbitrarily complex objects composed of these types can be +handled. \module{miniconf} is restricted to these types because they can be +easily reconstructed from literal representation in Python, entirely known at +byte-compilation, without the need to execute the code. + +\module{miniconf} aims at providing an easy, elegant way to store (dump) and +retrieve (load) configurations\footnote{Hence the name, \module{miniconf}. See +\refmodule{ConfigParser} for a different but more thorough, higher-level +implementation of configuration file handlers.} and other simple datasets in a +human-readable, familiar pythonic notation, while preventing unwanted injection +of external code. + +Basically, \module{miniconf} exposes two important functions, \function{load} +and \function{dump}, as well as one helper function, \function{format_header}: + +\begin{funcdesc}{load}{buf\optional{, pedantic=False}} +Load configuration from string \var{buf}. 
If \var{pedantic} is \code{True}, +un-loadable elements will raise \exception{TypeError} exceptions instead of +being silently dropped. On success, return a dictionary containing the parsed +built-in objects, indexed by assignment names. On error, raise a +\exception{SyntaxError}, \exception{TypeError} or \exception{ValueError} +exception. +\end{funcdesc} + +\begin{funcdesc}{dump}{data\optional{, comments=\{\}, pedantic=True, ...}} +Dump configuration from dictionary \var{data}, prepending string comments from +corresponding values (i.e. associated to the matching keys in \var{data}) in +\var{comments}. If \var{pedantic} is \code{True}, a \exception{TypeError} +exception will be raised if some value in \var{data} is an instance of a class derived from an +otherwise supported type. Return the formatted string on success, or raise a +\exception{TypeError} or \exception{ValueError} on error. The \var{data} dictionary +is dumped in identifiers' alphabetical order. Valid keywords are optional +arguments to the low-level \class{PrettyPrinter} object (see the +\function{pformat} function from the \refmodule{pprint} module). + +If every line in a comment string already starts with a pound sign, thus +making the string an already valid Python comment, the string is preserved +untouched in the output. If not, the comment string will be formatted using +\function{format_header}, using the same width used by the +\class{PrettyPrinter}. Basically, this means you are free to either have +comments automatically formatted and wrapped as a single paragraph, or use your +own layout if you want, as long as the whole string keeps being a valid Python +comment. + +Values associated with the special \code{'--top--'} and \code{'--bottom--'} keys, if +they exist in \var{comments}, will be respectively included at the beginning and end +of the returned string; the same formatting rules apply to them. 
+\end{funcdesc} + +\begin{funcdesc}{format_header}{header\optional{, width=80}} +Turn \var{header} into a valid Python comment by prepending a pound sign +followed by a space to each line, and wrap it to the given width. Return the +result as a single, potentially multi-line string. +\end{funcdesc} + +\subsection{Example} + +Load objects from a textual representation, stored in \var{snippet}: + +\begin{verbatim} +>>> import miniconf +>>> snippet = 'spam = [ 1, True, ("test", None) ]; egg = -2' +>>> config = miniconf.load(snippet) +>>> print config +{'egg': -2, 'spam': [1, True, ('test', None)]} +\end{verbatim} + +Of course, \var{config} could have been constructed from \var{snippet} as well +by doing: + +\begin{verbatim} +>>> config = {} +>>> exec snippet in config +>>> del config['__builtins__'] +\end{verbatim} + +The whole point of using \module{miniconf} instead of the exec statement is that +it is safer, since no arbitrary code execution ever occurs: the code is only +parsed, not executed, and the objects are reconstructed from the snippet's +abstract syntax tree. In practice, it makes user access to simple pythonic data +structures possible without having to fear injection of unwanted third-party +code. + +Finally, let's modify \var{config} a little and dump it back: + +\begin{verbatim} +>>> config['egg'] = u'new_value' +>>> config['new'] = range(10) +>>> print miniconf.dump(config) +egg = u'new_value' + +new = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + +spam = [1, True, ('test', None)] +\end{verbatim} + +\subsection{Explanation on pedantry} + +\function{load} and \function{dump} have two modes of operation: pedantic and +non-pedantic. + +\begin{itemize} +\item +On \function{load}, the pedantic argument indicates whether the function should +bail out on un-loadable constructs instead of just ignoring them. The default is not +to be pedantic on load. 
+ +\item +On \function{dump}, the pedantic argument indicates if the function should bail +out when dumping objects that could only be partially restored later on (because +they belong to a class derived from a supported type). A successful dump will +always be re-loadable anyway, regardless of the pedantry. The default is to be +pedantic on dump. +\end{itemize} + +A non-pedantic load or dump does not ensure that the operation will not raise an +exception (such as a \exception{SyntaxError} because of an unparsable buffer +during a load), only that the level of tolerance to the minor problems just +described will be greater. + +\subsection{Limitations} + +\module{miniconf} has a few limitations: + +\begin{itemize} +\item +It only supports a subset of built-in types (see above). + +\item +It is strictly string-based: it loads from strings, and dumps to strings. Potential +race conditions with the underlying sources and destinations such as real files +have to be dealt with externally (handling locks, etc.). + +\item +\function{dump} and \function{load} are not inverse functions, because +\function{load} is not injective; any special content or formatting in the +source code (comments, un-loadable objects, non-assignment statements, lexical +format) will be discarded at load time. See for instance: + +\begin{verbatim} +>>> from miniconf import load, dump +>>> print dump(load('spam = "egg" # this comment will be lost')) +spam = 'egg' +\end{verbatim} + +Basically, this means that an external agent (user or application) +cannot add an element such as a comment to a snippet, and have it +preserved the next time the program loads it, then dumps it +back. Of course, one can very well choose not to systematically dump +the data over the source of the next load, which alleviates this +limitation. 
+\end{itemize} Index: Doc/lib/lib.tex =================================================================== --- Doc/lib/lib.tex (revision 50858) +++ Doc/lib/lib.tex (working copy) @@ -218,6 +218,7 @@ \input{libcopyreg} % really copy_reg % from runtime... \input{libshelve} \input{libmarshal} +\input{libminiconf} \input{libanydbm} \input{libwhichdb} \input{libdbm} Index: Lib/miniconf.py =================================================================== --- Lib/miniconf.py (revision 0) +++ Lib/miniconf.py (revision 0) @@ -0,0 +1,339 @@ +""" +Plain text Python built-in objects persistence + +miniconf provides data persistence of a subset of Python built-in +objects as plain text. Objects are loaded from, and dumped to valid +Python source composed of assignment statements of the form +\"identifier = value\". Supported objects are: + +- dictionaries +- lists and tuples +- integers and floats +- plain and unicode strings +- booleans +- None + +Arbitrarily complex objects composed of these types can be +handled. miniconf is restricted to these types because they can be +easily reconstructed from literal representation in Python, entirely +known at byte-compilation, without the need to execute the code. + +miniconf aims at providing an easy, elegant way to store (dump) and +retrieve (load) configuration information and simple datasets in a +human-readable, familiar pythonic notation, while preventing unwanted +injection of external code. 
+ +Usage example +============= + +# Load the objects from a textual representation +# +>>> import miniconf +>>> snippet = 'spam = [ 1, True, (\"test\", None) ]; egg = -2' +>>> config = miniconf.load(snippet) +>>> print config +{'egg': -2, 'spam': [1, True, ('test', None)]} + +# Note that config could as well be constructed from snippet by doing: +# +>>> config = {} +>>> exec snippet in config +>>> del config['__builtins__'] + +The whole point of using miniconf instead of the exec statement is +that it is safer, since no arbitrary code execution ever occurs: the +code is only parsed, not executed, and the objects are reconstructed +from the snippet's abstract syntax tree. In practice, it makes user +access to simple pythonic data structures possible without having to +fear injection of unwanted third-party code. + +# Modify the data and dump it back +# +>>> config['egg'] = u'new_value' +>>> config['new'] = range(10) +>>> print miniconf.dump(config) +egg = u'new_value' + +new = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + +spam = [1, True, ('test', None)] + +Special explanation on pedantry +=============================== + +Both load() and dump() have two modes of operation: pedantic and +non-pedantic. + +- On load, the pedantic argument indicates whether the function should + bail out on un-loadable constructs instead of just ignoring + them. The default is not to be pedantic on load. + +- On dump, the pedantic argument indicates if the function should bail + out when dumping objects that could only be partially restored later + on (because they belong to a class derived from a supported type). A + successful dump will always be re-loadable anyway, regardless of the + pedantry. The default is to be pedantic on dump. + +Please keep in mind that a non-pedantic load or dump does not ensure +that the operation will not raise an exception (such as a SyntaxError +because of an unparsable buffer during a load), only that the level of +tolerance to the minor problems just described will be greater. 
+ +Limitations +=========== + +miniconf has a few limitations one should be aware of: + +- It only supports a subset of built-ins types (see above). + +- It is strictly string-based: it loads from them, and dumps to + them. Potential race conditions with the underlying sources and + destinations such as real files has to be dealt with externally + (handling locks, etc.). + +- dump() and load() are not inverse functions, because load() is not + injective; any special content or formatting in the source code + (comments, un-loadable objects, non-assignments statements, lexical + format) will be discarded at load time. See for instance: + + >>> from miniconf import load, dump + >>> print dump(load('spam = \"egg\" # this comment will be lost')) + spam = 'egg' + + Basically, this means that an external agent (user or application) + cannot add an element such as a comment to a snippet, and have it + preserved next time the program will have loaded, then dumped it + back. Of course, one can very well choose not to systematically dump + the data over the source of the next load, which alleviates this + limitation. + +""" +# Written by Sylvain Fourmanoit , 2006. +# Please carbon-copy any comment or bug report to this address. + +import compiler +import re +import pprint +import textwrap + +class _Load: + names = {'None': None, 'True': True, 'False': False} + + def __init__(self): + if not hasattr(_Load, 'lookup'): + _Load.lookup = dict([(item[5:], getattr(_Load, item)) + for item in _Load.__dict__ + if '_node' == item[:5]]) + + def __call__(self, buf, pedantic): + # We use the supplemented compiler interface to the parser + # module because it comes with a DOM-like interface very + # handy for "walking" the tree. 
+ self.pedantic = pedantic + return dict(self._group( + self.start_walk(compiler.parse(buf).getChildNodes()[0]))) + + def start_walk(self, node): + assert(node.__class__ == compiler.ast.Stmt) + for child in node.getChildNodes(): + for result in self.walk(child): + yield result + + def walk(self, node): + try: + return self.lookup[node.__class__.__name__](self, node) + except KeyError: + return self._nodeDiscard(node) + + def assert_complex(self, node): + if isinstance(node.value, complex): + raise TypeError('complex numbers are not supported') + + def _nodeAssign(self,node): + return (self.walk(node.getChildNodes()[0]),\ + self.walk(node.getChildNodes()[1])) + + def _nodeAssName(self, node): + return node.name + + def _nodeConst(self, node): + self.assert_complex(node) + return node.value + + def _nodeUnarySub(self, node): + node = node.getChildNodes()[0] + self.assert_complex(node) + return -(node.value) + + def _nodeName(self, node): + try: + return self.names[node.name] + except KeyError: + if self.pedantic: + raise TypeError('name node with value "%s" discarded' % + node.name) + else: + return [] + + def _nodeDict(self, node): + return dict([(self.walk(k),self.walk(v)) + for k,v in node.items]) + + def _nodeTuple(self, node): + return tuple([self.walk(i) + for i in node.nodes]) + + def _nodeList(self, node): + return [self.walk(i) + for i in node.nodes] + + def _nodeDiscard(self, node): + if self.pedantic: + raise TypeError('node of type "%s" discarded' % \ + node.__class__.__name__) + return [] + + def _group(self, iterable): + group=[] + for item in iterable: + group.append(item) + if len(group)==2: + yield group + group=[] + +class _Dump: + types = (dict, list, tuple, int, float, str, unicode, bool, type(None)) + + def __init__(self): + self.hexpr = re.compile('^[^#]', re.MULTILINE) + + def __call__(self, data, headers, pedantic, **kw): + data = self.recast(data, pedantic, [dict]) + return '\n'.join( + [self.header(headers, '--top--', **kw)] + + ['%s%s = 
%s\n' % + (self.header(headers, k, **kw), + k, pprint.pformat(data[k], **kw)) + for k in self.keys(data)] + + [self.header(headers, '--bottom--', **kw)] + ).strip() + + def recast(self, data, pedantic, types): + # This makes sure that the data object is only aggregated + # from supported types; if pedantic is False, it even + # coerce every sub-objects to supported base types, to make + # sure that the final representation will be pretty-printed + # to something that will always be loadable later on. + def cast(data, pedantic, types): + if pedantic: + if type(data) in types: + return data + else: + for t in types: + if isinstance(data, t): + if t is not type(None): + return t(data) + else: + return None + + raise TypeError( \ + 'Object "%s" is of unsupported type %s, while it should be one of %s' % + (data, type(data), types)) + + data = cast(data, pedantic, types) + + if type(data) is dict: + iterator = data.iteritems() + elif type(data) in (list, tuple): + iterator = iter(data) + else: + iterator = None + + if iterator is not None: + return type(data)([self.recast(item, pedantic, self.types) + for item in iterator]) + else: + return data + + def keys(self, data): + # Order the keys, and make sure that they are all valid + # Python identifiers. + ident = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*$') + keys = data.keys() + keys.sort() + for key in keys: + if not ident.match(key): + raise ValueError( + 'Key "%s" is not a valid Python identifier' % key) + return keys + + def header(self, headers, key, width=80, **kw): + if headers.has_key(key): + if type(headers[key]) is str: + if self.hexpr.search(headers[key]): + return format_header(headers[key], width=width) + '\n' + else: + return headers[key] + '\n' + else: + raise TypeError( \ + 'Header for identifier "%s" is of type "%s", not string' % \ + (key, type(headers[key]))) + else: + return '' + +def format_header(header, width=80, **kw): + """ + Transform a string into a python comment. 
+ + Turn header into a valid Python comment by prepending '# ' to each + line, and wrap it to the given width. Return the result as a + single, potentially multi-lines string. + """ + width -= 2 + return '\n'.join(['# ' + line + for line in textwrap.wrap(header, width)]) + +def load(buf, pedantic=False): + """ + Load configuration from string, returning a dictionary. + + Load configuration from string buf. If pedantic is True, + un-loadable elements will raise TypeError exceptions instead of + being silently dropped. On success, return a dictionary containing + the parsed built-in objects, indexed by assignment names. On + error, raise a SyntaxError, TypeError or ValueError exception. + """ + return _Load()(buf, pedantic) + +def dump(data, comments={}, pedantic=True, **kw): + """ + Dump configuration from dictionary, returning a string. + + Dump configuration from dictionary data, prepending string + comments from corresponding values (i.e. associated to the + matching keys in data) in comments. If pedantic is True, a + TypeError exception will be raised if some value in data is a + derivate class of an otherwise supported type. Return the + formatted string on success, or raise a TypeError or ValueError on + error. data dictionary is dumped in identifiers' alphabetical + order. Valid keywords are optional arguments to the low-level + PrettyPrinter object (see the pformat method from the pprint + module). + + === Note on comments === + + If every lines in a comment string are already starting with a + pound sign ('#') thus making the string an already valid Python + comment, such string is preserved untouched in the output. If not, + the comment string will be formatted using format_header(), using + the same width used by the PrettyPrinter. Basically, this means + you are free to either have comments automatically formatted and + wrapped as a single paragraph, or use your own layout if you want, + as long as the whole string keeps being a valid Python comment. 
+ + Values associated with special '--top--' and '--bottom--' keys, if + they exist in comments, will be respectively included at the + beginning and end of the return string; same formatting + rules apply to them. + """ + return _Dump()(data, headers=comments, pedantic=pedantic, **kw) Index: Lib/test/test_miniconf.py =================================================================== --- Lib/test/test_miniconf.py (revision 0) +++ Lib/test/test_miniconf.py (revision 0) @@ -0,0 +1,205 @@ +#!/usr/bin/env python + +from test import test_support +import miniconf +import unittest +import sys + +class LoadTestCase(unittest.TestCase): + # Test loading + + src_unloadable = 'while 1: pass # The loop of death' + + def test_empty(self): + # Test empty load + data = miniconf.load('') + self.assertEqual(data, {}) + + def test_comment_trimming(self): + # Test empty load + data = miniconf.load('# Nothing to see there, move along') + self.assertEqual(data, {}) + + def test_spurious_source(self): + # Test a source with syntax error + self.assertRaises(SyntaxError, miniconf.load, 'arrg!') + + def test_unloadable_structure(self): + # Test on unloadable object + data = miniconf.load(self.src_unloadable) + self.assertEqual(data, {}) + + def test_unloadable_structure_pedantic(self): + # Test on unloadable object, pedantic mode + self.assertRaises(TypeError, miniconf.load, + self.src_unloadable, pedantic = True) + + def test_trivial_load(self): + # Try to load a trivial value + data = miniconf.load('spam = 1') + self.assertEqual(data, {'spam': 1}) + + def test_all_around_flat_load(self): + # Test the load a single value of each supported type + for value in (dict(), list(), tuple(), + int(), float(), str(), unicode(), bool(), None): + data = miniconf.load('spam = %s' % repr(value)) + self.assertEqual(data['spam'], value) + + def test_recursive_load(self): + # Test the load of an arbitrary, nested structure + val = [0, {1: [2, (3, 4)]}] + data = miniconf.load('spam = %s' % repr(val)) + 
self.assertEqual(data['spam'], val) + +class DumpTestCase(unittest.TestCase): + # Test dumping() + + class MyInt(int): pass + + def test_empty(self): + # Test an empty dump + out = miniconf.dump({}) + self.assertEqual(out, '') + + def test_invalid_data(self): + # Test on a non-dictionary data + self.assertRaises(TypeError, miniconf.dump, None) + + def test_subclass_dump_pedantic(self): + # Test a subclass dump, in pedantic mode + self.assertRaises(TypeError, miniconf.dump, + {'spam' : self.MyInt(1)}) + + def test_subclass_dump_non_pedantic(self): + # Test a subclass dump, in non-pedantic mode + out = miniconf.dump({'spam' : self.MyInt(1)}, pedantic=False) + self.assertEqual(out, 'spam = 1') + + def test_invalid_identifier(self): + # Test passing an invalid Python identifier + self.assertRaises(ValueError, miniconf.dump, + {'?invalid_id' : None }) + + def test_comment(self): + # Test generation of a single comment + out = miniconf.dump({}, comments = {'--top--': 'Egg'}) + self.assertEqual(out, '# Egg') + + def test_invalid_comment(self): + # Test rejection of an invalid comment + self.assertRaises(TypeError, miniconf.dump, + {}, comments = {'--top--': None}) + + def test_all_around_flat_dump(self): + # Test the dump a single value of each supported type + for value in (dict(), list(), tuple(), + int(), float(), str(), unicode(), bool(), None): + out = miniconf.dump({'spam': value}) + self.assertEqual(out, 'spam = %s' % repr(value)) + + def test_recursive_dump(self): + # Test the dump of an arbitrary, nested structure + val = [0, {1: [2, (3, 4)]}] + out = miniconf.dump({ 'spam': val}) + self.assertEqual(out, 'spam = %s' % repr(val)) + +class ValuesTestCase(unittest.TestCase): + # Test dump(load(val)) more extensively (some ideas and code borrowed from + # test_marshal.py) + + def test_ints(self): + # Test the full range of Python ints + n = sys.maxint + while n: + for expected in (-n, n): + s = miniconf.dump({'spam' : expected}) + got = miniconf.load(s)['spam'] + 
self.assertEqual(expected, got) + n = n >> 1 + + def test_bool_and_none(self): + # Test booleans and None + for expected in (True, False, None): + s = miniconf.dump({'spam': expected}) + got = miniconf.load(s)['spam'] + self.assertEqual(expected, got) + + def test_float(self): + # Test a few floats + small = 1e-25 + n = sys.maxint * 3.7e250 + while n > small: + for expected in (-n, n): + f = float(expected) + s = miniconf.dump({'spam': f}) + got = miniconf.load(s)['spam'] + self.assertEqual(f, got) + n /= 123.4567 + + n = sys.maxint * 3.7e-250 + while n < small: + for expected in (-n, n): + f = float(expected) + + s = miniconf.dump({'spam': f}) + got = miniconf.load(s)['spam'] + n *= 123.4567 + + def test_string(self): + # Test some random plain and unicode strings + for expected in [ "", 'Test \xcb', '-' * 400 ]: + s = miniconf.dump({'spam': expected}) + got = miniconf.load(s)['spam'] + self.assertEqual(expected, got) + + def test_unicode(self): + # Test some random plain and unicode strings + for expected in [ u"", u'Test \xcb', u'-' * 400 ]: + s = miniconf.dump({'spam': expected}) + got = miniconf.load(s)['spam'] + self.assertEqual(expected, got) + +class StressTestCase(unittest.TestCase): + def test_stress(self): + # Test if we can sucessfully dump, then reload a collection of + # relatively complex nested objects composed of every + # supported types. + + def create(rank = 0, it = 1): + # This is the object creation function: return a data + # dictionary suitable to be fed to dump(). 
+ def ident(iterable): + for k, v in enumerate(iterable): + yield 'spam_%d' % k, v + + types = (dict, list, tuple, int, float, str, unicode, bool) + + if rank < 30: + for i in xrange(it): + t = types[(rank + i)% len(types)] + if t in types[:3]: + size = (rank % 5) + 5 + if t is dict: + var = t(ident(create(rank + i + 1, size))) + else: + var = t(create(rank + i + 1, size)) + yield var + else: + yield t(rank + i) + else: + yield None + + expected = list(create())[0] + s = miniconf.dump(expected) + got = miniconf.load(s) + self.assertEqual(got, expected) + +def test_main(): + test_support.run_unittest(LoadTestCase, + DumpTestCase, + ValuesTestCase, + StressTestCase) + +if __name__ == "__main__": + test_main()