Index: .bzrignore
===================================================================
--- .bzrignore	(revision 78805)
+++ .bzrignore	(working copy)
@@ -14,6 +14,7 @@
 pyconfig.h
 libpython*.a
 python.exe
+python-gdb.py
 reflog.txt
 tags
 TAGS
Index: .hgignore
===================================================================
--- .hgignore	(revision 78805)
+++ .hgignore	(working copy)
@@ -30,6 +30,7 @@
 Modules/config.c
 Parser/pgen
 core
+python-gdb.py
 
 syntax: glob
 libpython*.a
Index: Misc/ACKS
===================================================================
--- Misc/ACKS	(revision 78805)
+++ Misc/ACKS	(working copy)
@@ -481,6 +481,7 @@
 Nick Maclaren
 Steve Majewski
 Grzegorz Makarewicz
+David Malcolm
 Ken Manheimer
 Vladimir Marangozov
 David Marek
Index: Tools/gdb/libpython.py
===================================================================
--- Tools/gdb/libpython.py	(revision 0)
+++ Tools/gdb/libpython.py	(revision 0)
@@ -0,0 +1,620 @@
+#!/usr/bin/python
+'''
+From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
+to be extended with Python code e.g. for library-specific data visualizations,
+such as for the C++ STL types.
+
+This python module deals with the case when the process being debugged (the
+"inferior process" in gdb parlance) is itself python, or more specifically,
+linked against libpython.  In this situation, almost every item of data is a
+(PyObject*), and having the debugger merely print their addresses is not very
+enlightening.
+
+This module embeds knowledge about the implementation details of libpython so
+that we can emit useful visualizations e.g. a string, a list, a dict, a frame
+giving file/line information and the state of local variables.
+
+In particular, given a gdb.Value corresponding to a PyObject* in the inferior
+process, we can generate a "proxy value" within the gdb process.  For example,
+given a PyObject* in the inferior process that is in fact a PyListObject*
+holding three PyObject* that turn out to be PyStringObject* instances, we can
+generate a proxy value within the gdb process that is a list of strings:
+  ["foo", "bar", "baz"]
+
+We try to defer all gdb.lookup_type() invocations until as late as possible:
+when the /usr/bin/python process starts in the debugger, the libpython.so
+hasn't been dynamically loaded yet, so none of the type names are known to
+the debugger.
+
+Tested with both libpython2.6 and libpython3.1.
+'''
+
+import gdb
+
+# Look up the gdb.Type for some standard types:
+_type_char_ptr = gdb.lookup_type('char').pointer() # char*
+_type_void_ptr = gdb.lookup_type('void').pointer() # void*
+_type_size_t = gdb.lookup_type('size_t')
+
+SIZEOF_VOID_P = _type_void_ptr.sizeof
+
+
+Py_TPFLAGS_HEAPTYPE = (1L << 9)
+
+Py_TPFLAGS_INT_SUBCLASS = (1L << 23)
+Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
+Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
+Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
+Py_TPFLAGS_STRING_SUBCLASS = (1L << 27)
+Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
+Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
+Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
+Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
+
+
+class NullPyObjectPtr(RuntimeError):
+    pass
+
+
+def safety_limit(val):
+    # Given an integer value from the process being debugged, limit it to some
+    # safety threshold so that arbitrary breakage within said process doesn't
+    # break the gdb process too much (e.g. sizes of iterations, sizes of lists)
+    return min(val, 100)
+
+
+def safe_range(val):
+    # As per range, but don't trust the value too much: cap it to a safety
+    # threshold in case the data was corrupted
+    return xrange(safety_limit(val))
+
+
+class PyObjectPtr(object):
+    """
+    Class wrapping a gdb.Value that's either a (PyObject*) within the
+    inferior process, or some subclass pointer e.g. (PyStringObject*)
+
+    There will be a subclass for every refined PyObject type that we care
+    about.
+
+    Note that at every stage the underlying pointer could be NULL, point
+    to corrupt data, etc; this is the debugger, after all.
+    """
+    _typename = 'PyObject'
+
+    def __init__(self, gdbval, cast_to=None):
+        if cast_to:
+            self._gdbval = gdbval.cast(cast_to)
+        else:
+            self._gdbval = gdbval
+
+    def field(self, name):
+        '''
+        Get the gdb.Value for the given field within the PyObject, coping with
+        some python 2 versus python 3 differences.
+
+        Various libpython types are defined using the "PyObject_HEAD" and
+        "PyObject_VAR_HEAD" macros.
+
+        In Python 2, these are defined so that "ob_type" and (for a var
+        object) "ob_size" are fields of the type in question.
+
+        In Python 3, this is defined as an embedded PyVarObject type thus:
+           PyVarObject ob_base;
+        so that the "ob_size" field is located inside the "ob_base" field, and
+        the "ob_type" is most easily accessed by casting back to a (PyObject*).
+        '''
+        if self.is_null():
+            raise NullPyObjectPtr(self)
+
+        if name == 'ob_type':
+            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
+            return pyo_ptr.dereference()[name]
+
+        if name == 'ob_size':
+            try:
+                # Python 2:
+                return self._gdbval.dereference()[name]
+            except RuntimeError:
+                # Python 3:
+                return self._gdbval.dereference()['ob_base'][name]
+
+        # General case: look it up inside the object:
+        return self._gdbval.dereference()[name]
+
+    def type(self):
+        return PyTypeObjectPtr(self.field('ob_type'))
+
+    def is_null(self):
+        return 0 == long(self._gdbval)
+
+    def safe_tp_name(self):
+        try:
+            return self.type().field('tp_name').string()
+        except NullPyObjectPtr:
+            # NULL tp_name?
+            return 'unknown'
+        except RuntimeError:
+            # Can't even read the object at all?
+            return 'unknown'
+
+    def proxyval(self):
+        '''
+        Scrape a value from the inferior process, and try to represent it
+        within the gdb process, whilst (hopefully) avoiding crashes when
+        the remote data is corrupt.
+
+        Derived classes will override this.
+
+        For example, a PyIntObject* with ob_ival 42 in the inferior process
+        should result in an int(42) in this process.
+        '''
+
+        class FakeRepr(object):
+            """
+            Class representing a non-descript PyObject* value in the inferior
+            process for when we don't have a custom scraper, intended to have
+            a sane repr().
+            """
+
+            def __init__(self, tp_name, address):
+                self.tp_name = tp_name
+                self.address = address
+
+            def __repr__(self):
+                return '<%s at remote 0x%x>' % (self.tp_name, self.address)
+
+        return FakeRepr(self.safe_tp_name(),
+                        long(self._gdbval))
+
+    @classmethod
+    def subclass_from_type(cls, t):
+        '''
+        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
+        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
+        to use.
+
+        Ideally, we would look up the symbols for the global types, but that
+        isn't working yet:
+          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
+          Traceback (most recent call last):
+            File "<string>", line 1, in <module>
+          NotImplementedError: Symbol type not yet supported in Python scripts.
+          Error while executing Python code.
+
+        For now, we use tp_flags, after doing some string comparisons on the
+        tp_name for some special-cases that don't seem to be visible through
+        flags
+        '''
+        try:
+            tp_name = t.field('tp_name').string()
+            tp_flags = int(t.field('tp_flags'))
+        except RuntimeError:
+            # Handle any kind of error e.g. NULL ptrs by simply using the base
+            # class
+            return cls
+
+        #print 'tp_flags = 0x%08x' % tp_flags
+        #print 'tp_name = %r' % tp_name
+
+        name_map = {'bool': PyBoolObjectPtr,
+                    'classobj': PyClassObjectPtr,
+                    'instance': PyInstanceObjectPtr,
+                    'NoneType': PyNoneStructPtr,
+                    'frame': PyFrameObjectPtr,
+                    }
+        if tp_name in name_map:
+            return name_map[tp_name]
+
+        if tp_flags & Py_TPFLAGS_HEAPTYPE:
+            return HeapTypeObjectPtr
+
+        if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
+            return PyIntObjectPtr
+        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
+            return PyLongObjectPtr
+        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
+            return PyListObjectPtr
+        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
+            return PyTupleObjectPtr
+        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
+            return PyStringObjectPtr
+        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
+            return PyUnicodeObjectPtr
+        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
+            return PyDictObjectPtr
+        #if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
+        #    return something
+        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
+        #    return PyTypeObjectPtr
+
+        # Use the base class:
+        return cls
+
+    @classmethod
+    def from_pyobject_ptr(cls, gdbval):
+        '''
+        Try to locate the appropriate derived class dynamically, and cast
+        the pointer accordingly.
+        '''
+        try:
+            p = PyObjectPtr(gdbval)
+            cls = cls.subclass_from_type(p.type())
+            return cls(gdbval, cast_to=cls.get_gdb_type())
+        except RuntimeError:
+            # Handle any kind of error e.g. NULL ptrs by simply using the base
+            # class
+            pass
+        return cls(gdbval)
+
+    @classmethod
+    def get_gdb_type(cls):
+        return gdb.lookup_type(cls._typename).pointer()
+
+
+class InstanceProxy(object):
+
+    def __init__(self, cl_name, attrdict, address):
+        self.cl_name = cl_name
+        self.attrdict = attrdict
+        self.address = address
+
+    def __repr__(self):
+        if isinstance(self.attrdict, dict):
+            kwargs = ', '.join(["%s=%r" % (arg, val)
+                                for arg, val in self.attrdict.iteritems()])
+            return '<%s(%s) at remote 0x%x>' % (self.cl_name,
+                                                kwargs, self.address)
+        else:
+            return '<%s at remote 0x%x>' % (self.cl_name,
+                                            self.address)
+
+
+def _PyObject_VAR_SIZE(typeobj, nitems):
+    return ( ( typeobj.field('tp_basicsize') +
+               nitems * typeobj.field('tp_itemsize') +
+               (SIZEOF_VOID_P - 1)
+             ) & ~(SIZEOF_VOID_P - 1)
+           ).cast(_type_size_t)
+
+class HeapTypeObjectPtr(PyObjectPtr):
+    _typename = 'PyObject'
+
+    def proxyval(self):
+        '''
+        Support for new-style classes.
+
+        Currently we just locate the dictionary using _PyObject_GetDictPtr,
+        ignoring descriptors
+        '''
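+        # A hypothetical illustration of the intended result: for an instance
+        # of a new-style class Foo with an attribute x == 1, the proxy built
+        # below should be an InstanceProxy whose repr() reads
+        #   <Foo(x=1) at remote 0x...>
+        # (with the real address in place of "0x...").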
+        attr_dict = {}
+
+        try:
+            typeobj = self.type()
+            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
+            if dictoffset != 0:
+                if dictoffset < 0:
+                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
+                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
+                    if tsize < 0:
+                        tsize = -tsize
+                    size = _PyObject_VAR_SIZE(typeobj, tsize)
+                    dictoffset += size
+                    assert dictoffset > 0
+                    assert dictoffset % SIZEOF_VOID_P == 0
+
+                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
+                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
+                dictptr = dictptr.cast(PyObjectPtrPtr)
+                attr_dict = PyObjectPtr.from_pyobject_ptr(dictptr.dereference()).proxyval()
+        except RuntimeError:
+            # Corrupt data somewhere; fail safe
+            pass
+
+        tp_name = self.safe_tp_name()
+
+        # New-style class:
+        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
+
+
+class PyBoolObjectPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+    <bool> instances (Py_True/Py_False) within the process being debugged.
+    """
+    _typename = 'PyBoolObject'
+
+    def proxyval(self):
+        if int_from_int(self.field('ob_ival')):
+            return True
+        else:
+            return False
+
+
+class PyClassObjectPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
+    instance within the process being debugged.
+    """
+    _typename = 'PyClassObject'
+
+
+class PyCodeObjectPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
+    within the process being debugged.
+    """
+    _typename = 'PyCodeObject'
+
+
+class PyDictObjectPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
+    within the process being debugged.
+    """
+    _typename = 'PyDictObject'
+
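+    # Note on the layout assumed below: ma_mask + 1 is the number of slots in
+    # the dict's hash table, and unused or dummy slots have a NULL me_value,
+    # which is why proxyval() skips any entry whose value is NULL.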
+ """ + _typename = 'PyDictObject' + + def proxyval(self): + result = {} + for i in safe_range(self.field('ma_mask') + 1): + ep = self.field('ma_table') + i + pvalue = PyObjectPtr.from_pyobject_ptr(ep['me_value']) + if not pvalue.is_null(): + pkey = PyObjectPtr.from_pyobject_ptr(ep['me_key']) + result[pkey.proxyval()] = pvalue.proxyval() + return result + + +class PyInstanceObjectPtr(PyObjectPtr): + _typename = 'PyInstanceObject' + + def proxyval(self): + # Get name of class: + in_class = PyObjectPtr.from_pyobject_ptr(self.field('in_class')) + cl_name = PyObjectPtr.from_pyobject_ptr(in_class.field('cl_name')).proxyval() + + # Get dictionary of instance attributes: + in_dict = PyObjectPtr.from_pyobject_ptr(self.field('in_dict')).proxyval() + + # Old-style class: + return InstanceProxy(cl_name, in_dict, long(self._gdbval)) + + +class PyIntObjectPtr(PyObjectPtr): + _typename = 'PyIntObject' + + def proxyval(self): + result = int_from_int(self.field('ob_ival')) + return result + + +class PyListObjectPtr(PyObjectPtr): + _typename = 'PyListObject' + + def __getitem__(self, i): + # Get the gdb.Value for the (PyObject*) with the given index: + field_ob_item = self.field('ob_item') + return field_ob_item[i] + + def proxyval(self): + result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval() + for i in safe_range(int_from_int(self.field('ob_size')))] + return result + + +class PyLongObjectPtr(PyObjectPtr): + _typename = 'PyLongObject' + + def proxyval(self): + ''' + Python's Include/longobjrep.h has this declaration: + struct _longobject { + PyObject_VAR_HEAD + digit ob_digit[1]; + }; + + with this description: + The absolute value of a number is equal to + SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) + Negative numbers are represented with ob_size < 0; + zero is represented by ob_size == 0. 
+        ob_size = long(self.field('ob_size'))
+        if ob_size == 0:
+            return 0L
+
+        ob_digit = self.field('ob_digit')
+
+        if gdb.lookup_type('digit').sizeof == 2:
+            SHIFT = 15L
+        else:
+            # FIXME: I haven't yet tested this case
+            SHIFT = 30L
+
+        digits = [long(ob_digit[i]) * 2**(SHIFT*i)
+                  for i in safe_range(abs(ob_size))]
+        result = sum(digits)
+        if ob_size < 0:
+            result = -result
+        return result
+
+
+class PyNoneStructPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyObject* pointing to the
+    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
+    """
+    _typename = 'PyObject'
+
+    def proxyval(self):
+        return None
+
+
+class PyFrameObjectPtr(PyObjectPtr):
+    _typename = 'PyFrameObject'
+
+    def __str__(self):
+        fi = FrameInfo(self)
+        return str(fi)
+
+
+class PyStringObjectPtr(PyObjectPtr):
+    _typename = 'PyStringObject'
+
+    def __str__(self):
+        field_ob_sval = self.field('ob_sval')
+        char_ptr = field_ob_sval.address.cast(_type_char_ptr)
+        return char_ptr.string()
+
+    def proxyval(self):
+        return str(self)
+
+
+class PyTupleObjectPtr(PyObjectPtr):
+    _typename = 'PyTupleObject'
+
+    def __getitem__(self, i):
+        # Get the gdb.Value for the (PyObject*) with the given index:
+        field_ob_item = self.field('ob_item')
+        return field_ob_item[i]
+
+    def proxyval(self):
+        result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval()
+                        for i in safe_range(int_from_int(self.field('ob_size')))])
+        return result
+
+
+class PyTypeObjectPtr(PyObjectPtr):
+    _typename = 'PyTypeObject'
+
+
+class PyUnicodeObjectPtr(PyObjectPtr):
+    _typename = 'PyUnicodeObject'
+
+    def proxyval(self):
+        # From unicodeobject.h:
+        #   Py_ssize_t length;  /* Length of raw Unicode data in buffer */
+        #   Py_UNICODE *str;    /* Raw Unicode buffer */
+        field_length = long(self.field('length'))
+        field_str = self.field('str')
+
+        # Gather a list of ints from the Py_UNICODE array; these are either
+        # UCS-2 or UCS-4 code points:
+        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+
+        # Convert the int code points to unicode characters, and generate a
+        # local unicode instance:
+        result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+        return result
+
+
+def int_from_int(gdbval):
+    return int(str(gdbval))
+
+
+def stringify(val):
+    # TODO: repr() puts everything on one line; pformat can be nicer, but
+    # can lead to very long results; this function isolates the choice
+    if True:
+        return repr(val)
+    else:
+        from pprint import pformat
+        return pformat(val)
+
+
+class FrameInfo:
+    '''
+    Class representing all of the information we can scrape about a
+    PyFrameObject*
+    '''
+    def __init__(self, fval):
+        self.fval = fval
+        self.co = PyCodeObjectPtr.from_pyobject_ptr(fval.field('f_code'))
+        self.co_name = PyObjectPtr.from_pyobject_ptr(self.co.field('co_name'))
+        self.co_filename = PyObjectPtr.from_pyobject_ptr(self.co.field('co_filename'))
+        self.f_lineno = int_from_int(fval.field('f_lineno'))
+        self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
+        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
+        self.locals = [] # list of kv pairs
+        f_localsplus = self.fval.field('f_localsplus')
+        for i in safe_range(self.co_nlocals):
+            #print 'i=%i' % i
+            value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
+            if not value.is_null():
+                name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
+                #print 'name=%s' % name
+                value = value.proxyval()
+                #print 'value=%s' % value
+                self.locals.append((str(name), value))
+
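+    # An illustrative (made-up) example of what __str__ below produces for a
+    # frame executing a function "def foo(a, b):" at line 12 of example.py,
+    # with a=1 and b=2:
+    #   File example.py, line 12, in foo (a=1, b=2)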
+    def __str__(self):
+        return ('File %s, line %i, in %s (%s)'
+                % (self.co_filename,
+                   self.f_lineno,
+                   self.co_name,
+                   ', '.join(['%s=%s' % (k, stringify(v)) for k, v in self.locals]))
+                )
+
+
+class PyObjectPtrPrinter:
+    "Prints a (PyObject*)"
+
+    def __init__ (self, gdbval):
+        self.gdbval = gdbval
+
+    def to_string (self):
+        proxyval = PyObjectPtr.from_pyobject_ptr(self.gdbval).proxyval()
+        return stringify(proxyval)
+
+
+class PyFrameObjectPtrPrinter(PyObjectPtrPrinter):
+    "Prints a (PyFrameObject*)"
+
+    def to_string (self):
+        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
+        fi = FrameInfo(pyop)
+        return str(fi)
+
+
+def pretty_printer_lookup(gdbval):
+    type = gdbval.type.unqualified()
+    if type.code == gdb.TYPE_CODE_PTR:
+        type = type.target().unqualified()
+        t = str(type)
+        if t == "PyObject":
+            return PyObjectPtrPrinter(gdbval)
+        elif t == "PyFrameObject":
+            return PyFrameObjectPtrPrinter(gdbval)
+
+
+"""
+During development, I've been manually invoking the code in this way:
+(gdb) python
+
+import sys
+sys.path.append('/home/david/coding/python-gdb')
+import libpython
+end
+
+then reloading it after each edit like this:
+(gdb) python reload(libpython)
+
+The following code should ensure that the pretty-printer is registered
+if the code is autoloaded by gdb when visiting libpython.so, provided
+that this python file is installed to the same path as the library (or its
+.debug file) plus a "-gdb.py" suffix, e.g:
+  /usr/lib/libpython2.6.so.1.0-gdb.py
+  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
+"""
+def register (obj):
+    if obj is None:
+        obj = gdb
+
+    # Wire up the pretty-printer
+    obj.pretty_printers.append(pretty_printer_lookup)
+
+register (gdb.current_objfile ())
Index: Tools/README
===================================================================
--- Tools/README	(revision 78805)
+++ Tools/README	(working copy)
@@ -20,6 +20,9 @@
 freeze          Create a stand-alone executable from a Python program.
 
+gdb             Python code to be run inside gdb, to make it easier to
+                debug Python itself (by David Malcolm).
+
 i18n            Tools for internationalization. pygettext.py parses
                 Python source code and generates .pot files, and msgfmt.py
                 generates a binary message catalog
Index: Lib/test/test_gdb.py
===================================================================
--- Lib/test/test_gdb.py	(revision 0)
+++ Lib/test/test_gdb.py	(revision 0)
@@ -0,0 +1,287 @@
+# Verify that gdb can pretty-print the various PyObject* types
+#
+# The code for testing gdb was adapted from similar work in Unladen Swallow's
+# Lib/test/test_jit_gdb.py
+
+import os
+import re
+import subprocess
+import sys
+import unittest
+
+from test.test_support import run_unittest
+
+try:
+    gdb_version, _ = subprocess.Popen(["gdb", "--version"],
+                                      stdout=subprocess.PIPE).communicate()
+except OSError:
+    # This is what "no gdb" looks like.  There may, however, be other
+    # errors that manifest this way too.
+    raise unittest.SkipTest("Couldn't find gdb on the path")
+ raise unittest.SkipTest("Couldn't find gdb on the path") +gdb_version_number = re.search(r"^GNU gdb [^\d]*(\d+)\.", gdb_version) +if int(gdb_version_number.group(1)) < 7: + raise unittest.SkipTest("gdb versions before 7.0 didn't support python embedding" + " Saw:\n" + gdb_version) + +# Verify that "gdb" was built with the embedded python support enabled: +cmd = "--eval-command=python import sys; print sys.version_info" +p = subprocess.Popen(["gdb", "--batch", cmd], + stdout=subprocess.PIPE) +gdbpy_version, _ = p.communicate() +if gdbpy_version == '': + raise unittest.SkipTest("gdb not built with embedded python support") + + +class DebuggerTests(unittest.TestCase): + + """Test that the debugger can debug Python.""" + + def run_gdb(self, *args): + """Runs gdb with the command line given by *args. + + Returns its stdout, stderr + """ + out, err = subprocess.Popen( + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + ).communicate() + return out, err + + def get_stack_trace(self, source, breakpoint='PyObject_Print', + commands_after_breakpoint=None, + import_site=False): + ''' + Run 'python -c SOURCE' under gdb with a breakpoint. + + Support injecting commands after the breakpoint is reached + + Returns the stdout from gdb + + commands_after_breakpoint: if provided, a list of strings: gdb commands + ''' + # We use "set breakpoint pending yes" to avoid blocking with a: + # Function "foo" not defined. + # Make breakpoint pending on future shared library load? (y or [n]) + # error, which typically happens python is dynamically linked (the + # breakpoints of interest are to be found in the shared library) + # When this happens, we still get: + # Function "PyObject_Print" not defined. + # emitted to stderr each time, alas. + + # Initially I had "--eval-command=continue" here, but removed it to + # avoid repeated print breakpoints when traversing hierarchical data + # structures + + # Generate a list of commands in gdb's language: + commands = ['set breakpoint pending yes', + 'break %s' % breakpoint, + 'run'] + if commands_after_breakpoint: + commands += commands_after_breakpoint + commands += ['backtrace'] + + # print commands + + # Use "commands" to generate the arguments with which to invoke "gdb": + args = ["gdb", "--batch"] + args += ['--eval-command=%s' % cmd for cmd in commands] + args += ["--args", + sys.executable] + + if not import_site: + # -S suppresses the default 'import site' + args += ["-S"] + + args += ["-c", source] + + # print args + + # Use "args" to invoke gdb, capturing stdout, stderr: + out, err = self.run_gdb(*args) + + # Ignore some noise on stderr due to the pending breakpoint: + err = err.replace('Function "%s" not defined.\n' % breakpoint, '') + + # Ensure no unexpected error messages: + self.assertEquals(err, '') + + return out + + def get_gdb_repr(self, source, + commands_after_breakpoint=None, + import_site=False): + # Given an input python source representation of data, + # run "python -c'print DATA'" under gdb with a breakpoint on + # PyObject_Print and scrape out gdb's representation of the "op" + # parameter, and verify that the gdb displays the same string + # + # For a nested structure, the first time we hit the breakpoint will + # give us the top-level structure + gdb_output = self.get_stack_trace(source, 'PyObject_Print', + commands_after_breakpoint, + import_site) + m = re.match('.*#0 PyObject_Print \(op\=(.*?), fp=.*\).*', + gdb_output, re.DOTALL) + #print m.groups() + return m.group(1), gdb_output + + def test_getting_backtrace(self): + gdb_output = 
+    def test_getting_backtrace(self):
+        gdb_output = self.get_stack_trace('print 42')
+        self.assertTrue('PyObject_Print' in gdb_output)
+
+    def assertGdbRepr(self, val, commands_after_breakpoint=None):
+        # Ensure that gdb's rendering of the value in a debugged process
+        # matches repr(value) in this process:
+        gdb_repr, gdb_output = self.get_gdb_repr('print ' + repr(val),
+                                                 commands_after_breakpoint)
+        self.assertEquals(gdb_repr, repr(val), gdb_output)
+
+    def test_int(self):
+        self.assertGdbRepr(42)
+        self.assertGdbRepr(0)
+        self.assertGdbRepr(-7)
+        self.assertGdbRepr(sys.maxint)
+        self.assertGdbRepr(-sys.maxint)
+
+    def test_long(self):
+        self.assertGdbRepr(0L)
+        self.assertGdbRepr(1000000000000L)
+        self.assertGdbRepr(-1L)
+        self.assertGdbRepr(-1000000000000000L)
+
+    def test_singletons(self):
+        self.assertGdbRepr(True)
+        self.assertGdbRepr(False)
+        self.assertGdbRepr(None)
+
+    def test_dicts(self):
+        self.assertGdbRepr({})
+        self.assertGdbRepr({'foo': 'bar'})
+
+    def test_lists(self):
+        self.assertGdbRepr([])
+        self.assertGdbRepr(range(5))
+
+    def test_strings(self):
+        self.assertGdbRepr('')
+        self.assertGdbRepr('And now for something hopefully the same')
+
+    def test_tuples(self):
+        self.assertGdbRepr(tuple())
+        self.assertGdbRepr((1,))
+
+    def test_unicode(self):
+        self.assertGdbRepr(u'hello world')
+        self.assertGdbRepr(u'\u2620')
+
+    def test_classic_class(self):
+        gdb_repr, gdb_output = self.get_gdb_repr('''
+class Foo:
+    pass
+foo = Foo()
+foo.an_int = 42
+print foo''')
+        m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
+        self.assertTrue(m,
+                        msg='Unexpected classic-class rendering %r' % gdb_repr)
+
+    def test_modern_class(self):
+        gdb_repr, gdb_output = self.get_gdb_repr('''
+class Foo(object):
+    pass
+foo = Foo()
+foo.an_int = 42
+print foo''')
+        m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
+        self.assertTrue(m,
+                        msg='Unexpected new-style class rendering %r' % gdb_repr)
+
+    def test_subclassing_list(self):
+        gdb_repr, gdb_output = self.get_gdb_repr('''
+class Foo(list):
+    pass
+foo = Foo()
+foo += [1, 2, 3]
+foo.an_int = 42
+print foo''')
+        m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
+        self.assertTrue(m,
+                        msg='Unexpected new-style class rendering %r' % gdb_repr)
+
+    def test_subclassing_tuple(self):
+        '''This should exercise the negative tp_dictoffset code in the
+        new-style class support'''
+        gdb_repr, gdb_output = self.get_gdb_repr('''
+class Foo(tuple):
+    pass
+foo = Foo((1, 2, 3))
+foo.an_int = 42
+print foo''')
+        m = re.match(r'<Foo\(an_int=42\) at remote 0x[0-9a-f]+>', gdb_repr)
+        self.assertTrue(m,
+                        msg='Unexpected new-style class rendering %r' % gdb_repr)
+
+    def assertSane(self, source, corruption, exp_type='unknown'):
+        '''Run Python under gdb, corrupting variables in the inferior process
+        immediately before taking a backtrace.
+
+        Verify that the variable's representation is the expected failsafe
+        representation'''
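+        # For example, after a corruption such as "set op->ob_type=0", the
+        # value should still be rendered in the failsafe form
+        #   <unknown at remote 0x...>
+        # (with a real address) rather than crashing gdb's pretty-printing.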
+        gdb_repr, gdb_output = \
+            self.get_gdb_repr(source,
+                              commands_after_breakpoint=[corruption])
+        self.assertTrue(re.match('<%s at remote 0x[0-9a-f]+>' % exp_type,
+                                 gdb_repr),
+                        'Unexpected gdb representation: %r\n%s' % \
+                            (gdb_repr, gdb_output))
+
+    def test_NULL_ptr(self):
+        'Ensure that a NULL PyObject* is handled gracefully'
+        self.assertSane('print 42',
+                        'set variable op=0')
+
+    def test_NULL_ob_type(self):
+        self.assertSane('print 42',
+                        'set op->ob_type=0')
+
+    def test_corrupt_ob_type(self):
+        self.assertSane('print 42',
+                        'set op->ob_type=0xDEADBEEF')
+
+    def test_corrupt_tp_flags(self):
+        self.assertSane('print 42',
+                        'set op->ob_type->tp_flags=0x0',
+                        exp_type='int')
+
+    def test_corrupt_tp_name(self):
+        self.assertSane('print 42',
+                        'set op->ob_type->tp_name=0xDEADBEEF')
+
+    def test_NULL_instance_dict(self):
+        self.assertSane('''
+class Foo:
+    pass
+foo = Foo()
+foo.an_int = 42
+print foo''',
+                        'set ((PyInstanceObject*)op)->in_dict = 0',
+                        exp_type='Foo')
+
+    def test_builtins_help(self):
+        # Ensure that the new-style class _Helper in site.py can be handled
+        # (this was the issue causing tracebacks in
+        #   http://bugs.python.org/issue8032#msg100537 )
+
+        gdb_repr, gdb_output = self.get_gdb_repr('print __builtins__.help', import_site=True)
+        m = re.match(r'<_Helper at remote 0x[0-9a-f]+>', gdb_repr)
+        self.assertTrue(m,
+                        msg='Unexpected rendering %r' % gdb_repr)
+
+    # TODO:
+    #   frames
+
+
+def test_main():
+    run_unittest(DebuggerTests)
+
+if __name__ == "__main__":
+    test_main()
Index: Makefile.pre.in
===================================================================
--- Makefile.pre.in	(revision 78805)
+++ Makefile.pre.in	(working copy)
@@ -359,7 +359,7 @@
 # Default target
 all:		build_all
-build_all:	$(BUILDPYTHON) oldsharedmods sharedmods
+build_all:	$(BUILDPYTHON) oldsharedmods sharedmods gdbhooks
 
 # Compile a binary with gcc profile guided optimization.
 profile-opt:
@@ -432,6 +432,16 @@
 libpython$(VERSION).sl: $(LIBRARY_OBJS)
 	$(LDSHARED) $(LDFLAGS) -o $@ $(LIBRARY_OBJS) $(MODLIBS) $(SHLIBS) $(LIBC) $(LIBM) $(LDLAST)
 
+# Copy up the gdb python hooks into a position where they can be automatically
+# loaded by gdb during Lib/test/test_gdb.py
+#
+# Distributors are likely to want to install this somewhere else e.g. relative
+# to the stripped DWARF data for the shared library.
+gdbhooks: $(BUILDPYTHON)-gdb.py
+
+$(BUILDPYTHON)-gdb.py: Tools/gdb/libpython.py
+	$(INSTALL_SCRIPT) $< $(BUILDPYTHON)-gdb.py
+
 # This rule is here for OPENSTEP/Rhapsody/MacOSX. It builds a temporary
 # minimal framework (not including the Lib directory and such) in the current
 # directory.
@@ -1233,5 +1243,6 @@
 .PHONY: frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools
 .PHONY: frameworkaltinstallunixtools recheck autoconf clean clobber distclean
 .PHONY: smelly funny patchcheck
+.PHONY: gdbhooks
 
 # IF YOU PUT ANYTHING HERE IT WILL GO AWAY
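(For reference, a sketch of one way to exercise the new hooks after applying the patch and rebuilding; the exact regrtest invocation may differ on your platform:

    make                                # build_all now also creates ./python-gdb.py via the gdbhooks target
    ./python -m test.regrtest -v test_gdb

gdb 7.x auto-loads ./python-gdb.py when it is asked to debug the freshly built ./python in the same directory, which is what Lib/test/test_gdb.py relies on.)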