Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(64)

Side by Side Diff: Lib/dis.py

Issue 11816: Add functions to return disassembly as string
Patch Set: Created 1 year, 5 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Doc/whatsnew/3.3.rst ('k') | Lib/test/bytecode_helper.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 """Disassembler of Python byte code into mnemonics.""" 1 """Disassembler of Python byte code into mnemonics."""
2 2
3 import sys 3 import sys
4 import types 4 import types
5 import collections
5 6
6 from opcode import * 7 from opcode import *
7 from opcode import __all__ as _opcodes_all 8 from opcode import __all__ as _opcodes_all
8 9
9 __all__ = ["code_info", "dis", "disassemble", "distb", "disco", 10 __all__ = ["code_info", "dis", "disassemble", "distb", "disco",
10 "findlinestarts", "findlabels", "show_code"] + _opcodes_all 11 "findlinestarts", "findlabels", "show_code",
12 "get_instructions", "Instruction"] + _opcodes_all
11 del _opcodes_all 13 del _opcodes_all
12 14
13 _have_code = (types.MethodType, types.FunctionType, types.CodeType, type) 15 _have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
14 16
15 def _try_compile(source, name): 17 def _try_compile(source, name):
16 """Attempts to compile the given source, first as an expression and 18 """Attempts to compile the given source, first as an expression and
17 then as a statement if the first approach fails. 19 then as a statement if the first approach fails.
18 20
19 Utility function to accept strings in functions that otherwise 21 Utility function to accept strings in functions that otherwise
20 expect code objects 22 expect code objects
21 """ 23 """
22 try: 24 try:
23 c = compile(source, name, 'eval') 25 c = compile(source, name, 'eval')
24 except SyntaxError: 26 except SyntaxError:
25 c = compile(source, name, 'exec') 27 c = compile(source, name, 'exec')
26 return c 28 return c
27 29
28 def dis(x=None): 30 def dis(x=None, *, file=None):
29 """Disassemble classes, methods, functions, or code. 31 """Disassemble classes, methods, functions, or code.
30 32
31 With no argument, disassemble the last traceback. 33 With no argument, disassemble the last traceback.
32 34
33 """ 35 """
34 if x is None: 36 if x is None:
35 distb() 37 distb()
36 return 38 return
37 if hasattr(x, '__func__'): # Method 39 if hasattr(x, '__func__'): # Method
38 x = x.__func__ 40 x = x.__func__
39 if hasattr(x, '__code__'): # Function 41 if hasattr(x, '__code__'): # Function
40 x = x.__code__ 42 x = x.__code__
41 if hasattr(x, '__dict__'): # Class or module 43 if hasattr(x, '__dict__'): # Class or module
42 items = sorted(x.__dict__.items()) 44 items = sorted(x.__dict__.items())
43 for name, x1 in items: 45 for name, x1 in items:
44 if isinstance(x1, _have_code): 46 if isinstance(x1, _have_code):
45 print("Disassembly of %s:" % name) 47 print("Disassembly of %s:" % name, file=file)
46 try: 48 try:
47 dis(x1) 49 dis(x1)
48 except TypeError as msg: 50 except TypeError as msg:
49 print("Sorry:", msg) 51 print("Sorry:", msg, file=file)
50 print() 52 print(file=file)
51 elif hasattr(x, 'co_code'): # Code object 53 elif hasattr(x, 'co_code'): # Code object
52 disassemble(x) 54 disassemble(x, file=file)
53 elif isinstance(x, (bytes, bytearray)): # Raw bytecode 55 elif isinstance(x, (bytes, bytearray)): # Raw bytecode
54 _disassemble_bytes(x) 56 _disassemble_bytes(x, file=file)
55 elif isinstance(x, str): # Source code 57 elif isinstance(x, str): # Source code
56 _disassemble_str(x) 58 _disassemble_str(x, file=file)
57 else: 59 else:
58 raise TypeError("don't know how to disassemble %s objects" % 60 raise TypeError("don't know how to disassemble %s objects" %
59 type(x).__name__) 61 type(x).__name__)
60 62
61 def distb(tb=None): 63 def distb(tb=None, *, file=None):
62 """Disassemble a traceback (default: last traceback).""" 64 """Disassemble a traceback (default: last traceback)."""
63 if tb is None: 65 if tb is None:
64 try: 66 try:
65 tb = sys.last_traceback 67 tb = sys.last_traceback
66 except AttributeError: 68 except AttributeError:
67 raise RuntimeError("no last traceback to disassemble") 69 raise RuntimeError("no last traceback to disassemble")
68 while tb.tb_next: tb = tb.tb_next 70 while tb.tb_next: tb = tb.tb_next
69 disassemble(tb.tb_frame.f_code, tb.tb_lasti) 71 disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
70 72
71 # The inspect module interrogates this dictionary to build its 73 # The inspect module interrogates this dictionary to build its
72 # list of CO_* constants. It is also used by pretty_flags to 74 # list of CO_* constants. It is also used by pretty_flags to
73 # turn the co_flags field into a human readable list. 75 # turn the co_flags field into a human readable list.
74 COMPILER_FLAG_NAMES = { 76 COMPILER_FLAG_NAMES = {
75 1: "OPTIMIZED", 77 1: "OPTIMIZED",
76 2: "NEWLOCALS", 78 2: "NEWLOCALS",
77 4: "VARARGS", 79 4: "VARARGS",
78 8: "VARKEYWORDS", 80 8: "VARKEYWORDS",
79 16: "NESTED", 81 16: "NESTED",
80 32: "GENERATOR", 82 32: "GENERATOR",
81 64: "NOFREE", 83 64: "NOFREE",
82 } 84 }
83 85
84 def pretty_flags(flags): 86 def pretty_flags(flags):
85 """Return pretty representation of code flags.""" 87 """Return pretty representation of code flags."""
86 names = [] 88 names = []
87 for i in range(32): 89 for i in range(32):
88 flag = 1<<i 90 flag = 1<<i
89 if flags & flag: 91 if flags & flag:
90 names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag))) 92 names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
91 flags ^= flag 93 flags ^= flag
92 if not flags: 94 if not flags:
93 break 95 break
94 else: 96 else:
95 names.append(hex(flags)) 97 names.append(hex(flags))
96 return ", ".join(names) 98 return ", ".join(names)
97 99
98 def code_info(x): 100 def _get_code_object(x):
99 """Formatted details of methods, functions, or code.""" 101 """Helper to handle methods, functions, strings and raw code objects"""
100 if hasattr(x, '__func__'): # Method 102 if hasattr(x, '__func__'): # Method
101 x = x.__func__ 103 x = x.__func__
102 if hasattr(x, '__code__'): # Function 104 if hasattr(x, '__code__'): # Function
103 x = x.__code__ 105 x = x.__code__
104 if isinstance(x, str): # Source code 106 if isinstance(x, str): # Source code
105 x = _try_compile(x, "<code_info>") 107 x = _try_compile(x, "<disassembly>")
106 if hasattr(x, 'co_code'): # Code object 108 if hasattr(x, 'co_code'): # Code object
107 return _format_code_info(x) 109 return x
108 else: 110 raise TypeError("don't know how to disassemble %s objects" %
109 raise TypeError("don't know how to disassemble %s objects" % 111 type(x).__name__)
110 type(x).__name__) 112
113 def code_info(x):
114 """Formatted details of methods, functions, or code."""
115 return _format_code_info(_get_code_object(x))
111 116
112 def _format_code_info(co): 117 def _format_code_info(co):
113 lines = [] 118 lines = []
114 lines.append("Name: %s" % co.co_name) 119 lines.append("Name: %s" % co.co_name)
115 lines.append("Filename: %s" % co.co_filename) 120 lines.append("Filename: %s" % co.co_filename)
116 lines.append("Argument count: %s" % co.co_argcount) 121 lines.append("Argument count: %s" % co.co_argcount)
117 lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount) 122 lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
118 lines.append("Number of locals: %s" % co.co_nlocals) 123 lines.append("Number of locals: %s" % co.co_nlocals)
119 lines.append("Stack size: %s" % co.co_stacksize) 124 lines.append("Stack size: %s" % co.co_stacksize)
120 lines.append("Flags: %s" % pretty_flags(co.co_flags)) 125 lines.append("Flags: %s" % pretty_flags(co.co_flags))
(...skipping 12 matching lines...) Expand all
133 if co.co_freevars: 138 if co.co_freevars:
134 lines.append("Free variables:") 139 lines.append("Free variables:")
135 for i_n in enumerate(co.co_freevars): 140 for i_n in enumerate(co.co_freevars):
136 lines.append("%4d: %s" % i_n) 141 lines.append("%4d: %s" % i_n)
137 if co.co_cellvars: 142 if co.co_cellvars:
138 lines.append("Cell variables:") 143 lines.append("Cell variables:")
139 for i_n in enumerate(co.co_cellvars): 144 for i_n in enumerate(co.co_cellvars):
140 lines.append("%4d: %s" % i_n) 145 lines.append("%4d: %s" % i_n)
141 return "\n".join(lines) 146 return "\n".join(lines)
142 147
143 def show_code(co): 148 def show_code(co, *, file=None):
144 """Print details of methods, functions, or code to stdout.""" 149 """Print details of methods, functions, or code to stdout."""
145 print(code_info(co)) 150 print(code_info(co), file=file)
146 151
147 def disassemble(co, lasti=-1): 152 _Instruction = collections.namedtuple("_Instruction",
148 """Disassemble a code object.""" 153 "opname opcode arg argval argrepr offset starts_line is_jump_target")
149 code = co.co_code 154
155 class Instruction(_Instruction):
156 """Details for a bytecode operation
157
158 Defined fields:
159 opname - human readable name for operation
160 opcode - numeric code for operation
161 arg - numeric argument to operation (if any), otherwise None
162 argval - resolved arg value (if known), otherwise same as arg
163 argrepr - human readable description of operation argument
164 offset - start index of operation within bytecode sequence
165 starts_line - line started by this opcode (if any), otherwise None
166 is_jump_target - True if other code jumps to here, otherwise False
167 """
168
169 def _disassemble(self, lineno_width=3, mark_as_current=False):
170 """Format instruction details for inclusion in disassembly output
171
172 *lineno_width* sets the width of the line number field (0 omits it)
173 *mark_as_current* inserts a '-->' marker arrow as part of the line
174 """
175 fields = []
176 # Column: Source code line number
177 if lineno_width:
178 if self.starts_line is not None:
179 lineno_fmt = "%%%dd" % lineno_width
180 fields.append(lineno_fmt % self.starts_line)
181 else:
182 fields.append(' ' * lineno_width)
183 # Column: Current instruction indicator
184 if mark_as_current:
185 fields.append('-->')
186 else:
187 fields.append(' ')
188 # Column: Jump target marker
189 if self.is_jump_target:
190 fields.append('>>')
191 else:
192 fields.append(' ')
193 # Column: Instruction offset from start of code sequence
194 fields.append(repr(self.offset).rjust(4))
195 # Column: Opcode name
196 fields.append(self.opname.ljust(20))
197 # Column: Opcode argument
198 if self.arg is not None:
199 fields.append(repr(self.arg).rjust(5))
200 # Column: Opcode argument details
201 if self.argrepr:
202 fields.append('(' + self.argrepr + ')')
203 return ' '.join(fields)
204
205
206 def get_instructions(x, *, line_offset=0):
207 """Iterator for the opcodes in methods, functions or code
208
209 Generates a series of Instruction named tuples giving the details of
210 each operation in the supplied code.
211
212 The given line offset is added to the 'starts_line' attribute of any
213 instructions that start a new line.
214 """
215 co = _get_code_object(x)
216 cell_names = co.co_cellvars + co.co_freevars
217 linestarts = dict(findlinestarts(co))
218 return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
219 co.co_consts, cell_names, linestarts,
220 line_offset)
221
222 def _get_arg_info(arg, info_source):
223 """Helper to get optional details about the operation argument
224
225 Returns the dereferenced argval and its repr() if the info
226 source is defined.
227 Otherwise return the arg and its repr().
228 """
229 argval = arg
230 if info_source is not None:
231 argval = info_source[arg]
232 if isinstance(argval, str):
233 details = argval
234 else:
235 details = repr(argval)
236 return argval, details
237
238
239 def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
240 cells=None, linestarts=None, line_offset=0):
241 """Iterate over the instructions in a bytecode string.
242
243 Generates a sequence of Instruction namedtuples giving the details of each
244 opcode. Additional information about the code's runtime environment
245 (e.g. variable names, constants) can be specified using optional
246 arguments.
247
248 """
150 labels = findlabels(code) 249 labels = findlabels(code)
151 linestarts = dict(findlinestarts(co))
152 n = len(code)
153 i = 0
154 extended_arg = 0 250 extended_arg = 0
251 starts_line = None
155 free = None 252 free = None
156 while i < n: 253 # enumerate() is not an option, since we sometimes process
157 op = code[i] 254 # multiple elements on a single pass through the loop
158 if i in linestarts:
159 if i > 0:
160 print()
161 print("%3d" % linestarts[i], end=' ')
162 else:
163 print(' ', end=' ')
164
165 if i == lasti: print('-->', end=' ')
166 else: print(' ', end=' ')
167 if i in labels: print('>>', end=' ')
168 else: print(' ', end=' ')
169 print(repr(i).rjust(4), end=' ')
170 print(opname[op].ljust(20), end=' ')
171 i = i+1
172 if op >= HAVE_ARGUMENT:
173 oparg = code[i] + code[i+1]*256 + extended_arg
174 extended_arg = 0
175 i = i+2
176 if op == EXTENDED_ARG:
177 extended_arg = oparg*65536
178 print(repr(oparg).rjust(5), end=' ')
179 if op in hasconst:
180 print('(' + repr(co.co_consts[oparg]) + ')', end=' ')
181 elif op in hasname:
182 print('(' + co.co_names[oparg] + ')', end=' ')
183 elif op in hasjrel:
184 print('(to ' + repr(i + oparg) + ')', end=' ')
185 elif op in haslocal:
186 print('(' + co.co_varnames[oparg] + ')', end=' ')
187 elif op in hascompare:
188 print('(' + cmp_op[oparg] + ')', end=' ')
189 elif op in hasfree:
190 if free is None:
191 free = co.co_cellvars + co.co_freevars
192 print('(' + free[oparg] + ')', end=' ')
193 print()
194
195 def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
196 constants=None):
197 labels = findlabels(code)
198 n = len(code) 255 n = len(code)
199 i = 0 256 i = 0
200 while i < n: 257 while i < n:
201 op = code[i] 258 op = code[i]
202 if i == lasti: print('-->', end=' ') 259 offset = i
203 else: print(' ', end=' ') 260 if linestarts is not None:
204 if i in labels: print('>>', end=' ') 261 starts_line = linestarts.get(i, None)
205 else: print(' ', end=' ') 262 if starts_line is not None:
206 print(repr(i).rjust(4), end=' ') 263 starts_line += line_offset
207 print(opname[op].ljust(15), end=' ') 264 is_jump_target = i in labels
208 i = i+1 265 i = i+1
266 arg = None
267 argval = None
268 argrepr = ''
209 if op >= HAVE_ARGUMENT: 269 if op >= HAVE_ARGUMENT:
210 oparg = code[i] + code[i+1]*256 270 arg = code[i] + code[i+1]*256 + extended_arg
271 extended_arg = 0
211 i = i+2 272 i = i+2
212 print(repr(oparg).rjust(5), end=' ') 273 if op == EXTENDED_ARG:
274 extended_arg = arg*65536
275 # Set argval to the dereferenced value of the argument when
276 # available, and argrepr to the string representation of argval.
277 # _disassemble_bytes needs the string repr of the
278 # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
279 argval = arg
213 if op in hasconst: 280 if op in hasconst:
214 if constants: 281 argval, argrepr = _get_arg_info(arg, constants)
215 print('(' + repr(constants[oparg]) + ')', end=' ')
216 else:
217 print('(%d)'%oparg, end=' ')
218 elif op in hasname: 282 elif op in hasname:
219 if names is not None: 283 argval, argrepr = _get_arg_info(arg, names)
220 print('(' + names[oparg] + ')', end=' ')
221 else:
222 print('(%d)'%oparg, end=' ')
223 elif op in hasjrel: 284 elif op in hasjrel:
224 print('(to ' + repr(i + oparg) + ')', end=' ') 285 argval = i + arg
286 argrepr = "to " + repr(argval)
225 elif op in haslocal: 287 elif op in haslocal:
226 if varnames: 288 argval, argrepr = _get_arg_info(arg, varnames)
227 print('(' + varnames[oparg] + ')', end=' ')
228 else:
229 print('(%d)' % oparg, end=' ')
230 elif op in hascompare: 289 elif op in hascompare:
231 print('(' + cmp_op[oparg] + ')', end=' ') 290 argval = cmp_op[arg]
232 print() 291 argrepr = argval
292 elif op in hasfree:
293 argval, argrepr = _get_arg_info(arg, cells)
294 yield Instruction(opname[op], op,
295 arg, argval, argrepr,
296 offset, starts_line, is_jump_target)
233 297
234 def _disassemble_str(source): 298 def disassemble(co, lasti=-1, *, file=None):
299 """Disassemble a code object."""
300 cell_names = co.co_cellvars + co.co_freevars
301 linestarts = dict(findlinestarts(co))
302 _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
303 co.co_consts, cell_names, linestarts, file=file)
304
305 def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
306 constants=None, cells=None, linestarts=None,
307 *, file=None):
308 # Omit the line number column entirely if we have no line number info
309 show_lineno = linestarts is not None
310 # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
311 lineno_width = 3 if show_lineno else 0
312 for instr in _get_instructions_bytes(code, varnames, names,
313 constants, cells, linestarts):
314 new_source_line = (show_lineno and
315 instr.starts_line is not None and
316 instr.offset > 0)
317 if new_source_line:
318 print(file=file)
319 is_current_instr = instr.offset == lasti
320 print(instr._disassemble(lineno_width, is_current_instr), file=file)
321
322 def _disassemble_str(source, *, file=None):
235 """Compile the source string, then disassemble the code object.""" 323 """Compile the source string, then disassemble the code object."""
236 disassemble(_try_compile(source, '<dis>')) 324 disassemble(_try_compile(source, '<dis>'), file=file)
237 325
238 disco = disassemble # XXX For backwards compatibility 326 disco = disassemble # XXX For backwards compatibility
239 327
240 def findlabels(code): 328 def findlabels(code):
241 """Detect all offsets in a byte code which are jump targets. 329 """Detect all offsets in a byte code which are jump targets.
242 330
243 Return the list of offsets. 331 Return the list of offsets.
244 332
245 """ 333 """
246 labels = [] 334 labels = []
335 # enumerate() is not an option, since we sometimes process
336 # multiple elements on a single pass through the loop
247 n = len(code) 337 n = len(code)
248 i = 0 338 i = 0
249 while i < n: 339 while i < n:
250 op = code[i] 340 op = code[i]
251 i = i+1 341 i = i+1
252 if op >= HAVE_ARGUMENT: 342 if op >= HAVE_ARGUMENT:
253 oparg = code[i] + code[i+1]*256 343 arg = code[i] + code[i+1]*256
254 i = i+2 344 i = i+2
255 label = -1 345 label = -1
256 if op in hasjrel: 346 if op in hasjrel:
257 label = i+oparg 347 label = i+arg
258 elif op in hasjabs: 348 elif op in hasjabs:
259 label = oparg 349 label = arg
260 if label >= 0: 350 if label >= 0:
261 if label not in labels: 351 if label not in labels:
262 labels.append(label) 352 labels.append(label)
263 return labels 353 return labels
264 354
265 def findlinestarts(code): 355 def findlinestarts(code):
266 """Find the offsets in a byte code which are start of lines in the source. 356 """Find the offsets in a byte code which are start of lines in the source.
267 357
268 Generate pairs (offset, lineno) as described in Python/compile.c. 358 Generate pairs (offset, lineno) as described in Python/compile.c.
269 359
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
302 source = f.read() 392 source = f.read()
303 if fn is not None: 393 if fn is not None:
304 f.close() 394 f.close()
305 else: 395 else:
306 fn = "<stdin>" 396 fn = "<stdin>"
307 code = compile(source, fn, "exec") 397 code = compile(source, fn, "exec")
308 dis(code) 398 dis(code)
309 399
310 if __name__ == "__main__": 400 if __name__ == "__main__":
311 _test() 401 _test()
OLDNEW
« no previous file with comments | « Doc/whatsnew/3.3.rst ('k') | Lib/test/bytecode_helper.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld cbc36f91f3f7