You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
606 lines
17 KiB
Python
606 lines
17 KiB
Python
"""
|
|
Decompiler that can be used with the debugger (where statements correctly represent the
|
|
line numbers).
|
|
|
|
Note: this is a work in progress / proof of concept / not ready to be used.
|
|
"""
|
|
|
|
import dis
|
|
|
|
from _pydevd_bundle.pydevd_collect_bytecode_info import iter_instructions
|
|
from _pydev_bundle import pydev_log
|
|
import sys
|
|
import inspect
|
|
from io import StringIO
|
|
|
|
|
|
class _Stack(object):
|
|
def __init__(self):
|
|
self._contents = []
|
|
|
|
def push(self, obj):
|
|
# print('push', obj)
|
|
self._contents.append(obj)
|
|
|
|
def pop(self):
|
|
return self._contents.pop(-1)
|
|
|
|
|
|
INDENT_MARKER = object()
|
|
DEDENT_MARKER = object()
|
|
_SENTINEL = object()
|
|
|
|
DEBUG = False
|
|
|
|
|
|
class _Token(object):
    """
    A piece of text to be written at a given line along with the tokens/handlers which must be
    written before it.
    """

    def __init__(self, i_line, instruction=None, tok=_SENTINEL, priority=0, after=None, end_of_line=False):
        """
        :param i_line:
            The line in which the token should be written.

        :param instruction:
            The instruction related to the token. If `tok` is not given, the text of the token
            is `str(instruction.argval)` (or an empty string if the argval is a code object).

        :param tok:
            The text of the token (if given, it's used instead of the instruction argval).

        :param priority:
            Just stored in the token.

        :param after:
            A token or handler which must be written before this token (optional).

        :param end_of_line:
            Marker to signal that this token should be written only after all the other tokens
            have been written.

        :raises AssertionError:
            If neither `tok` nor `instruction` were given.
        """
        self.i_line = i_line

        if tok is not _SENTINEL:
            self.tok = tok
        elif instruction is None:
            raise AssertionError("Either the tok or the instruction is needed.")
        elif inspect.iscode(instruction.argval):
            self.tok = ""
        else:
            self.tok = str(instruction.argval)

        self.instruction = instruction
        self.priority = priority
        self.end_of_line = end_of_line

        # Requisites are kept separately: tokens directly and handlers (whose tokens
        # are only queried when `get_after_tokens` is called).
        self._after_tokens = set()
        self._after_handler_tokens = set()
        if after:
            self.mark_after(after)

    def mark_after(self, v):
        """
        Registers that this token must be written after `v`.

        :param v:
            A `_Token` or a `_BaseHandler`.

        :raises AssertionError:
            If `v` is neither a token nor a handler.
        """
        if isinstance(v, _Token):
            target = self._after_tokens
        elif isinstance(v, _BaseHandler):
            target = self._after_handler_tokens
        else:
            raise AssertionError("Unhandled: %s" % (v,))
        target.add(v)

    def get_after_tokens(self):
        """
        :return set:
            A new set with the tokens marked directly plus the current tokens of each handler
            marked.
        """
        required = set(self._after_tokens)
        for handler in self._after_handler_tokens:
            required.update(handler.tokens)
        return required

    def __repr__(self):
        return "Token(%s, after: %s)" % (self.tok, self.get_after_tokens())

    __str__ = __repr__
|
|
|
|
|
|
class _Writer(object):
|
|
def __init__(self):
|
|
self.line_to_contents = {}
|
|
self.all_tokens = set()
|
|
|
|
def get_line(self, line):
|
|
lst = self.line_to_contents.get(line)
|
|
if lst is None:
|
|
lst = self.line_to_contents[line] = []
|
|
return lst
|
|
|
|
def indent(self, line):
|
|
self.get_line(line).append(INDENT_MARKER)
|
|
|
|
def dedent(self, line):
|
|
self.get_line(line).append(DEDENT_MARKER)
|
|
|
|
def write(self, line, token):
|
|
if token in self.all_tokens:
|
|
return
|
|
self.all_tokens.add(token)
|
|
assert isinstance(token, _Token)
|
|
lst = self.get_line(line)
|
|
lst.append(token)
|
|
|
|
|
|
class _BaseHandler(object):
|
|
def __init__(self, i_line, instruction, stack, writer, disassembler):
|
|
self.i_line = i_line
|
|
self.instruction = instruction
|
|
self.stack = stack
|
|
self.writer = writer
|
|
self.disassembler = disassembler
|
|
self.tokens = []
|
|
self._handle()
|
|
|
|
def _write_tokens(self):
|
|
for token in self.tokens:
|
|
self.writer.write(token.i_line, token)
|
|
|
|
def _handle(self):
|
|
raise NotImplementedError(self)
|
|
|
|
def __repr__(self, *args, **kwargs):
|
|
try:
|
|
return "%s line:%s" % (self.instruction, self.i_line)
|
|
except:
|
|
return object.__repr__(self)
|
|
|
|
__str__ = __repr__
|
|
|
|
|
|
_op_name_to_handler = {}
|
|
|
|
|
|
def _register(cls):
|
|
_op_name_to_handler[cls.opname] = cls
|
|
return cls
|
|
|
|
|
|
class _BasePushHandler(_BaseHandler):
    """
    Base for handlers which just push themselves to the stack.
    """

    def _handle(self):
        stack = self.stack
        stack.push(self)
|
|
|
|
|
|
class _BaseLoadHandler(_BasePushHandler):
    """
    Base for handlers which push themselves to the stack and have a single token whose text
    is computed from the instruction.
    """

    def _handle(self):
        super()._handle()
        loaded = _Token(self.i_line, self.instruction)
        self.tokens = [loaded]
|
|
|
|
|
|
@_register
class _LoadBuildClass(_BasePushHandler):
    """
    Handler for LOAD_BUILD_CLASS (pushed to the stack without any tokens).
    """

    opname = "LOAD_BUILD_CLASS"
|
|
|
|
|
|
@_register
class _LoadConst(_BaseLoadHandler):
    """
    Handler for LOAD_CONST (pushed to the stack with a token for the instruction argval).
    """

    opname = "LOAD_CONST"
|
|
|
|
|
|
@_register
class _LoadName(_BaseLoadHandler):
    """
    Handler for LOAD_NAME (pushed to the stack with a token for the instruction argval).
    """

    opname = "LOAD_NAME"
|
|
|
|
|
|
@_register
class _LoadGlobal(_BaseLoadHandler):
    """
    Handler for LOAD_GLOBAL (pushed to the stack with a token for the instruction argval).
    """

    opname = "LOAD_GLOBAL"
|
|
|
|
|
|
@_register
class _LoadFast(_BaseLoadHandler):
    """
    Handler for LOAD_FAST (pushed to the stack with a token for the instruction argval).
    """

    opname = "LOAD_FAST"
|
|
|
|
|
|
@_register
class _GetIter(_BaseHandler):
    """
    Implements TOS = iter(TOS).
    """

    opname = "GET_ITER"

    # The handler which was at the top of the stack (set when the instruction is handled).
    iter_target = None

    def _handle(self):
        stack = self.stack
        target = stack.pop()
        self.iter_target = target
        # This handler replaces the target in the stack, exposing the same tokens.
        self.tokens.extend(target.tokens)
        stack.push(self)
|
|
|
|
|
|
@_register
class _ForIter(_BaseHandler):
    """
    TOS is an iterator. Call its __next__() method. If this yields a new value, push it on the stack
    (leaving the iterator below it). If the iterator indicates it is exhausted TOS is popped, and
    the byte code counter is incremented by delta.
    """

    opname = "FOR_ITER"

    # The handler which was at the top of the stack (set when the instruction is handled).
    iter_in = None

    def _handle(self):
        self.iter_in = self.stack.pop()
        self.stack.push(self)

    def store_in_name(self, store_name):
        """
        Writes the `for <name> in <iterable>:` tokens.

        :param store_name:
            The handler of the store instruction which consumed this handler from the stack
            (its instruction provides the name of the loop variable).
        """
        for_token = _Token(self.i_line, None, "for ")
        name_token = _Token(store_name.i_line, store_name.instruction, after=for_token)
        in_token = _Token(store_name.i_line, None, " in ", after=name_token)
        self.tokens.extend((for_token, name_token, in_token))

        prev = in_token
        if self.iter_in:
            # Chain the tokens of the iterable so that they're written in order after " in ".
            for t in self.iter_in.tokens:
                t.mark_after(prev)
                prev = t
            self.tokens.extend(self.iter_in.tokens)

        # NOTE(review): the colon is written in the FOR_ITER line even if tokens of the
        # iterable are in later lines -- confirm whether that's intended.
        colon_token = _Token(self.i_line, None, ":", after=prev)
        self.tokens.append(colon_token)

        self._write_tokens()
|
|
|
|
|
|
@_register
class _StoreName(_BaseHandler):
    """
    Implements name = TOS. namei is the index of name in the attribute co_names of the code object.
    The compiler tries to use STORE_FAST or STORE_GLOBAL if possible.
    """

    opname = "STORE_NAME"

    def _handle(self):
        value = self.stack.pop()

        if isinstance(value, _ForIter):
            # The name being stored is the loop variable: the for handler writes everything.
            value.store_in_name(self)
            return

        if isinstance(value, _MakeFunction) and not value.is_lambda:
            # Nothing is written here when a (non-lambda) function is stored.
            return

        # The assignment is written in the first line among this instruction and the value tokens.
        first_line = min([self.i_line] + [t.i_line for t in value.tokens])

        name_token = _Token(first_line, self.instruction)
        equal_token = _Token(first_line, None, "=", after=name_token)
        self.tokens.append(name_token)
        self.tokens.append(equal_token)

        for value_token in value.tokens:
            value_token.mark_after(equal_token)
        self.tokens.extend(value.tokens)

        self._write_tokens()
|
|
|
|
|
|
@_register
class _ReturnValue(_BaseHandler):
    """
    Returns with TOS to the caller of the function.
    """

    opname = "RETURN_VALUE"

    def _handle(self):
        returned = self.stack.pop()

        # Marked as end_of_line: it's only written when no other token in the line can be written.
        return_token = _Token(self.i_line, None, "return ", end_of_line=True)
        self.tokens.append(return_token)

        # The tokens of the returned value must come after the "return " itself.
        for value_token in returned.tokens:
            value_token.mark_after(return_token)
        self.tokens.extend(returned.tokens)

        self._write_tokens()
|
|
|
|
|
|
@_register
class _CallFunction(_BaseHandler):
    """

    CALL_FUNCTION(argc)

    Calls a callable object with positional arguments. argc indicates the number of positional
    arguments. The top of the stack contains positional arguments, with the right-most argument
    on top. Below the arguments is a callable object to call. CALL_FUNCTION pops all arguments
    and the callable object off the stack, calls the callable object with those arguments, and
    pushes the return value returned by the callable object.

    Changed in version 3.6: This opcode is used only for calls with positional arguments.

    """

    opname = "CALL_FUNCTION"

    def _handle(self):
        # The arguments are popped right-to-left and the callable is the last one popped.
        popped = [self.stack.pop() for _i in range(self.instruction.argval + 1)]
        popped.reverse()
        name = popped[0]
        call_args = popped[1:]

        max_line = name.i_line
        self.tokens.extend(name.tokens)

        tok_open_parens = _Token(name.i_line, None, "(", after=name)
        self.tokens.append(tok_open_parens)

        prev = tok_open_parens
        for i, arg in enumerate(call_args):
            if i > 0:
                # The separator goes *between* arguments: after the previous argument and
                # before the tokens of the current one (at this point `prev` is the
                # handler of the previous argument).
                comma_token = _Token(prev.i_line, None, ",", after=prev)
                self.tokens.append(comma_token)
                prev = comma_token

            for t in arg.tokens:
                t.mark_after(name)
                t.mark_after(prev)
                max_line = max(max_line, t.i_line)
                self.tokens.append(t)
            prev = arg

        # The call is closed in the last line found among the callable and its arguments.
        tok_close_parens = _Token(max_line, None, ")", after=prev)
        self.tokens.append(tok_close_parens)

        self._write_tokens()

        # The result of the call replaces the callable/arguments in the stack.
        self.stack.push(self)
|
|
|
|
|
|
@_register
class _MakeFunctionPy3(_BaseHandler):
    """
    Pushes a new function object on the stack. From bottom to top, the consumed stack must consist
    of values if the argument carries a specified flag value

        0x01 a tuple of default values for positional-only and positional-or-keyword parameters in positional order

        0x02 a dictionary of keyword-only parameters' default values

        0x04 an annotation dictionary

        0x08 a tuple containing cells for free variables, making a closure

        the code associated with the function (at TOS1)

        the qualified name of the function (at TOS)

    Note: only the 0x01 flag is dealt with when consuming the stack.
    """

    opname = "MAKE_FUNCTION"
    is_lambda = False

    @staticmethod
    def _declared_parameters(code, default_values):
        """
        Computes the parameters declared by a code object.

        `co_varnames` has the positional parameters, then the keyword-only parameters, then the
        `*args` name, then the `**kwargs` name and, only after those, the other local variables
        (which must not be shown in the signature).

        :param code:
            The code object of the function.

        :param default_values:
            The default values of the positional parameters (they apply to the last ones).

        :return list(tuple(str, object)):
            (text, default) for each parameter (the default is `_SENTINEL` if not available).
        """
        names = code.co_varnames
        n_positional = code.co_argcount
        n_kwonly = code.co_kwonlyargcount

        params = []
        first_with_default = n_positional - len(default_values)
        for i, name in enumerate(names[:n_positional]):
            if i >= first_with_default:
                params.append((name, default_values[i - first_with_default]))
            else:
                params.append((name, _SENTINEL))

        next_index = n_positional + n_kwonly
        if code.co_flags & inspect.CO_VARARGS:
            params.append(("*" + names[next_index], _SENTINEL))
            next_index += 1
        elif n_kwonly:
            # A bare star is needed to separate the keyword-only parameters.
            params.append(("*", _SENTINEL))

        for name in names[n_positional : n_positional + n_kwonly]:
            params.append((name, _SENTINEL))

        if code.co_flags & inspect.CO_VARKEYWORDS:
            params.append(("**" + names[next_index], _SENTINEL))
        return params

    def _handle(self):
        stack = self.stack
        self.qualified_name = stack.pop()
        self.code = stack.pop()

        default_node = None
        if self.instruction.argval & 0x01:
            default_node = stack.pop()

        is_lambda = self.is_lambda = "<lambda>" in [x.tok for x in self.qualified_name.tokens]

        if not is_lambda:
            def_token = _Token(self.i_line, None, "def ")
            self.tokens.append(def_token)

        for token in self.qualified_name.tokens:
            self.tokens.append(token)
            if not is_lambda:
                token.mark_after(def_token)
        prev = token

        open_parens_token = _Token(self.i_line, None, "(", after=prev)
        self.tokens.append(open_parens_token)
        prev = open_parens_token

        code = self.code.instruction.argval

        if default_node is not None:
            default_values = tuple(default_node.instruction.argval)
        else:
            default_values = ()

        # Only the declared parameters are written (not all the entries in co_varnames,
        # as that also includes the local variables of the function).
        for i, (arg, default) in enumerate(self._declared_parameters(code, default_values)):
            if i > 0:
                comma_token = _Token(prev.i_line, None, ", ", after=prev)
                self.tokens.append(comma_token)
                prev = comma_token

            arg_token = _Token(self.i_line, None, arg, after=prev)
            self.tokens.append(arg_token)

            if default is not _SENTINEL:
                eq_token = _Token(default_node.i_line, None, "=", after=prev)
                self.tokens.append(eq_token)
                prev = eq_token

                default_token = _Token(default_node.i_line, None, str(default), after=prev)
                self.tokens.append(default_token)
                prev = default_token

        tok_close_parens = _Token(prev.i_line, None, "):", after=prev)
        self.tokens.append(tok_close_parens)

        self._write_tokens()

        stack.push(self)
        # The body of the function is indented from the line after the signature up to the
        # last line of the contents merged from the code of the function.
        self.writer.indent(prev.i_line + 1)
        self.writer.dedent(max(self.disassembler.merge_code(code)))


_MakeFunction = _MakeFunctionPy3
|
|
|
|
|
|
def _print_after_info(line_contents, stream=None):
|
|
if stream is None:
|
|
stream = sys.stdout
|
|
for token in line_contents:
|
|
after_tokens = token.get_after_tokens()
|
|
if after_tokens:
|
|
s = "%s after: %s\n" % (repr(token.tok), ('"' + '", "'.join(t.tok for t in token.get_after_tokens()) + '"'))
|
|
stream.write(s)
|
|
else:
|
|
stream.write("%s (NO REQUISITES)" % repr(token.tok))
|
|
|
|
|
|
def _compose_line_contents(line_contents, previous_line_tokens):
|
|
lst = []
|
|
handled = set()
|
|
|
|
add_to_end_of_line = []
|
|
delete_indexes = []
|
|
for i, token in enumerate(line_contents):
|
|
if token.end_of_line:
|
|
add_to_end_of_line.append(token)
|
|
delete_indexes.append(i)
|
|
for i in reversed(delete_indexes):
|
|
del line_contents[i]
|
|
del delete_indexes
|
|
|
|
while line_contents:
|
|
added = False
|
|
delete_indexes = []
|
|
|
|
for i, token in enumerate(line_contents):
|
|
after_tokens = token.get_after_tokens()
|
|
for after in after_tokens:
|
|
if after not in handled and after not in previous_line_tokens:
|
|
break
|
|
else:
|
|
added = True
|
|
previous_line_tokens.add(token)
|
|
handled.add(token)
|
|
lst.append(token.tok)
|
|
delete_indexes.append(i)
|
|
|
|
for i in reversed(delete_indexes):
|
|
del line_contents[i]
|
|
|
|
if not added:
|
|
if add_to_end_of_line:
|
|
line_contents.extend(add_to_end_of_line)
|
|
del add_to_end_of_line[:]
|
|
continue
|
|
|
|
# Something is off, let's just add as is.
|
|
for token in line_contents:
|
|
if token not in handled:
|
|
lst.append(token.tok)
|
|
|
|
stream = StringIO()
|
|
_print_after_info(line_contents, stream)
|
|
pydev_log.critical("Error. After markers are not correct:\n%s", stream.getvalue())
|
|
break
|
|
return "".join(lst)
|
|
|
|
|
|
class _PyCodeToSource(object):
    """
    Drives the conversion of a code object to source code: each instruction is dispatched to the
    handler registered for it and the tokens written by the handlers are later composed
    line by line.
    """

    # Text added for each indentation level (the same amount is removed when dedenting).
    _INDENT_UNIT = "    "

    def __init__(self, co, memo=None):
        """
        :param co:
            The code object to be converted.

        :param memo:
            A dict passed along to the instances created for nested code objects.
        """
        if memo is None:
            memo = {}
        self.memo = memo
        self.co = co
        self.instructions = list(iter_instructions(co))
        self.stack = _Stack()
        self.writer = _Writer()

    def _process_next(self, i_line):
        """
        Consumes the next instruction, creating the handler registered for it (instructions
        without a registered handler are just skipped).

        :param i_line:
            The line related to the instruction.
        """
        instruction = self.instructions.pop(0)
        handler_class = _op_name_to_handler.get(instruction.opname)
        if handler_class is None:
            if DEBUG:
                print("UNHANDLED", instruction)
            return

        # The instruction is handled when the handler is created.
        handler = handler_class(i_line, instruction, self.stack, self.writer, self)
        if DEBUG:
            print(handler)

    def build_line_to_contents(self):
        """
        Processes all the pending instructions.

        :return dict:
            line -> contents (tokens and indent/dedent markers) to be written in that line.
        """
        op_offset_to_line = dict(dis.findlinestarts(self.co))
        curr_line_index = 0

        instructions = self.instructions
        while instructions:
            # Offsets which don't start a line are still a part of the current line.
            new_line_index = op_offset_to_line.get(instructions[0].offset)
            if new_line_index is not None:
                curr_line_index = new_line_index

            self._process_next(curr_line_index)
        return self.writer.line_to_contents

    def merge_code(self, code):
        """
        Processes a nested code object, adding its contents to the contents of this instance.

        :param code:
            The nested code object (i.e.: the code of a function).

        :return list:
            The (sorted) lines which had contents in the nested code object.
        """
        if DEBUG:
            print("merge code ----")
        line_to_contents = _PyCodeToSource(code, self.memo).build_line_to_contents()
        lines = sorted(line_to_contents)
        for line in lines:
            self.writer.get_line(line).extend(line_to_contents[line])
        if DEBUG:
            print("end merge code ----")
        return lines

    def disassemble(self):
        """
        :return str:
            The source code computed for the code object (lines without contents are written as
            empty lines so that line numbers are preserved).
        """
        show_lines = False  # Debug helper: set to True to prefix each line with its number.
        indent_unit = self._INDENT_UNIT

        line_to_contents = self.build_line_to_contents()
        stream = StringIO()
        last_line = 0
        indent = ""
        previous_line_tokens = set()
        for i_line in sorted(line_to_contents):
            # Fill the gap up to the current line.
            while last_line < i_line - 1:
                if show_lines:
                    stream.write("%s.\n" % (last_line + 1,))
                else:
                    stream.write("\n")
                last_line += 1

            line_contents = []
            dedents_found = 0
            for part in line_to_contents[i_line]:
                if part is INDENT_MARKER:
                    if DEBUG:
                        print("found indent", i_line)
                    # Applied right away (to the current line).
                    indent += indent_unit
                    continue
                if part is DEDENT_MARKER:
                    if DEBUG:
                        print("found dedent", i_line)
                    # Only applied after the current line is written.
                    dedents_found += 1
                    continue
                line_contents.append(part)

            s = indent + _compose_line_contents(line_contents, previous_line_tokens)
            if show_lines:
                stream.write("%s. %s\n" % (i_line, s))
            else:
                stream.write("%s\n" % s)

            if dedents_found:
                # Removes exactly what was added for each indent.
                indent = indent[: -(len(indent_unit) * dedents_found)]
            last_line = i_line

        return stream.getvalue()
|
|
|
|
|
|
def code_obj_to_source(co):
    """
    Converts a code object to source code to provide a suitable representation for the compiler when
    the actual source code is not found.

    This is a work in progress / proof of concept / not ready to be used.

    :param co:
        The code object to be converted.

    :return str:
        The source code computed from the code object.
    """
    converter = _PyCodeToSource(co)
    source = converter.disassemble()
    if DEBUG:
        print(source)
    return source
|