Video: A Bit about Bytes: Understanding Python Bytecode James Bennett, PyCon 2018,
EBook: Inside The Python Virtual Machine Obi Ike-Nwosu
Github: github.com/python/cpython source code for Python. ceval.c
Python source code is compiled to Python bytecode, which is then executed by the Python virtual machine (PVM). The .pyc
files contain Python bytecode.
Python source code goes through the following process:
Some of the modules we use to explore the byte code include:
See the local files: bytecode.py and pvm.py
import dis
import operator
import opcode
We define a simple function.
def s():  # sample function inspected and disassembled in this section; recorded result is 3
    a = 1
    b = 2
    return (a + b)
s()
3
We check out the attributes of the function.
dir(s)
['__annotations__', '__call__', '__class__', '__closure__', '__code__', '__defaults__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__get__', '__getattribute__', '__globals__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__kwdefaults__', '__le__', '__lt__', '__module__', '__name__', '__ne__', '__new__', '__qualname__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__']
Now we drill down on the \_\_code\_\_ attribute.
dir(s.__code__)
['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'co_argcount', 'co_cellvars', 'co_code', 'co_consts', 'co_filename', 'co_firstlineno', 'co_flags', 'co_freevars', 'co_kwonlyargcount', 'co_lnotab', 'co_name', 'co_names', 'co_nlocals', 'co_posonlyargcount', 'co_stacksize', 'co_varnames', 'replace']
We define a function, xf(), which will print out the value of the attributes of the __code__ property.
def xf(func, all=False):
    """Print attributes of ``func.__code__`` as ``name:<TAB>value`` lines.

    Args:
        func: any object with a ``__code__`` attribute (e.g. a function).
        all: when true, print every name returned by ``dir()``; otherwise
            print only the names starting with ``"co"``. The parameter
            name shadows the builtin ``all`` inside this function; it is
            kept because callers may pass it by keyword.
    """
    code = func.__code__
    for name in dir(code):
        # One condition covers both cases; the printed line is the same.
        if all or name.startswith("co"):
            print("{}:\t{}".format(name, getattr(code, name)))
xf(s)
co_argcount: 0 co_cellvars: () co_code: b'd\x01}\x00d\x02}\x01|\x00|\x01\x17\x00S\x00' co_consts: (None, 1, 2) co_filename: /tmp/ipykernel_2260377/1904000924.py co_firstlineno: 1 co_flags: 67 co_freevars: () co_kwonlyargcount: 0 co_lnotab: b'\x00\x01\x04\x01\x04\x01' co_name: s co_names: () co_nlocals: 2 co_posonlyargcount: 0 co_stacksize: 2 co_varnames: ('a', 'b')
Every time you define a function, Python creates these attributes. Some are obvious, and others are obscure. We will start by looking at the co_code attribute, which is the byte code instructions needed to execute the function.
We define a simple function getbytes(f) to retrieve the bytecodes.
def getbytes(f):
    """Return the raw bytecode (``co_code``) of the code object of ``f``."""
    code_object = f.__code__
    return code_object.co_code
getbytes(s)
b'd\x01}\x00d\x02}\x01|\x00|\x01\x17\x00S\x00'
See Hexadecimal.html for a discussion of hexadecimal byte strings.
We next define a function to print out the individual bytes.
def printbytes(f):
    """Print every byte of ``f``'s bytecode as an integer, one per line."""
    raw = getbytes(f)
    for value in raw:
        print (value)
printbytes(s)
100 1 125 0 100 2 125 1 124 0 124 1 23 0 83 0
Some of those bytes are opcodes - that is, assembly language instructions. The opcode module provides a mapping between bytes and opcodes.
dir(opcode)
['EXTENDED_ARG', 'HAVE_ARGUMENT', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'cmp_op', 'hascompare', 'hasconst', 'hasfree', 'hasjabs', 'hasjrel', 'haslocal', 'hasname', 'hasnargs', 'opmap', 'opname', 'stack_effect']
opcode.opname
['<0>', 'POP_TOP', 'ROT_TWO', 'ROT_THREE', 'DUP_TOP', 'DUP_TOP_TWO', 'ROT_FOUR', '<7>', '<8>', 'NOP', 'UNARY_POSITIVE', 'UNARY_NEGATIVE', 'UNARY_NOT', '<13>', '<14>', 'UNARY_INVERT', 'BINARY_MATRIX_MULTIPLY', 'INPLACE_MATRIX_MULTIPLY', '<18>', 'BINARY_POWER', 'BINARY_MULTIPLY', '<21>', 'BINARY_MODULO', 'BINARY_ADD', 'BINARY_SUBTRACT', 'BINARY_SUBSCR', 'BINARY_FLOOR_DIVIDE', 'BINARY_TRUE_DIVIDE', 'INPLACE_FLOOR_DIVIDE', 'INPLACE_TRUE_DIVIDE', '<30>', '<31>', '<32>', '<33>', '<34>', '<35>', '<36>', '<37>', '<38>', '<39>', '<40>', '<41>', '<42>', '<43>', '<44>', '<45>', '<46>', '<47>', 'RERAISE', 'WITH_EXCEPT_START', 'GET_AITER', 'GET_ANEXT', 'BEFORE_ASYNC_WITH', '<53>', 'END_ASYNC_FOR', 'INPLACE_ADD', 'INPLACE_SUBTRACT', 'INPLACE_MULTIPLY', '<58>', 'INPLACE_MODULO', 'STORE_SUBSCR', 'DELETE_SUBSCR', 'BINARY_LSHIFT', 'BINARY_RSHIFT', 'BINARY_AND', 'BINARY_XOR', 'BINARY_OR', 'INPLACE_POWER', 'GET_ITER', 'GET_YIELD_FROM_ITER', 'PRINT_EXPR', 'LOAD_BUILD_CLASS', 'YIELD_FROM', 'GET_AWAITABLE', 'LOAD_ASSERTION_ERROR', 'INPLACE_LSHIFT', 'INPLACE_RSHIFT', 'INPLACE_AND', 'INPLACE_XOR', 'INPLACE_OR', '<80>', '<81>', 'LIST_TO_TUPLE', 'RETURN_VALUE', 'IMPORT_STAR', 'SETUP_ANNOTATIONS', 'YIELD_VALUE', 'POP_BLOCK', '<88>', 'POP_EXCEPT', 'STORE_NAME', 'DELETE_NAME', 'UNPACK_SEQUENCE', 'FOR_ITER', 'UNPACK_EX', 'STORE_ATTR', 'DELETE_ATTR', 'STORE_GLOBAL', 'DELETE_GLOBAL', '<99>', 'LOAD_CONST', 'LOAD_NAME', 'BUILD_TUPLE', 'BUILD_LIST', 'BUILD_SET', 'BUILD_MAP', 'LOAD_ATTR', 'COMPARE_OP', 'IMPORT_NAME', 'IMPORT_FROM', 'JUMP_FORWARD', 'JUMP_IF_FALSE_OR_POP', 'JUMP_IF_TRUE_OR_POP', 'JUMP_ABSOLUTE', 'POP_JUMP_IF_FALSE', 'POP_JUMP_IF_TRUE', 'LOAD_GLOBAL', 'IS_OP', 'CONTAINS_OP', '<119>', '<120>', 'JUMP_IF_NOT_EXC_MATCH', 'SETUP_FINALLY', '<123>', 'LOAD_FAST', 'STORE_FAST', 'DELETE_FAST', '<127>', '<128>', '<129>', 'RAISE_VARARGS', 'CALL_FUNCTION', 'MAKE_FUNCTION', 'BUILD_SLICE', '<134>', 'LOAD_CLOSURE', 'LOAD_DEREF', 'STORE_DEREF', 'DELETE_DEREF', '<139>', '<140>', 'CALL_FUNCTION_KW', 
'CALL_FUNCTION_EX', 'SETUP_WITH', 'EXTENDED_ARG', 'LIST_APPEND', 'SET_ADD', 'MAP_ADD', 'LOAD_CLASSDEREF', '<149>', '<150>', '<151>', '<152>', '<153>', 'SETUP_ASYNC_WITH', 'FORMAT_VALUE', 'BUILD_CONST_KEY_MAP', 'BUILD_STRING', '<158>', '<159>', 'LOAD_METHOD', 'CALL_METHOD', 'LIST_EXTEND', 'SET_UPDATE', 'DICT_MERGE', 'DICT_UPDATE', '<166>', '<167>', '<168>', '<169>', '<170>', '<171>', '<172>', '<173>', '<174>', '<175>', '<176>', '<177>', '<178>', '<179>', '<180>', '<181>', '<182>', '<183>', '<184>', '<185>', '<186>', '<187>', '<188>', '<189>', '<190>', '<191>', '<192>', '<193>', '<194>', '<195>', '<196>', '<197>', '<198>', '<199>', '<200>', '<201>', '<202>', '<203>', '<204>', '<205>', '<206>', '<207>', '<208>', '<209>', '<210>', '<211>', '<212>', '<213>', '<214>', '<215>', '<216>', '<217>', '<218>', '<219>', '<220>', '<221>', '<222>', '<223>', '<224>', '<225>', '<226>', '<227>', '<228>', '<229>', '<230>', '<231>', '<232>', '<233>', '<234>', '<235>', '<236>', '<237>', '<238>', '<239>', '<240>', '<241>', '<242>', '<243>', '<244>', '<245>', '<246>', '<247>', '<248>', '<249>', '<250>', '<251>', '<252>', '<253>', '<254>', '<255>']
len(opcode.opname)
256
opcode.opname[23]
'BINARY_ADD'
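The opcode.opname list maps opcode numbers to opcode names. We define a function printopcodes(f) that looks up every byte of a function's bytecode in that list. Note that it treats the argument bytes as opcodes too, which is why the output below contains entries such as `1: POP_TOP` and `0: <0>`.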
def printopcodes(f):
    """Print ``byte: name`` for every byte in the bytecode of ``f``.

    Each byte is looked up in ``opcode.opname`` regardless of whether it
    is an opcode or an argument byte, so argument bytes are printed with
    whatever name happens to sit at that index.
    """
    names = opcode.opname
    for byte in getbytes(f):
        print ("{}: {}".format(byte, names[byte]))
printopcodes(s)
100: LOAD_CONST 1: POP_TOP 125: STORE_FAST 0: <0> 100: LOAD_CONST 2: ROT_TWO 125: STORE_FAST 1: POP_TOP 124: LOAD_FAST 0: <0> 124: LOAD_FAST 1: POP_TOP 23: BINARY_ADD 0: <0> 83: RETURN_VALUE 0: <0>
The dis module has a dis() function which performs this task properly, decoding each opcode together with its argument, as well as the code_info() function which provides additional information.
opcode.opname[23]
'BINARY_ADD'
dis.dis(s)
2 0 LOAD_CONST 1 (1) 2 STORE_FAST 0 (a) 3 4 LOAD_CONST 2 (2) 6 STORE_FAST 1 (b) 4 8 LOAD_FAST 0 (a) 10 LOAD_FAST 1 (b) 12 BINARY_ADD 14 RETURN_VALUE
print( dis.code_info(s))
Name: s Filename: /tmp/ipykernel_886261/1904000924.py Argument count: 0 Positional-only arguments: 0 Kw-only arguments: 0 Number of locals: 2 Stack size: 2 Flags: OPTIMIZED, NEWLOCALS, NOFREE Constants: 0: None 1: 1 2: 2 Variable names: 0: a 1: b
We combine these in the showme(f)
function.
def showme(f):
    """Print the disassembly of ``f`` followed by its ``dis.code_info`` text."""
    dis.dis(f)
    summary = dis.code_info(f)
    print(summary)
showme(s)
2 0 LOAD_CONST 1 (1) 2 STORE_FAST 0 (a) 3 4 LOAD_CONST 2 (2) 6 STORE_FAST 1 (b) 4 8 LOAD_FAST 0 (a) 10 LOAD_FAST 1 (b) 12 BINARY_ADD 14 RETURN_VALUE Name: s Filename: /tmp/ipykernel_886261/1904000924.py Argument count: 0 Positional-only arguments: 0 Kw-only arguments: 0 Number of locals: 2 Stack size: 2 Flags: OPTIMIZED, NEWLOCALS, NOFREE Constants: 0: None 1: 1 2: 2 Variable names: 0: a 1: b
See A Python Interpreter Written in Python
Note that this article was written using an earlier version of Python, 3.4 or so. In that version, an instruction occupied one byte if it took no argument and three bytes otherwise: one for the opcode and two for the argument. Starting with Python 3.6, every instruction occupies exactly two bytes: one for the opcode and one for the argument. That is what we use in this assignment. You should make the adjustment in the reading.
# Hand-written program for Interpreter0: load two numbers, add them, print the sum.
what_to_execute = {
    # Each instruction is a (name, argument) pair. For LOAD_VALUE the
    # argument is an index into the "numbers" list below.
    "instructions": [("LOAD_VALUE", 0),
                     ("LOAD_VALUE", 1),
                     ("ADD_TWO_VALUES", None),
                     ("PRINT_ANSWER", None)],
    "numbers": [7, 5] }
def s():  # variant of the earlier s() that prints the sum instead of returning it
    a = 1
    b = 2
    print (a + b)
# Hand-written program for Interpreter0 that mirrors s(): store 1 and 2
# under the names "a" and "b", load them back, add them, print the sum.
swhat_to_execute = {
    # LOAD_VALUE arguments index into "numbers"; STORE_NAME / LOAD_NAME
    # arguments index into "names".
    "instructions": [("LOAD_VALUE", 0),
                     ("STORE_NAME", 0),
                     ("LOAD_VALUE", 1),
                     ("STORE_NAME", 1),
                     ("LOAD_NAME", 0),
                     ("LOAD_NAME", 1),
                     ("ADD_TWO_VALUES", None),
                     ("PRINT_ANSWER", None)],
    "numbers": [1, 2],
    "names": ["a", "b"] }
We implement two interpreters in one. They both use a stack to execute the instructions. The first, run_code(), dispatches on the instruction name with an if/elif chain. The second, execute(), relies on the getattr(object, name) function, which returns the value of the named attribute of the object.
class Interpreter0:
    """Toy stack machine for hand-written instruction dictionaries.

    A program is a dict with an "instructions" list of (name, argument)
    pairs, a "numbers" list and, when names are used, a "names" list.
    Two drivers are provided: run_code() dispatches with an if/elif
    chain, execute() looks the handler up with getattr().
    """

    def __init__(self):
        self.stack = []        # operand stack shared by all instructions
        self.environment = {}  # name -> value bindings made by STORE_NAME

    def STORE_NAME(self, name):
        """Pop the top of the stack and bind it to ``name``."""
        val = self.stack.pop()
        self.environment[name] = val

    def LOAD_NAME(self, name):
        """Push the value bound to ``name``; raises KeyError if unbound."""
        val = self.environment[name]
        self.stack.append(val)

    def LOAD_VALUE(self, number):
        """Push ``number`` onto the stack."""
        self.stack.append(number)

    def PRINT_ANSWER(self):
        """Pop the top of the stack and print it."""
        answer = self.stack.pop()
        print (answer)

    def ADD_TWO_VALUES(self):
        """Pop two operands and push ``left + right``."""
        # The right operand was pushed last, so it comes off first.
        # Keeping the operands in source order matters for types whose
        # ``+`` is not commutative, such as strings and lists.
        right = self.stack.pop()
        left = self.stack.pop()
        self.stack.append(left + right)

    def run_code(self, what_to_execute):
        """Run a program, dispatching each instruction with if/elif.

        Raises:
            ValueError: if an instruction name is not one of the five
                instructions this machine implements.
        """
        instructions = what_to_execute["instructions"]
        for each_step in instructions:
            instruction, argument = each_step
            argument = self.parse_argument(instruction, argument, what_to_execute)
            if instruction == "LOAD_VALUE":
                self.LOAD_VALUE(argument)
            elif instruction == "ADD_TWO_VALUES":
                self.ADD_TWO_VALUES()
            elif instruction == "PRINT_ANSWER":
                self.PRINT_ANSWER()
            elif instruction == "STORE_NAME":
                self.STORE_NAME(argument)
            elif instruction == "LOAD_NAME":
                self.LOAD_NAME(argument)
            else:
                # Fail loudly instead of silently skipping a typo.
                raise ValueError("unknown instruction: {!r}".format(instruction))

    def parse_argument(self, instruction, argument, what_to_execute):
        """Resolve an index argument to the number or name it refers to.

        Arguments of other instructions are returned unchanged.
        """
        numbers = ["LOAD_VALUE"]
        names = ["LOAD_NAME", "STORE_NAME"]
        if instruction in numbers:
            argument = what_to_execute["numbers"][argument]
        elif instruction in names:
            argument = what_to_execute["names"][argument]
        return argument

    ## stage 2 version of PVM
    ## -------------------------------------------------------------
    def execute(self, what_to_execute):
        """Run a program, printing each step and dispatching via getattr().

        The handler is the method whose name equals the instruction
        name; it is called without arguments when the resolved argument
        is None. An unknown instruction raises AttributeError.
        """
        instructions = what_to_execute["instructions"]
        for each_step in instructions:
            print (each_step)
            instruction, argument = each_step
            argument = self.parse_argument(instruction, argument, what_to_execute)
            bytecode_method = getattr(self, instruction)
            if argument is None:
                bytecode_method()
            else:
                bytecode_method(argument)
def test():
    """Run the two-number addition program through the if/elif driver."""
    Interpreter0().run_code(what_to_execute)
what_to_execute
{'instructions': [('LOAD_VALUE', 0), ('LOAD_VALUE', 1), ('ADD_TWO_VALUES', None), ('PRINT_ANSWER', None)], 'numbers': [7, 5]}
test()
12
swhat_to_execute
{'instructions': [('LOAD_VALUE', 0), ('STORE_NAME', 0), ('LOAD_VALUE', 1), ('STORE_NAME', 1), ('LOAD_NAME', 0), ('LOAD_NAME', 1), ('ADD_TWO_VALUES', None), ('PRINT_ANSWER', None)], 'numbers': [1, 2], 'names': ['a', 'b']}
def test2():
    """Run the named-variable program through the if/elif driver."""
    Interpreter0().run_code(swhat_to_execute)
test2()
3
def test3():
    """Run the named-variable program through the getattr-based driver."""
    Interpreter0().execute(swhat_to_execute)
test3()
('LOAD_VALUE', 0) ('STORE_NAME', 0) ('LOAD_VALUE', 1) ('STORE_NAME', 1) ('LOAD_NAME', 0) ('LOAD_NAME', 1) ('ADD_TWO_VALUES', None) ('PRINT_ANSWER', None) 3
Following the description in the reading, you will implement PVM-lite: A Python Virtual Machine for a subset of Python.
Included in the subset are:
Not included are:
The dis module can disassemble a Python function, printing out the byte codes for the function. For example, given:
def s1():  # minimal fixture: store a constant in a local, load it back, return it
    a = 1
    return a
import dis
dis.dis(s1)
2 0 LOAD_CONST 1 (1) 2 STORE_FAST 0 (a) 3 4 LOAD_FAST 0 (a) 6 RETURN_VALUE
Write your own version of dis.dis() that produces a dictionary object containing the following values:
Each instruction list has the following five components:
import hw5a
hw5a.makeobj(s1)
{'names': ('a',), 'consts': (None, 1), 'code': b'd\x01}\x00|\x00S\x00', 'instructions': [(0, 100, 'LOAD_CONST', 1, 1), (2, 125, 'STORE_FAST', 0, 'a'), (4, 124, 'LOAD_FAST', 0, 'a'), (6, 83, 'RETURN_VALUE', None, None)]}
Note that makeobj is similar to dis.dis(). You may use dis.dis() to help test your implementation of makeobj().
You may also want to use dis.HAVE_ARGUMENT to identify those opcodes that do and do not take an argument, and dis.hascompare to process comparison operators. dis.get_instructions(f) is very useful.
list(dis.get_instructions(s1))
[Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=1, argrepr='1', offset=0, starts_line=2, is_jump_target=False), Instruction(opname='STORE_FAST', opcode=125, arg=0, argval='a', argrepr='a', offset=2, starts_line=None, is_jump_target=False), Instruction(opname='LOAD_FAST', opcode=124, arg=0, argval='a', argrepr='a', offset=4, starts_line=3, is_jump_target=False), Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None, argrepr='', offset=6, starts_line=None, is_jump_target=False)]
for x in dis.get_instructions(s1):
print (x)
Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=1, argrepr='1', offset=0, starts_line=2, is_jump_target=False) Instruction(opname='STORE_FAST', opcode=125, arg=0, argval='a', argrepr='a', offset=2, starts_line=None, is_jump_target=False) Instruction(opname='LOAD_FAST', opcode=124, arg=0, argval='a', argrepr='a', offset=4, starts_line=3, is_jump_target=False) Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None, argrepr='', offset=6, starts_line=None, is_jump_target=False)
x = list(dis.get_instructions(s1))
x[0][0]
'LOAD_CONST'
We shall now implement an Interpreter class, along the lines of the reading, which describes a full implementation of the PVM and has the source code available on GitHub. We encourage you to avail yourself of that resource. Our implementation is more modest. Nevertheless, you should try to borrow as much as possible from the "byterun" implementation. Yes, I am telling you to copy / adapt code from the byterun GitHub repository.
We first define a test function that will execute a Python function using the Interpreter class.
def doit(func, debug=True):
    """Execute ``func`` on a fresh Interpreter and return what it returns.

    ``debug`` is handed to the Interpreter constructor unchanged.
    """
    vm = Interpreter(debug)
    outcome = vm.execute(func)
    return outcome
Next we define a bunch of test functions for the Interpreter
def s3():  # fixture: BINARY_ADD of a local and a constant; recorded result 2
    a = 1
    return a + 1
hw5a.doit(s3)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 1, 1) (8, 23, 'BINARY_ADD', None, None) (10, 83, 'RETURN_VALUE', None, None)
2
def s3a():  # fixture: BINARY_ADD of two locals; recorded result 3
    a = 1
    b = 2
    return (a + b)
hw5a.doit(s3a)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 2) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 23, 'BINARY_ADD', None, None) (14, 83, 'RETURN_VALUE', None, None)
3
def s3b():  # fixture: BINARY_SUBTRACT; recorded result -1
    a = 1
    b = 2
    return (a - b)
hw5a.doit(s3b)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 2) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 24, 'BINARY_SUBTRACT', None, None) (14, 83, 'RETURN_VALUE', None, None)
-1
def s3c():  # fixture: BINARY_POWER; recorded result 8
    a = 2
    b = 3
    return (a ** b)
hw5a.doit(s3c)
(0, 100, 'LOAD_CONST', 1, 2) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 3) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 19, 'BINARY_POWER', None, None) (14, 83, 'RETURN_VALUE', None, None)
8
def s3d():  # fixture: BINARY_MULTIPLY; recorded result 6
    a = 2
    b = 3
    return (a * b)
hw5a.doit(s3d)
(0, 100, 'LOAD_CONST', 1, 2) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 3) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 20, 'BINARY_MULTIPLY', None, None) (14, 83, 'RETURN_VALUE', None, None)
6
def s3e():  # fixture: BINARY_TRUE_DIVIDE; recorded result 2.0
    a = 6
    b = 3
    return (a / b)
hw5a.doit(s3e)
(0, 100, 'LOAD_CONST', 1, 6) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 3) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 27, 'BINARY_TRUE_DIVIDE', None, None) (14, 83, 'RETURN_VALUE', None, None)
2.0
def s3f():  # fixture: BINARY_FLOOR_DIVIDE; recorded result 2
    a = 7
    b = 3
    return (a // b)
hw5a.doit(s3f)
(0, 100, 'LOAD_CONST', 1, 7) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 3) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 26, 'BINARY_FLOOR_DIVIDE', None, None) (14, 83, 'RETURN_VALUE', None, None)
2
def s3g():  # fixture: BINARY_MODULO; recorded result 1
    a = 7
    b = 3
    return (a % b)
hw5a.doit(s3g)
(0, 100, 'LOAD_CONST', 1, 7) (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, 3) (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 22, 'BINARY_MODULO', None, None) (14, 83, 'RETURN_VALUE', None, None)
1
def s3h():  # fixture: BINARY_ADD on strings; recorded result 'hello world'
    ''' returns string append '''
    a = "hello"
    b = " world"
    return a + b
hw5a.doit(s3h)
(0, 100, 'LOAD_CONST', 1, 'hello') (2, 125, 'STORE_FAST', 0, 'a') (4, 100, 'LOAD_CONST', 2, ' world') (6, 125, 'STORE_FAST', 1, 'b') (8, 124, 'LOAD_FAST', 0, 'a') (10, 124, 'LOAD_FAST', 1, 'b') (12, 23, 'BINARY_ADD', None, None) (14, 83, 'RETURN_VALUE', None, None)
'hello world'
def s4(): return 1 > 2  # fixture: COMPARE_OP 4 ('>'); recorded result False
hw5a.doit(s4)
(0, 100, 'LOAD_CONST', 1, 1) (2, 100, 'LOAD_CONST', 2, 2) (4, 107, 'COMPARE_OP', 4, 4) > (6, 83, 'RETURN_VALUE', None, None)
False
def s4a():  # fixture: COMPARE_OP 0 ('<'); recorded result True
    return 1 < 2
hw5a.doit(s4a)
(0, 100, 'LOAD_CONST', 1, 1) (2, 100, 'LOAD_CONST', 2, 2) (4, 107, 'COMPARE_OP', 0, 0) < (6, 83, 'RETURN_VALUE', None, None)
True
def s4b():  # fixture: COMPARE_OP 1 ('<='); recorded result True
    return 1 <= 2
hw5a.doit(s4b)
(0, 100, 'LOAD_CONST', 1, 1) (2, 100, 'LOAD_CONST', 2, 2) (4, 107, 'COMPARE_OP', 1, 1) <= (6, 83, 'RETURN_VALUE', None, None)
True
def s4c():  # fixture: COMPARE_OP 2 ('=='); recorded result False
    return 1 == 2
hw5a.doit(s4c)
(0, 100, 'LOAD_CONST', 1, 1) (2, 100, 'LOAD_CONST', 2, 2) (4, 107, 'COMPARE_OP', 2, 2) == (6, 83, 'RETURN_VALUE', None, None)
False
def s4d():  # fixture: COMPARE_OP 3 ('!='); recorded result True
    return 1 != 2
hw5a.doit(s4d)
(0, 100, 'LOAD_CONST', 1, 1) (2, 100, 'LOAD_CONST', 2, 2) (4, 107, 'COMPARE_OP', 3, 3) != (6, 83, 'RETURN_VALUE', None, None)
True
def s4e():  # fixture: COMPARE_OP 5 ('>='); recorded result False
    return 1 >= 2
hw5a.doit(s4e)
(0, 100, 'LOAD_CONST', 1, 1) (2, 100, 'LOAD_CONST', 2, 2) (4, 107, 'COMPARE_OP', 5, 5) >= (6, 83, 'RETURN_VALUE', None, None)
False
def s5():  # fixture: POP_JUMP_IF_FALSE after a '>' test; the recorded trace takes the else branch and returns 2
    a = 1
    if a > 2:
        return 1
    else:
        return 2
hw5a.doit(s5)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 2) (8, 107, 'COMPARE_OP', 4, 4) > (10, 114, 'POP_JUMP_IF_FALSE', 16, 16) (16, 100, 'LOAD_CONST', 2, 2) (18, 83, 'RETURN_VALUE', None, None)
2
def s5a():  # fixture: POP_JUMP_IF_FALSE after an '==' test; the recorded trace takes the else branch and returns 2
    a = 1
    if a == 2:
        return 1
    else:
        return 2
hw5a.doit(s5a)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 2) (8, 107, 'COMPARE_OP', 2, 2) == (10, 114, 'POP_JUMP_IF_FALSE', 16, 16) (16, 100, 'LOAD_CONST', 2, 2) (18, 83, 'RETURN_VALUE', None, None)
2
def s6():  # fixture: BINARY_LSHIFT, BINARY_MODULO and BUILD_TUPLE; recorded result (8, 0)
    ''' returns tuple '''
    a = 2
    b = a << a
    return b, b % 2
hw5a.doit(s6)
(0, 100, 'LOAD_CONST', 1, 2) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 124, 'LOAD_FAST', 0, 'a') (8, 62, 'BINARY_LSHIFT', None, None) (10, 125, 'STORE_FAST', 1, 'b') (12, 124, 'LOAD_FAST', 1, 'b') (14, 124, 'LOAD_FAST', 1, 'b') (16, 100, 'LOAD_CONST', 1, 2) (18, 22, 'BINARY_MODULO', None, None) (20, 102, 'BUILD_TUPLE', 2, 2) (22, 83, 'RETURN_VALUE', None, None)
(8, 0)
def s6a():  # fixture: BUILD_LIST with two elements; recorded result [5, 1]
    ''' returns list '''
    b = 5
    return [b, b % 2]
hw5a.doit(s6a)
(0, 100, 'LOAD_CONST', 1, 5) (2, 125, 'STORE_FAST', 0, 'b') (4, 124, 'LOAD_FAST', 0, 'b') (6, 124, 'LOAD_FAST', 0, 'b') (8, 100, 'LOAD_CONST', 2, 2) (10, 22, 'BINARY_MODULO', None, None) (12, 103, 'BUILD_LIST', 2, 2) (14, 83, 'RETURN_VALUE', None, None)
[5, 1]
def s6b():  # fixture: BUILD_SET with two elements; recorded result {1, 9}
    ''' returns set '''
    b = 9
    return {b, b % 2}
hw5a.doit(s6b)
(0, 100, 'LOAD_CONST', 1, 9) (2, 125, 'STORE_FAST', 0, 'b') (4, 124, 'LOAD_FAST', 0, 'b') (6, 124, 'LOAD_FAST', 0, 'b') (8, 100, 'LOAD_CONST', 2, 2) (10, 22, 'BINARY_MODULO', None, None) (12, 104, 'BUILD_SET', 2, 2) (14, 83, 'RETURN_VALUE', None, None)
{1, 9}
def s6c():  # fixture: BUILD_LIST + LIST_EXTEND for the literal, then BINARY_SUBSCR; recorded result 2
    ''' returns list subscript '''
    a = [1,2,3]
    return a[1]
hw5a.doit(s6c)
(0, 103, 'BUILD_LIST', 0, 0) (2, 100, 'LOAD_CONST', 1, (1, 2, 3)) (4, 162, 'LIST_EXTEND', 1, 1) (6, 125, 'STORE_FAST', 0, 'a') (8, 124, 'LOAD_FAST', 0, 'a') (10, 100, 'LOAD_CONST', 2, 1) (12, 25, 'BINARY_SUBSCR', None, None) (14, 83, 'RETURN_VALUE', None, None)
2
def s7():  # fixture: BUILD_SLICE + BINARY_SUBSCR on a list, twice; the recorded output holds tuples and is flagged as an error in the text
    ''' returns list slice '''
    a = [1,2,3,4,5,6]
    return a[2:-1], a[2:4]
hw5a.doit(s7)
(0, 103, 'BUILD_LIST', 0, 0) (2, 100, 'LOAD_CONST', 1, (1, 2, 3, 4, 5, 6)) (4, 162, 'LIST_EXTEND', 1, 1) (6, 125, 'STORE_FAST', 0, 'a') (8, 124, 'LOAD_FAST', 0, 'a') (10, 100, 'LOAD_CONST', 2, 2) (12, 100, 'LOAD_CONST', 3, -1) (14, 133, 'BUILD_SLICE', 2, 2) (16, 25, 'BINARY_SUBSCR', None, None) (18, 124, 'LOAD_FAST', 0, 'a') (20, 100, 'LOAD_CONST', 2, 2) (22, 100, 'LOAD_CONST', 4, 4) (24, 133, 'BUILD_SLICE', 2, 2) (26, 25, 'BINARY_SUBSCR', None, None) (28, 102, 'BUILD_TUPLE', 2, 2) (30, 83, 'RETURN_VALUE', None, None)
((3, 4, 5), (3, 4))
Error: the two slices should be lists, not tuples; Python itself returns ([3, 4, 5], [3, 4]).
def s7a():  # fixture: BUILD_SLICE + BINARY_SUBSCR on a string; recorded result 'lo worl'
    ''' returns string slice '''
    a = "hello world"
    return a[3:-1]
hw5a.doit(s7a)
(0, 100, 'LOAD_CONST', 1, 'hello world') (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 3) (8, 100, 'LOAD_CONST', 3, -1) (10, 133, 'BUILD_SLICE', 2, 2) (12, 25, 'BINARY_SUBSCR', None, None) (14, 83, 'RETURN_VALUE', None, None)
'lo worl'
def s7b():  # fixture: BUILD_MAP followed by STORE_SUBSCR; recorded result {2: 4}
    ''' returns dictionary '''
    a = 2
    b = a ** 2
    d = {}
    d[a] = b
    return d
hw5a.doit(s7b)
(0, 100, 'LOAD_CONST', 1, 2) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 1, 2) (8, 19, 'BINARY_POWER', None, None) (10, 125, 'STORE_FAST', 1, 'b') (12, 105, 'BUILD_MAP', 0, 0) (14, 125, 'STORE_FAST', 2, 'd') (16, 124, 'LOAD_FAST', 1, 'b') (18, 124, 'LOAD_FAST', 2, 'd') (20, 124, 'LOAD_FAST', 0, 'a') (22, 60, 'STORE_SUBSCR', None, None) (24, 124, 'LOAD_FAST', 2, 'd') (26, 83, 'RETURN_VALUE', None, None)
{2: 4}
def s7c():  # fixture: STORE_SUBSCR then BINARY_SUBSCR on a dict; recorded result 1
    ''' dict access '''
    x = {}
    x['a'] = 1
    y = x['a']
    return y
hw5a.doit(s7c)
(0, 105, 'BUILD_MAP', 0, 0) (2, 125, 'STORE_FAST', 0, 'x') (4, 100, 'LOAD_CONST', 1, 1) (6, 124, 'LOAD_FAST', 0, 'x') (8, 100, 'LOAD_CONST', 2, 'a') (10, 60, 'STORE_SUBSCR', None, None) (12, 124, 'LOAD_FAST', 0, 'x') (14, 100, 'LOAD_CONST', 2, 'a') (16, 25, 'BINARY_SUBSCR', None, None) (18, 125, 'STORE_FAST', 1, 'y') (20, 124, 'LOAD_FAST', 1, 'y') (22, 83, 'RETURN_VALUE', None, None)
1
def s8b():  # fixture: UNARY_NEGATIVE; recorded result -1
    x = 1
    return -x
hw5a.doit(s8b)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 11, 'UNARY_NEGATIVE', None, None) (8, 83, 'RETURN_VALUE', None, None)
-1
def s8c():  # fixture: UNARY_POSITIVE; recorded result -1
    x = -1
    return +x
hw5a.doit(s8c)
(0, 100, 'LOAD_CONST', 1, -1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 10, 'UNARY_POSITIVE', None, None) (8, 83, 'RETURN_VALUE', None, None)
-1
def s8d():  # fixture: UNARY_NOT; recorded result False
    x = 1
    return not x
hw5a.doit(s8d)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 12, 'UNARY_NOT', None, None) (8, 83, 'RETURN_VALUE', None, None)
False
def s8e():  # fixture: UNARY_INVERT; recorded result -2
    x = 1
    return ~x
hw5a.doit(s8e)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 15, 'UNARY_INVERT', None, None) (8, 83, 'RETURN_VALUE', None, None)
-2
def s8f():  # fixture: INPLACE_ADD; recorded result 11
    x = 1
    x += 10
    return x
hw5a.doit(s8f)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 10) (8, 55, 'INPLACE_ADD', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
11
def s8g():  # fixture: INPLACE_SUBTRACT; recorded result -9
    x = 1
    x -= 10
    return x
hw5a.doit(s8g)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 10) (8, 56, 'INPLACE_SUBTRACT', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
-9
def s8h():  # fixture: INPLACE_MULTIPLY; recorded result 10
    x = 1
    x *= 10
    return x
hw5a.doit(s8h)
(0, 100, 'LOAD_CONST', 1, 1) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 10) (8, 57, 'INPLACE_MULTIPLY', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
10
def s8i():  # fixture: INPLACE_MODULO; recorded result 1
    x = 10
    x %= 3
    return x
hw5a.doit(s8i)
(0, 100, 'LOAD_CONST', 1, 10) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 3) (8, 59, 'INPLACE_MODULO', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
1
def s8j():  # fixture: INPLACE_TRUE_DIVIDE; recorded result 3.3333333333333335
    x = 10
    x /= 3
    return x
hw5a.doit(s8j)
(0, 100, 'LOAD_CONST', 1, 10) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 3) (8, 29, 'INPLACE_TRUE_DIVIDE', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
3.3333333333333335
def s8k():  # fixture: INPLACE_LSHIFT; recorded result 128
    x = 32
    x <<= 2
    return x
hw5a.doit(s8k)
(0, 100, 'LOAD_CONST', 1, 32) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 2) (8, 75, 'INPLACE_LSHIFT', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
128
def s8l():  # fixture: INPLACE_RSHIFT; recorded result 8
    x = 32
    x >>= 2
    return x
hw5a.doit(s8l)
(0, 100, 'LOAD_CONST', 1, 32) (2, 125, 'STORE_FAST', 0, 'x') (4, 124, 'LOAD_FAST', 0, 'x') (6, 100, 'LOAD_CONST', 2, 2) (8, 76, 'INPLACE_RSHIFT', None, None) (10, 125, 'STORE_FAST', 0, 'x') (12, 124, 'LOAD_FAST', 0, 'x') (14, 83, 'RETURN_VALUE', None, None)
8
def s9():  # fixture: BINARY_LSHIFT; recorded result 16
    a = 2
    return a << 3
hw5a.doit(s9)
(0, 100, 'LOAD_CONST', 1, 2) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 3) (8, 62, 'BINARY_LSHIFT', None, None) (10, 83, 'RETURN_VALUE', None, None)
16
def s9a():  # fixture: BINARY_RSHIFT; recorded result 2
    a = 19
    return a >> 3
hw5a.doit(s9a)
(0, 100, 'LOAD_CONST', 1, 19) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 3) (8, 63, 'BINARY_RSHIFT', None, None) (10, 83, 'RETURN_VALUE', None, None)
2
def s9b():  # fixture: BINARY_AND; recorded result 3
    a = 19
    return a & 3
hw5a.doit(s9b)
(0, 100, 'LOAD_CONST', 1, 19) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 3) (8, 64, 'BINARY_AND', None, None) (10, 83, 'RETURN_VALUE', None, None)
3
def s9c():  # fixture: BINARY_OR; recorded result 19
    a = 19
    return a | 3
hw5a.doit(s9c)
(0, 100, 'LOAD_CONST', 1, 19) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 3) (8, 66, 'BINARY_OR', None, None) (10, 83, 'RETURN_VALUE', None, None)
19
def s9d():  # fixture: BINARY_XOR; recorded result 16
    a = 19
    return a ^ 3
hw5a.doit(s9d)
(0, 100, 'LOAD_CONST', 1, 19) (2, 125, 'STORE_FAST', 0, 'a') (4, 124, 'LOAD_FAST', 0, 'a') (6, 100, 'LOAD_CONST', 2, 3) (8, 65, 'BINARY_XOR', None, None) (10, 83, 'RETURN_VALUE', None, None)
16
# binary in
def s10():  # fixture: CONTAINS_OP with flag 0 ('in'); recorded result 'yes'
    a = [1,2,3,4,5,6]
    b = 4
    if b in a:
        return "yes"
    else:
        return "no"
hw5a.doit(s10)
(0, 103, 'BUILD_LIST', 0, 0) (2, 100, 'LOAD_CONST', 1, (1, 2, 3, 4, 5, 6)) (4, 162, 'LIST_EXTEND', 1, 1) (6, 125, 'STORE_FAST', 0, 'a') (8, 100, 'LOAD_CONST', 2, 4) (10, 125, 'STORE_FAST', 1, 'b') (12, 124, 'LOAD_FAST', 1, 'b') (14, 124, 'LOAD_FAST', 0, 'a') (16, 118, 'CONTAINS_OP', 0, 0) (18, 114, 'POP_JUMP_IF_FALSE', 24, 24) (20, 100, 'LOAD_CONST', 3, 'yes') (22, 83, 'RETURN_VALUE', None, None)
'yes'
def s10a():  # fixture: CONTAINS_OP with flag 1 ('not in'); recorded result 'no'
    a = [1,2,3,4,5,6]
    b = 4
    if b not in a:
        return "yes"
    else:
        return "no"
hw5a.doit(s10a)
(0, 103, 'BUILD_LIST', 0, 0) (2, 100, 'LOAD_CONST', 1, (1, 2, 3, 4, 5, 6)) (4, 162, 'LIST_EXTEND', 1, 1) (6, 125, 'STORE_FAST', 0, 'a') (8, 100, 'LOAD_CONST', 2, 4) (10, 125, 'STORE_FAST', 1, 'b') (12, 124, 'LOAD_FAST', 1, 'b') (14, 124, 'LOAD_FAST', 0, 'a') (16, 118, 'CONTAINS_OP', 1, 1) (18, 114, 'POP_JUMP_IF_FALSE', 24, 24) (24, 100, 'LOAD_CONST', 4, 'no') (26, 83, 'RETURN_VALUE', None, None)
'no'
# binary is
def s10b():  # fixture: IS_OP with flag 0 ('is'); recorded result 'no'
    a = [1,2,3,4,5,6]
    b = 4
    if b is a:
        return "yes"
    else:
        return "no"
hw5a.doit(s10b)
(0, 103, 'BUILD_LIST', 0, 0) (2, 100, 'LOAD_CONST', 1, (1, 2, 3, 4, 5, 6)) (4, 162, 'LIST_EXTEND', 1, 1) (6, 125, 'STORE_FAST', 0, 'a') (8, 100, 'LOAD_CONST', 2, 4) (10, 125, 'STORE_FAST', 1, 'b') (12, 124, 'LOAD_FAST', 1, 'b') (14, 124, 'LOAD_FAST', 0, 'a') (16, 117, 'IS_OP', 0, 0) (18, 114, 'POP_JUMP_IF_FALSE', 24, 24) (24, 100, 'LOAD_CONST', 4, 'no') (26, 83, 'RETURN_VALUE', None, None)
'no'
def s10c():  # fixture: IS_OP with flag 1 ('is not'); recorded result 'yes'
    a = [1,2,3,4,5,6]
    b = 4
    if b is not a:
        return "yes"
    else:
        return "no"
hw5a.doit(s10c)
(0, 103, 'BUILD_LIST', 0, 0) (2, 100, 'LOAD_CONST', 1, (1, 2, 3, 4, 5, 6)) (4, 162, 'LIST_EXTEND', 1, 1) (6, 125, 'STORE_FAST', 0, 'a') (8, 100, 'LOAD_CONST', 2, 4) (10, 125, 'STORE_FAST', 1, 'b') (12, 124, 'LOAD_FAST', 1, 'b') (14, 124, 'LOAD_FAST', 0, 'a') (16, 117, 'IS_OP', 1, 1) (18, 114, 'POP_JUMP_IF_FALSE', 24, 24) (20, 100, 'LOAD_CONST', 3, 'yes') (22, 83, 'RETURN_VALUE', None, None)
'yes'
Now we start to define the Interpreter itself.
The PVM uses a stack to control execution and pass arguments to functions and operators.
class Interpreter:
    """PVM-lite: a stack-based interpreter for a subset of Python bytecode."""

    def __init__(self, debug = True):
        """Create an interpreter with an empty stack and environment.

        ``debug`` is stored as given; execute() consults it to decide
        whether to print each instruction.
        """
        self.debug = debug
        self.pc = 0              # position of the next instruction to run
        self.result = None       # value handed back by execute()
        self.environment = {}    # local-variable bindings
        self.stack = []          # operand stack
The execute method controls the action.
It first decodes the function using makeobj()
which we defined above.
It sets the program counter (pc) to 0. When the pc becomes greater than or equal to the number of instructions, the program halts.
execute
cycles through the instructions, printing them out if debug is True.
Each PVM opcode/opname is defined as a method of the Interpreter
class.
This means that each opname is an attribute of the class and
can be accessed using getattr(self, instruction)
. The
opcode method is called with or without an argument, as appropriate.
When the while()
loop finishes, execute returns the result.
def execute(self, func):
    """Decode ``func`` with makeobj() and run its instructions.

    Each instruction is a 5-tuple
    ``(offset, opcode number, opname, raw argument, resolved argument)``.
    The handler is the method named after the opname. It is called with
    the resolved argument when the instruction carries a raw argument,
    and with no argument otherwise.

    Returns:
        ``self.result`` as left behind by the executed handlers.
    """
    codedict = makeobj(func)
    self.instructions = codedict["instructions"]
    # pc is a position in self.instructions, not a byte offset.
    # NOTE(review): the recorded traces show jump arguments that equal
    # byte offsets (e.g. 16, 24), so the jump helper presumably has to
    # translate an offset into a list position - confirm.
    self.pc = 0
    while (self.pc < len(self.instructions)):
        each_step = self.instructions[self.pc]
        offset, opcode_number, instruction, index, argument = each_step
        ## if debug, print out symbol for comparison operator
        if self.debug:
            suffix = ''
            if instruction == 'COMPARE_OP':
                suffix = self.COMPARE_OPERATORS_SYMBOLS[index]
            print (each_step, suffix)
        # Advance before dispatching so a handler may overwrite pc (jumps).
        self.pc += 1
        bytecode_method = getattr(self, instruction)
        # Decide on the raw argument, not the resolved one: LOAD_CONST of
        # the constant None has a resolved argument of None but still
        # needs to be called with it.
        if index is None:
            bytecode_method()
        else:
            bytecode_method(argument)
    return self.result
Note that execute calls makeobj(), defined above. If your makeobj function is not working correctly, you can still debug your execute function by importing the bytecode version of makeobj from hw5a.pyc:
from hw5a import makeobj
We define a few stack operations that are used in implementing the opcode methods.
def top(self):
    """Return the value on top of the stack without removing it."""
    stack = self.stack
    return stack[-1]
def pop(self):
    """Remove the value on top of the stack and return it."""
    value = self.stack.pop()
    return value
def push(self, *vals):
    """Push each of ``vals`` onto the stack, in the order given."""
    for value in vals:
        self.stack.append(value)
def popn(self, n):
    """Remove the top ``n`` values from the stack and return them.

    The values come back as a list with the deepest one first. A falsy
    ``n`` leaves the stack alone and yields an empty list.
    """
    if not n:
        return []
    taken = self.stack[-n:]
    del self.stack[-n:]
    return taken
We first implement the opcodes needed to execute function s1()
.
Write the following methods:
def STORE_FAST(self, name):
    """Pop the top of the stack and bind it to ``name`` in the environment."""
    self.environment[name] = self.stack.pop()
def LOAD_FAST(self, name):
    # Exercise stub: currently does nothing.
    # STORE_FAST in this file keeps locals in self.environment[name];
    # presumably this should push that value - confirm with the reading.
    pass

def LOAD_CONST(self, number):
    # Exercise stub: currently does nothing.
    # In the recorded traces the value passed in is the constant itself
    # (e.g. 1 or 'hello'), not its index in the constants tuple.
    pass

## set result from stack
## set pc to be out of range and thereby halt.
def RETURN_VALUE(self):
    # Exercise stub: currently does nothing; the two comments above
    # describe the intended behaviour.
    pass
Define the following set of binary operators needed to execute the s3 set of functions.
# Exercise stubs for the binary arithmetic opcodes; all currently do
# nothing. In the recorded traces the left operand is loaded before the
# right one (e.g. s3b returns a - b == -1).
def BINARY_ADD(self):
    pass

def BINARY_SUBTRACT(self):
    pass

def BINARY_POWER(self):
    pass

def BINARY_MULTIPLY(self):
    pass

# NOTE(review): opcode.opname as listed in this document has no
# BINARY_DIVIDE entry, so this handler is presumably never dispatched.
def BINARY_DIVIDE(self):
    pass

## same as divide for Python 3, I believe.
def BINARY_TRUE_DIVIDE(self):
    pass

def BINARY_FLOOR_DIVIDE(self):
    pass

def BINARY_MODULO(self):
    pass
Define the set of comparison operators needed to execute the s4 set of functions.
## comparison operators
## -----------------------------------
# Printable symbol for each comparison number; execute() uses it as the
# debug suffix for COMPARE_OP lines.
COMPARE_OPERATORS_SYMBOLS = [
    '<',
    '<=',
    '==',
    '!=',
    '>',
    '>=',
    'in',
    'not in',
    'is',
    'is not',
    'subclass',
]

# Two-argument callables in the same order as the symbols above.
# CONTAINS_OP and IS_OP in this file reach positions 6-9 through
# COMPARE_OP.
COMPARE_OPERATORS = [
    operator.lt,
    operator.le,
    operator.eq,
    operator.ne,
    operator.gt,
    operator.ge,
    lambda x, y: x in y,
    lambda x, y: x not in y,
    operator.is_,
    operator.is_not,
    lambda x, y: issubclass(x, Exception) and issubclass(x, y),
]
def COMPARE_OP(self, opnum):
    # Exercise stub: currently does nothing.
    # opnum is the comparison number shown in the traces (0 for '<',
    # 4 for '>', ...); presumably it selects an entry of
    # COMPARE_OPERATORS - confirm with the reading.
    pass
Define the set of jump operators needed to execute the s5 set of functions. Some are given.
## jumps
## -----------------------------------
## This method is called by the other jump methods.
## It causes the given address to be the next
## instruction to execute
def jump(self, address):
    # Exercise stub: currently does nothing.
    # NOTE(review): execute() indexes self.instructions by list position,
    # while the jump arguments in the recorded traces equal byte offsets
    # (16, 24); with two-byte instructions the position would be
    # address // 2 - confirm before implementing.
    pass
def JUMP_FORWARD(self, jump):
    """Continue execution at the target ``jump`` (delegates to self.jump)."""
    target = jump
    self.jump(target)
def JUMP_ABSOLUTE(self, jump):
    """Continue execution at the target ``jump`` (delegates to self.jump)."""
    target = jump
    self.jump(target)
# Exercise stubs for the conditional jumps; all currently do nothing.
# NOTE(review): opcode.opname as listed in this document has no
# JUMP_IF_TRUE or JUMP_IF_FALSE entry, but it does list POP_JUMP_IF_TRUE,
# for which there is no stub here.
def JUMP_IF_TRUE(self, jump):
    pass

def JUMP_IF_FALSE(self, jump):
    pass

# Used by the s5 and s10 fixtures: in the recorded traces execution
# continues at the given target when the tested value is false.
def POP_JUMP_IF_FALSE(self, jump):
    pass

def JUMP_IF_TRUE_OR_POP(self, jump):
    pass

def JUMP_IF_FALSE_OR_POP(self, jump):
    pass
Define the set of operators needed to build and index lists, tuples, and sets, and execute the s6 set of functions.
# Exercise stubs for the container opcodes; all currently do nothing.
# In the recorded traces count is the number of elements in the
# resulting container (2 for s6, s6a and s6b; 0 for the empty list that
# LIST_EXTEND later fills).
def BUILD_TUPLE(self, count):
    pass

def BUILD_LIST(self, count):
    pass

def BUILD_SET(self, count):
    pass

# In the s6c trace the container is loaded before the index.
def BINARY_SUBSCR(self):
    pass
### new bytecode in 3.9
def LIST_EXTEND(self, count):
    """Extend a list on the stack with the iterable on top of the stack.

    The iterable is popped; the list found ``count`` slots from the top
    of the remaining stack is extended in place and stays on the stack.
    This way ``[1, 2, 3]`` is built as a list rather than leaving the
    constant tuple ``(1, 2, 3)`` in its place.
    """
    iterable = self.stack.pop()
    self.stack[-count].extend(iterable)
Define the set of operators needed to implement the slice function and define the set of operators needed to implement dictionaries and execute the s7 set of functions.
# Exercise stubs for slices and dictionaries; all currently do nothing.
# In the s7 traces count is 2 and the start is loaded before the stop.
def BUILD_SLICE(self, count):
    pass

# In the s7b and s7c traces size is 0 (an empty dict literal).
def BUILD_MAP(self, size):
    pass

# NOTE(review): opcode.opname as listed in this document has no
# STORE_MAP entry, so this handler is presumably never dispatched.
def STORE_MAP(self):
    pass

# In the s7b trace (d[a] = b) the value is loaded first, then the
# container, then the key.
def STORE_SUBSCR(self):
    pass
Define the set of operators to implement unary and inplace functions and execute the s8 set of functions.
# Exercise stubs for the unary and in-place opcodes used by the s8
# fixtures; all currently do nothing.
def UNARY_POSITIVE(self):
    pass

def UNARY_NEGATIVE(self):
    pass

def UNARY_NOT(self):
    pass

def UNARY_INVERT(self):
    pass

# In the recorded traces each in-place opcode is followed by a
# STORE_FAST that saves the result back into the variable.
def INPLACE_ADD(self):
    pass

def INPLACE_SUBTRACT(self):
    pass

def INPLACE_MULTIPLY(self):
    pass

def INPLACE_TRUE_DIVIDE(self):
    pass

def INPLACE_MODULO(self):
    pass

def INPLACE_LSHIFT(self):
    pass

def INPLACE_RSHIFT(self):
    pass
Define the set of operators needed to implement the logical binary operators and execute the s9 set of functions.
## binary operators
## -----------------------------------
# Exercise stubs for the shift and bitwise opcodes used by the s9
# fixtures; all currently do nothing. In the recorded traces the left
# operand is loaded before the right one (e.g. s9a returns 19 >> 3 == 2).
def BINARY_LSHIFT(self):
    pass

def BINARY_RSHIFT(self):
    pass

def BINARY_AND(self):
    pass

def BINARY_XOR(self):
    pass

def BINARY_OR(self):
    pass
### problem 10
### new bytecode in 3.9
def CONTAINS_OP(self, flag):
    """Run the membership test through COMPARE_OP.

    A truthy ``flag`` selects comparison 7 ('not in'); a falsy one
    selects comparison 6 ('in').
    """
    self.COMPARE_OP(7 if flag else 6)
### new bytecode in 3.9
def IS_OP(self, flag):
    """Run the identity test through COMPARE_OP.

    A truthy ``flag`` selects comparison 9 ('is not'); a falsy one
    selects comparison 8 ('is').
    """
    self.COMPARE_OP(9 if flag else 8)