PyCodeObject
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
/*
 * Field layout of a CPython code object, as quoted by this article.
 * NOTE(review): the co_posonlyargcount and co_opcache* members suggest the
 * CPython 3.8/3.9 header -- confirm against the interpreter version analysed.
 */
typedef struct {
    PyObject_HEAD
    int co_argcount;
    int co_posonlyargcount;
    int co_kwonlyargcount;
    int co_nlocals;
    int co_stacksize;
    int co_flags;
    int co_firstlineno;
    PyObject *co_code;          /* the bytecode; co_lnotab offsets index into it */
    PyObject *co_consts;        /* constants; may contain nested code objects */
    PyObject *co_names;
    PyObject *co_varnames;
    PyObject *co_freevars;
    PyObject *co_cellvars;
    Py_ssize_t *co_cell2arg;
    PyObject *co_filename;
    PyObject *co_name;
    PyObject *co_lnotab;        /* bytes object mapping bytecode offsets to source
                                   line numbers, stored as increments */
    void *co_zombieframe;
    PyObject *co_weakreflist;
    void *co_extra;
    unsigned char *co_opcache_map;
    _PyOpcache *co_opcache;
    int co_opcache_flag;
    unsigned char co_opcache_size;
} PyCodeObject;
|
在Python中,可以通过函数对象的 `__code__` 属性访问这些字段,例如 `func.__code__.co_argcount`。
部分解释
posonlyargcount、kwonlyargcount
nlocals
names
freevars、cellvars
lnotab
svn.python.org/projects/python/branches/pep-0384/Objects/lnotab_notes.txt
co_lnotab: 字节码指令与Python源代码的行号之间的对应关系,以PyBytesObject(即bytes对象)的形式存在
然而事实上,Python不会直接记录这些信息,而是会记录增量值。
比如说:
字节码在co_code中的偏移量 .py文件中源代码的行号
0 1
6 2
50 7
那么co_lnotab就应该是: 0 1 6 1 44 5。
0和1很好理解,就是co_code和.py文件的起始位置;6和1表示字节码的偏移量增加了6(0→6),.py文件的行号增加了1(1→2);44和5表示字节码的偏移量增加了44(6→50),.py文件的行号增加了5(2→7)
Code字节码
具体可在pycdc/bytes/python_310.map中(CPython自带的opcode.h中也有,例如mingw64\opt\include\python2.7\opcode.h;但不同Python版本的操作码编号并不相同,需要使用与目标版本对应的文件):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
| 0x1 POP_TOP 0x2 ROT_TWO 0x3 ROT_THREE 0x4 DUP_TOP 0x5 DUP_TOP_TWO 0x6 ROT_FOUR 0x9 NOP 0xa UNARY_POSITIVE 0xb UNARY_NEGATIVE 0xc UNARY_NOT 0xf UNARY_INVERT 0x10 BINARY_MATRIX_MULTIPLY 0x11 INPLACE_MATRIX_MULTIPLY 0x13 BINARY_POWER 0x14 BINARY_MULTIPLY 0x16 BINARY_MODULO 0x17 BINARY_ADD 0x18 BINARY_SUBTRACT 0x19 BINARY_SUBSCR 0x1a BINARY_FLOOR_DIVIDE 0x1b BINARY_TRUE_DIVIDE 0x1c INPLACE_FLOOR_DIVIDE 0x1d INPLACE_TRUE_DIVIDE 0x1e GET_LEN 0x1f MATCH_MAPPING 0x20 MATCH_SEQUENCE 0x21 MATCH_KEYS 0x22 COPY_DICT_WITHOUT_KEYS 0x31 WITH_EXCEPT_START 0x32 GET_AITER 0x33 GET_ANEXT 0x34 BEFORE_ASYNC_WITH 0x36 END_ASYNC_FOR 0x37 INPLACE_ADD 0x38 INPLACE_SUBTRACT 0x39 INPLACE_MULTIPLY 0x3b INPLACE_MODULO 0x3c STORE_SUBSCR 0x3d DELETE_SUBSCR 0x3e BINARY_LSHIFT 0x3f BINARY_RSHIFT 0x40 BINARY_AND 0x41 BINARY_XOR 0x42 BINARY_OR 0x43 INPLACE_POWER 0x44 GET_ITER 0x45 GET_YIELD_FROM_ITER 0x46 PRINT_EXPR 0x47 LOAD_BUILD_CLASS 0x48 YIELD_FROM 0x49 GET_AWAITABLE 0x4a LOAD_ASSERTION_ERROR 0x4b INPLACE_LSHIFT 0x4c INPLACE_RSHIFT 0x4d INPLACE_AND 0x4e INPLACE_XOR 0x4f INPLACE_OR 0x52 LIST_TO_TUPLE 0x53 RETURN_VALUE 0x54 IMPORT_STAR 0x55 SETUP_ANNOTATIONS 0x56 YIELD_VALUE 0x57 POP_BLOCK 0x59 POP_EXCEPT 0x5a STORE_NAME 0x5b DELETE_NAME 0x5c UNPACK_SEQUENCE 0x5d FOR_ITER 0x5e UNPACK_EX 0x5f STORE_ATTR 0x60 DELETE_ATTR 0x61 STORE_GLOBAL 0x62 DELETE_GLOBAL 0x63 ROT_N 0x64 LOAD_CONST 0x65 LOAD_NAME 0x66 BUILD_TUPLE 0x67 BUILD_LIST 0x68 BUILD_SET 0x69 BUILD_MAP 0x6a LOAD_ATTR 0x6b COMPARE_OP 0x6c IMPORT_NAME 0x6d IMPORT_FROM 0x6e JUMP_FORWARD 0x6f JUMP_IF_FALSE_OR_POP 0x70 JUMP_IF_TRUE_OR_POP 0x71 JUMP_ABSOLUTE 0x72 POP_JUMP_IF_FALSE 0x73 POP_JUMP_IF_TRUE 0x74 LOAD_GLOBAL 0x75 IS_OP 0x76 CONTAINS_OP 0x77 RERAISE 0x79 JUMP_IF_NOT_EXC_MATCH 0x7a SETUP_FINALLY 0x7c LOAD_FAST 0x7d STORE_FAST 0x7e DELETE_FAST 0x81 GEN_START 0x82 RAISE_VARARGS 0x83 CALL_FUNCTION 0x84 MAKE_FUNCTION 0x85 BUILD_SLICE 0x87 LOAD_CLOSURE 0x88 LOAD_DEREF 0x89 STORE_DEREF 0x8a DELETE_DEREF 0x8d CALL_FUNCTION_KW 0x8e 
CALL_FUNCTION_EX 0x8f SETUP_WITH 0x90 EXTENDED_ARG 0x91 LIST_APPEND 0x92 SET_ADD 0x93 MAP_ADD 0x94 LOAD_CLASSDEREF 0x98 MATCH_CLASS 0x9a SETUP_ASYNC_WITH 0x9b FORMAT_VALUE 0x9c BUILD_CONST_KEY_MAP 0x9d BUILD_STRING 0xa0 LOAD_METHOD 0xa1 CALL_METHOD 0xa2 LIST_EXTEND 0xa3 SET_UPDATE 0xa4 DICT_MERGE 0xa5 DICT_UPDATE
|
或者执行脚本获得:
1 2 3
import opcode

# Dump the running interpreter's opcode table as "<hex number> <name>" lines.
for name, number in opcode.opmap.items():
    print(hex(number), name)
|
对象类型
具体可在pycdc/PythonBytecode.txt中:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| 3.0 3.4 TYPE_NULL '0' '0' TYPE_NONE 'N' 'N' TYPE_FALSE 'F' 'F' TYPE_TRUE 'T' 'T' TYPE_STOPITER 'S' 'S' TYPE_ELLIPSIS '.' '.' TYPE_INT 'i' 'i' TYPE_INT64 'I' TYPE_FLOAT 'f' 'f' TYPE_BINARY_FLOAT 'g' 'g' TYPE_COMPLEX 'x' 'x' TYPE_BINARY_COMPLEX 'y' 'y' TYPE_LONG 'l' 'l' TYPE_STRING 's' 's' TYPE_INTERNED 't' TYPE_REF 'r' TYPE_TUPLE '(' '(' TYPE_LIST '[' '[' TYPE_DICT '{' '{' TYPE_CODE 'c' 'c' TYPE_UNICODE 'u' 'u' TYPE_UNKNOWN '?' '?' TYPE_SET '<' '<' TYPE_FROZENSET '>' '>' TYPE_ASCII 'a' TYPE_ASCII_INTERNED 'A' TYPE_SMALL_TUPLE ')' TYPE_SHORT_ASCII 'z' TYPE_SHORT_ASCII_INTERNED 'Z'
|
pyc文件中每个类型的对象都有着固定的形式(type字节的最高位0x80是引用标志FLAG_REF,表示该对象会被记入引用表,供后面的0x72引用;例如0xE9 = 0x69 | 0x80,0xDA = 0x5A | 0x80):
- 数值类型:type(1 byte)+ num
E9 0A 00 00 00
:10
69 11 01 00 00
:0x111(十进制273;4字节小端整数)
67 a4 70 3d 0a d7 a3 0a 40
:3.33 (IEEE 754 浮点数转化)
- 超出32位有符号整数范围的数(例如大于等于0x80000000),type为0x6C(TYPE_LONG):type(1 byte)+ digit个数(4 byte)+ 若干个15位的digit(每个2 byte,低位在前)。
6C 03 00 00 00 90 78 AC 68 48 00
:0x1234567890
6C 03 00 00 00 00 00 00 00 02 00
:0x80000000
- 字符串类型:type(1 byte)+ 长度(1 byte)+ string
DA 05 70 72 69 6E 74
:“print”
FA 07 74 65 73 74 2E 70 79
:“test.py”
7a 10 74 68 69 73 20 69 73 20 73 74 72 20 74 79 70 65
:“this is str type”
5a 03 61 67 65
:“age”
- 元组(tuple)0x29:type(1 byte)+ 元素数量(1 byte)+ 元素
29 + 03 + a + b + c
:(a,b,c)
- 集合(set/frozenset):type(1 byte)+ 元素数量(4 byte)+ 元素(下例中的0x3e是TYPE_FROZENSET)
3e 03 00 00 00 + a + b + c
:{a,b,c}
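- 字典(dict):下例中的字典字面量并不以TYPE_DICT的形式存储,而是拆成各个值(‘Alice’、30)和一个由键组成的元组(‘name’, ‘age’),运行时再由BUILD_CONST_KEY_MAP指令组装: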
5a 05 41 6c 69 63 65 + e9 1e 00 00 00 + 29 02 + da 04 6e 61 6d 65 + 5a 03 61 67 65
:{‘name’: ‘Alice’, ‘age’: 30}
- 字节码(co_code)0x73(TYPE_STRING,即bytes):type(1 byte)+ 长度(4 byte)+ code
- 引用(ref)0x72:type(1 byte)+ 索引(4 byte)
72 + xx xx xx xx
:获取已声明的对象
- 若
72 05 00 00 00
,则获取已声明的第五个对象(从先到后)
TYPE_NONE = 0x4E; TYPE_FALSE = 0x46; TYPE_TRUE = 0x54
73 04 00 00 00 0C 01 0C 02
:lnotab: type + length +lnotab
具体pyc文件
分析一下python代码的pyc文件。
1 2 3 4 5 6 7 8
def add(a, b, /, c):  # a and b are positional-only (left of `/`); c may be passed by keyword
    def funtest():  # nested function; defined here but never called
        print(a)  # reads `a` from the enclosing add() call
    return a + b + c


if __name__ == '__main__':
    print(add(10, 12, c=3))  # 10 + 12 + 3
|
pyc文件对象:
add函数对象:
funtest函数对象:
其中,按先后顺序声明的对象为:
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| 1. “print” 2. A9 00 (空元组 (),即带引用标志、长度为0的TYPE_SMALL_TUPLE;其他对象中为空的属性都用 72 02 00 00 00 引用它) 3. (“a”, ) 4. “a” 5. “test.py” 6. “funtest” 7. “b” 8. “c” 9. “add” 10. “__main__” 11. 10 12. 12 13. 3 14. “__name__”
|
解析的脚本:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
| import marshal import dis import struct import time import types import binascii
def print_metadata(fp):
    """Print the 16-byte header of a CPython 3.7+ ``.pyc`` file.

    The header is four little-endian 32-bit words: magic number, flag bit
    field, and then either (source mtime, source size) for timestamp-based
    files or an 8-byte source hash for hash-based files (PEP 552).

    Args:
        fp: Binary file object positioned at the start of the ``.pyc``.
            On return it is positioned just past the header, at the start
            of the marshalled code object.

    Raises:
        struct.error: If the file ends before the header is complete.
    """
    def read_u32():
        # All header words are unsigned; a signed read would turn a large
        # mtime or size into a negative number.
        return struct.unpack('<I', fp.read(4))[0]

    magic = read_u32()
    print(f"magic number = {hex(magic)}")
    bit_field = read_u32()
    print(f"bit field = {bit_field}")

    if bit_field & 0x1:
        # Hash-based pyc: the remaining 8 bytes are a hash of the source,
        # not a timestamp and a size. Bit 1 is the check_source flag.
        source_hash = struct.unpack('<8s', fp.read(8))[0]
        print(f"source hash = {source_hash.hex()}")
        print(f"check source = {bool(bit_field & 0x2)}")
        return

    t = read_u32()
    print(f"time = {time.asctime(time.localtime(t))}")
    file_size = read_u32()
    print(f"file size = {file_size}")
def show_code(code, indent=''):
    """Recursively print the fields of a code object.

    Prints the numeric fields, a hex dump and disassembly of the bytecode,
    every constant (recursing into nested code objects with a deeper
    indent), the name tuples, and finally the line-number table.

    Args:
        code: The code object to dump.
        indent: Prefix prepended to every line printed for this object.
    """
    print("%scode" % indent)
    indent += ' '

    # Integer fields, looked up one at a time in the order they are printed.
    for field in ("argcount", "posonlyargcount", "kwonlyargcount",
                  "nlocals", "stacksize"):
        print("%s%s %d" % (indent, field, getattr(code, "co_" + field)))
    print("%sflags %04x" % (indent, code.co_flags))

    show_hex("code", code.co_code, indent=indent)
    dis.disassemble(code)

    print("%sconsts" % indent)
    for const in code.co_consts:
        if isinstance(const, types.CodeType):
            # Nested function/class body: dump it one level deeper.
            show_code(const, indent + ' ')
        else:
            print(" %s%r" % (indent, const))

    # Fields shown with repr().
    for field in ("names", "varnames", "freevars", "cellvars",
                  "filename", "name"):
        print("%s%s %r" % (indent, field, getattr(code, "co_" + field)))

    print("%sfirstlineno %d" % (indent, code.co_firstlineno))
    show_hex("lnotab", code.co_lnotab, indent=indent)
def show_hex(label, h, indent):
    """Print a bytes-like value as hexadecimal text.

    Values shorter than 60 hex digits are printed on the label's line;
    longer values are printed below the label in rows of 60 digits.

    Args:
        label: Name printed in front of (or above) the hex dump.
        h: Bytes-like object to dump.
        indent: Prefix prepended to every printed line.
    """
    # hexlify() returns bytes on Python 3; decode so that "%s" prints the
    # digits themselves rather than a b'...' repr.
    hex_text = binascii.hexlify(h).decode('ascii')
    if len(hex_text) < 60:
        print("%s%s %s" % (indent, label, hex_text))
        return
    print("%s%s" % (indent, label))
    for start in range(0, len(hex_text), 60):
        print("%s %s" % (indent, hex_text[start:start + 60]))
if __name__ == '__main__':
    import sys

    # Path of the .pyc to dump: first command-line argument if given,
    # otherwise the file this script was originally written for.
    filename = sys.argv[1] if len(sys.argv) > 1 else "./__pycache__/rev_RightBack.pyc"
    with open(filename, "rb") as fp:
        print_metadata(fp)
        # NOTE: marshal is not safe against malicious input; only load
        # .pyc files from a source you trust.
        code_object = marshal.load(fp)
    show_code(code_object)
|
参考
《深度剖析CPython解释器》10. Python中的PyCodeObject对象与pyc文件 - 古明地盆 - 博客园 (cnblogs.com)