py: Compress first part of bytecode prelude. · csamuelson/circuitpython@b5ebfad · GitHub
Skip to content

Commit b5ebfad

Browse files
committed
py: Compress first part of bytecode prelude.
The start of the bytecode prelude contains 6 numbers telling the amount of stack needed for the Python values and exceptions, and the signature of the function. Prior to this patch these numbers were all encoded one after the other (2x variable unsigned integers, then 4x bytes), but using so many bytes is unnecessary. An entropy analysis of around 150,000 bytecode functions from the CPython standard library showed that the optimal Shannon coding would need about 7.1 bits on average to encode these 6 numbers, compared to the existing 48 bits. This patch attempts to get close to this optimal value by packing the 6 numbers into a single, varible-length unsigned integer via bit-wise interleaving. The interleaving scheme is chosen to minimise the average number of bytes needed, and at the same time keep the scheme simple enough so it can be implemented without too much overhead in code size or speed. The scheme requires about 10.5 bits on average to store the 6 numbers. As a result most functions which originally took 6 bytes to encode these 6 numbers now need only 1 byte (in 80% of cases).
1 parent 81d04a0 commit b5ebfad

14 files changed

Lines changed: 171 additions & 106 deletions

File tree

py/bc.c

Lines changed: 8 additions & 7 deletions

py/bc.h

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,15 @@
3232

3333
// bytecode layout:
3434
//
35-
// n_state : var uint
36-
// n_exc_stack : var uint
37-
// scope_flags : byte
38-
// n_pos_args : byte number of arguments this function takes
39-
// n_kwonly_args : byte number of keyword-only arguments this function takes
40-
// n_def_pos_args : byte number of default positional arguments
35+
// func signature : var uint
36+
// contains six values interleaved bit-wise as: xSSSSEAA [xFSSKAED repeated]
37+
// x = extension another byte follows
38+
// S = n_state - 1 number of entries in Python value stack
39+
// E = n_exc_stack number of entries in exception stack
40+
// F = scope_flags four bits of flags, MP_SCOPE_FLAG_xxx
41+
// A = n_pos_args number of arguments this function takes
42+
// K = n_kwonly_args number of keyword-only arguments this function takes
43+
// D = n_def_pos_args number of default positional arguments
4144
//
4245
// code_info_size : var uint | code_info_size counts bytes in this chunk
4346
// simple_name : var qstr |
@@ -60,6 +63,65 @@
6063
// const0 : obj
6164
// constN : obj
6265

66+
#define MP_BC_PRELUDE_SIG_ENCODE(S, E, scope, out_byte, out_env) \
67+
do { \
68+
/*// Get values to store in prelude */ \
69+
size_t F = scope->scope_flags & 0x0f; /* only need to store lower 4 flag bits */ \
70+
size_t A = scope->num_pos_args; \
71+
size_t K = scope->num_kwonly_args; \
72+
size_t D = scope->num_def_pos_args; \
73+
\
74+
/* Adjust S to shrink range, to compress better */ \
75+
S -= 1; \
76+
\
77+
/* Encode prelude */ \
78+
/* xSSSSEAA */ \
79+
uint8_t z = (S & 0xf) << 3 | (E & 1) << 2 | (A & 3); \
80+
S >>= 4; \
81+
E >>= 1; \
82+
A >>= 2; \
83+
while (S | E | F | A | K | D) { \
84+
out_byte(out_env, 0x80 | z); \
85+
/* xFSSKAED */ \
86+
z = (F & 1) << 6 | (S & 3) << 4 | (K & 1) << 3 \
87+
| (A & 1) << 2 | (E & 1) << 1 | (D & 1); \
88+
S >>= 2; \
89+
E >>= 1; \
90+
F >>= 1; \
91+
A >>= 1; \
92+
K >>= 1; \
93+
D >>= 1; \
94+
} \
95+
out_byte(out_env, z); \
96+
} while (0)
97+
98+
#define MP_BC_PRELUDE_SIG_DECODE_INTO(ip, S, E, F, A, K, D) \
99+
do { \
100+
uint8_t z = *(ip)++; \
101+
/* xSSSSEAA */ \
102+
S = (z >> 3) & 0xf; \
103+
E = (z >> 2) & 0x1; \
104+
F = 0; \
105+
A = z & 0x3; \
106+
K = 0; \
107+
D = 0; \
108+
for (unsigned n = 0; z & 0x80; ++n) { \
109+
z = *(ip)++; \
110+
/* xFSSKAED */ \
111+
S |= (z & 0x30) << (2 * n); \
112+
E |= (z & 0x02) << n; \
113+
F |= ((z & 0x40) >> 6) << n; \
114+
A |= (z & 0x4) << n; \
115+
K |= ((z & 0x08) >> 3) << n; \
116+
D |= (z & 0x1) << n; \
117+
} \
118+
S += 1; \
119+
} while (0)
120+
121+
#define MP_BC_PRELUDE_SIG_DECODE(ip) \
122+
size_t n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args; \
123+
MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args)
124+
63125
// Sentinel value for mp_code_state_t.exc_sp_idx
64126
#define MP_CODE_STATE_EXC_SP_IDX_SENTINEL ((uint16_t)-1)
65127

py/emitbc.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
328328
emit->bytecode_offset = 0;
329329
emit->code_info_offset = 0;
330330

331-
// Write local state size and exception stack size.
331+
// Write local state size, exception stack size, scope flags and number of arguments
332332
{
333333
mp_uint_t n_state = scope->num_locals + scope->stack_size;
334334
if (n_state == 0) {
@@ -341,16 +341,10 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
341341
// An extra slot in the stack is needed to detect VM stack overflow
342342
n_state += 1;
343343
#endif
344-
emit_write_code_info_uint(emit, n_state);
345-
emit_write_code_info_uint(emit, scope->exc_stack_size);
346-
}
347344

348-
// Write scope flags and number of arguments.
349-
// TODO check that num args all fit in a byte
350-
emit_write_code_info_byte(emit, emit->scope->scope_flags);
351-
emit_write_code_info_byte(emit, emit->scope->num_pos_args);
352-
emit_write_code_info_byte(emit, emit->scope->num_kwonly_args);
353-
emit_write_code_info_byte(emit, emit->scope->num_def_pos_args);
345+
size_t n_exc_stack = scope->exc_stack_size;
346+
MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, scope, emit_write_code_info_byte, emit);
347+
}
354348

355349
// Write size of the rest of the code info. We don't know how big this
356350
// variable uint will be on the MP_PASS_CODE_SIZE pass so we reserve 2 bytes

py/emitnative.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -573,18 +573,19 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
573573

574574
}
575575

576+
static inline void emit_native_write_code_info_byte(emit_t *emit, byte val) {
577+
mp_asm_base_data(&emit->as->base, 1, val);
578+
}
579+
576580
STATIC void emit_native_end_pass(emit_t *emit) {
577581
emit_native_global_exc_exit(emit);
578582

579583
if (!emit->do_viper_types) {
580584
emit->prelude_offset = mp_asm_base_get_code_pos(&emit->as->base);
581-
mp_asm_base_data(&emit->as->base, 1, 0x80 | ((emit->n_state >> 7) & 0x7f));
582-
mp_asm_base_data(&emit->as->base, 1, emit->n_state & 0x7f);
583-
mp_asm_base_data(&emit->as->base, 1, 0); // n_exc_stack
584-
mp_asm_base_data(&emit->as->base, 1, emit->scope->scope_flags);
585-
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_pos_args);
586-
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_kwonly_args);
587-
mp_asm_base_data(&emit->as->base, 1, emit->scope->num_def_pos_args);
585+
586+
size_t n_state = emit->n_state;
587+
size_t n_exc_stack = 0; // exc-stack not needed for native code
588+
MP_BC_PRELUDE_SIG_ENCODE(n_state, n_exc_stack, emit->scope, emit_native_write_code_info_byte, emit);
588589

589590
// write code info
590591
#if MICROPY_PERSISTENT_CODE

py/objfun.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,7 @@ qstr mp_obj_fun_get_name(mp_const_obj_t fun_in) {
161161
#endif
162162

163163
const byte *bc = fun->bytecode;
164-
bc = mp_decode_uint_skip(bc); // skip n_state
165-
bc = mp_decode_uint_skip(bc); // skip n_exc_stack
166-
bc++; // skip scope_params
167-
bc++; // skip n_pos_args
168-
bc++; // skip n_kwonly_args
169-
bc++; // skip n_def_pos_args
164+
MP_BC_PRELUDE_SIG_DECODE(bc);
170165
return mp_obj_code_get_name(bc);
171166
}
172167

@@ -197,10 +192,10 @@ STATIC void dump_args(const mp_obj_t *a, size_t sz) {
197192

198193
#define DECODE_CODESTATE_SIZE(bytecode, n_state_out_var, state_size_out_var) \
199194
{ \
200-
/* bytecode prelude: state size and exception stack size */ \
201-
n_state_out_var = mp_decode_uint_value(bytecode); \
202-
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(bytecode)); \
203-
\
195+
const uint8_t *ip = bytecode; \
196+
size_t n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_args; \
197+
MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state_out_var, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_args); \
198+
\
204199
/* state size in bytes */ \
205200
state_size_out_var = n_state_out_var * sizeof(mp_obj_t) \
206201
+ n_exc_stack * sizeof(mp_exc_stack_t); \
@@ -295,9 +290,11 @@ STATIC mp_obj_t fun_bc_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const
295290
assert(0);
296291
}
297292
}
298-
const byte *bytecode_ptr = mp_decode_uint_skip(mp_decode_uint_skip(self->bytecode));
299-
size_t n_pos_args = bytecode_ptr[1];
300-
size_t n_kwonly_args = bytecode_ptr[2];
293+
const byte *bytecode_ptr = self->bytecode;
294+
size_t n_state_unused, n_exc_stack_unused, scope_flags_unused;
295+
size_t n_pos_args, n_kwonly_args, n_def_args_unused;
296+
MP_BC_PRELUDE_SIG_DECODE_INTO(bytecode_ptr, n_state_unused, n_exc_stack_unused,
297+
scope_flags_unused, n_pos_args, n_kwonly_args, n_def_args_unused);
301298
// We can't check the case when an exception is returned in state[0]
302299
// and there are no arguments, because in this case our detection slot may have
303300
// been overwritten by the returned exception (which is allowed).

py/objgenerator.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_kw, cons
5151
mp_obj_fun_bc_t *self_fun = MP_OBJ_TO_PTR(self_in);
5252

5353
// bytecode prelude: get state size and exception stack size
54-
size_t n_state = mp_decode_uint_value(self_fun->bytecode);
55-
size_t n_exc_stack = mp_decode_uint_value(mp_decode_uint_skip(self_fun->bytecode));
54+
const uint8_t *ip = self_fun->bytecode;
55+
MP_BC_PRELUDE_SIG_DECODE(ip);
5656

5757
// allocate the generator object, with room for local stack and exception stack
5858
mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, byte,
@@ -88,7 +88,9 @@ STATIC mp_obj_t native_gen_wrap_call(mp_obj_t self_in, size_t n_args, size_t n_k
8888

8989
// Determine start of prelude, and extract n_state from it
9090
uintptr_t prelude_offset = ((uintptr_t*)self_fun->bytecode)[0];
91-
size_t n_state = mp_decode_uint_value(self_fun->bytecode + prelude_offset);
91+
const uint8_t *ip = self_fun->bytecode + prelude_offset;
92+
size_t n_state, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_args;
93+
MP_BC_PRELUDE_SIG_DECODE_INTO(ip, n_state, n_exc_stack_unused, scope_flags, n_pos_args, n_kwonly_args, n_def_args);
9294
size_t n_exc_stack = 0;
9395

9496
// Allocate the generator object, with room for local stack and exception stack

py/persistentcode.c

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -157,26 +157,23 @@ typedef struct _bytecode_prelude_t {
157157
uint code_info_size;
158158
} bytecode_prelude_t;
159159

160-
#if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_EMIT_MACHINE_CODE
161-
162160
// ip will point to start of opcodes
163161
// ip2 will point to simple_name, source_file qstrs
164162
STATIC void extract_prelude(const byte **ip, const byte **ip2, bytecode_prelude_t *prelude) {
165-
prelude->n_state = mp_decode_uint(ip);
166-
prelude->n_exc_stack = mp_decode_uint(ip);
167-
prelude->scope_flags = *(*ip)++;
168-
prelude->n_pos_args = *(*ip)++;
169-
prelude->n_kwonly_args = *(*ip)++;
170-
prelude->n_def_pos_args = *(*ip)++;
163+
MP_BC_PRELUDE_SIG_DECODE(*ip);
164+
prelude->n_state = n_state;
165+
prelude->n_exc_stack = n_exc_stack;
166+
prelude->scope_flags = scope_flags;
167+
prelude->n_pos_args = n_pos_args;
168+
prelude->n_kwonly_args = n_kwonly_args;
169+
prelude->n_def_pos_args = n_def_pos_args;
171170
*ip2 = *ip;
172171
prelude->code_info_size = mp_decode_uint(ip2);
173172
*ip += prelude->code_info_size;
174173
while (*(*ip)++ != 255) {
175174
}
176175
}
177176

178-
#endif
179-
180177
#endif // MICROPY_PERSISTENT_CODE_LOAD || MICROPY_PERSISTENT_CODE_SAVE
181178

182179
#if MICROPY_PERSISTENT_CODE_LOAD
@@ -285,19 +282,19 @@ STATIC mp_obj_t load_obj(mp_reader_t *reader) {
285282
}
286283

287284
STATIC void load_prelude(mp_reader_t *reader, byte **ip, byte **ip2, bytecode_prelude_t *prelude) {
288-
prelude->n_state = read_uint(reader, ip);
289-
prelude->n_exc_stack = read_uint(reader, ip);
290-
read_bytes(reader, *ip, 4);
291-
prelude->scope_flags = *(*ip)++;
292-
prelude->n_pos_args = *(*ip)++;
293-
prelude->n_kwonly_args = *(*ip)++;
294-
prelude->n_def_pos_args = *(*ip)++;
295-
*ip2 = *ip;
296-
prelude->code_info_size = read_uint(reader, ip2);
297-
read_bytes(reader, *ip2, prelude->code_info_size - (*ip2 - *ip));
298-
*ip += prelude->code_info_size;
299-
while ((*(*ip)++ = read_byte(reader)) != 255) {
285+
// Read in the prelude
286+
byte *ip_read = *ip;
287+
read_uint(reader, &ip_read); // read in n_state/etc (is effectively a var-uint)
288+
byte *ip_read_save = ip_read;
289+
size_t code_info_size = read_uint(reader, &ip_read); // read in code_info_size
290+
code_info_size -= ip_read - ip_read_save; // subtract bytes taken by code_info_size itself
291+
read_bytes(reader, ip_read, code_info_size); // read remaining code info
292+
ip_read += code_info_size;
293+
while ((*ip_read++ = read_byte(reader)) != 255) {
300294
}
295+
296+
// Entire prelude has been read into *ip, now decode and extract values from it
297+
extract_prelude((const byte**)ip, (const byte**)ip2, prelude);
301298
}
302299

303300
STATIC void load_bytecode(mp_reader_t *reader, qstr_window_t *qw, byte *ip, byte *ip_top) {

py/profile.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,13 @@ STATIC uint mp_prof_bytecode_lineno(const mp_raw_code_t *rc, size_t bc) {
4040
void mp_prof_extract_prelude(const byte *bytecode, mp_bytecode_prelude_t *prelude) {
4141
const byte *ip = bytecode;
4242

43-
prelude->n_state = mp_decode_uint(&ip);
44-
prelude->n_exc_stack = mp_decode_uint(&ip);
45-
prelude->scope_flags = *ip++;
46-
prelude->n_pos_args = *ip++;
47-
prelude->n_kwonly_args = *ip++;
48-
prelude->n_def_pos_args = *ip++;
43+
MP_BC_PRELUDE_SIG_DECODE(ip);
44+
prelude->n_state = n_state;
45+
prelude->n_exc_stack = n_exc_stack;
46+
prelude->scope_flags = scope_flags;
47+
prelude->n_pos_args = n_pos_args;
48+
prelude->n_kwonly_args = n_kwonly_args;
49+
prelude->n_def_pos_args = n_def_pos_args;
4950

5051
const byte *code_info = ip;
5152
size_t code_info_size = mp_decode_uint(&ip);

py/runtime0.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@
2828

2929
// The first four must fit in 8 bits, see emitbc.c
3030
// The remaining must fit in 16 bits, see scope.h
31-
#define MP_SCOPE_FLAG_VARARGS (0x01)
31+
#define MP_SCOPE_FLAG_GENERATOR (0x01)
3232
#define MP_SCOPE_FLAG_VARKEYWORDS (0x02)
33-
#define MP_SCOPE_FLAG_GENERATOR (0x04)
33+
#define MP_SCOPE_FLAG_VARARGS (0x04)
3434
#define MP_SCOPE_FLAG_DEFKWARGS (0x08)
3535
#define MP_SCOPE_FLAG_REFGLOBALS (0x10) // used only if native emitter enabled
3636
#define MP_SCOPE_FLAG_HASCONSTS (0x20) // used only if native emitter enabled

py/showbc.c

Lines changed: 4 additions & 9 deletions

0 commit comments

Comments
 (0)