py: Change jump opcodes to emit 1-byte jump offset when possible. · csamuelson/circuitpython@538c3c0 · GitHub
Skip to content

Commit 538c3c0

Browse files
committed
py: Change jump opcodes to emit 1-byte jump offset when possible.
This commit introduces two changes: - All jump opcodes are changed to have variable length arguments, of either 1 or 2 bytes (previously they were fixed at 2 bytes). In most cases only 1 byte is needed to encode the short jump offset, saving bytecode size. - The bytecode emitter now selects 1 byte jump arguments when the jump offset is guaranteed to fit in 1 byte. This is achieved by checking if the code size changed during the last pass and, if it did (if it shrank), then requesting that the compiler make another pass to get the correct offsets of the now-smaller code. This can continue multiple times until the code stabilises. The code can only ever shrink so this iteration is guaranteed to complete. In most cases no extra passes are needed: the original 4 passes are enough to get it right by the 4th pass (because the 2nd pass computes roughly the correct labels and the 3rd pass computes the correct size for the jump argument). This change to the jump opcode encoding reduces the size of .mpy files and RAM usage (when bytecode is in RAM) by about 2% on average. The performance of the VM is not impacted, at least within measurement of the performance benchmark suite. Code size is reduced for builds that include a decent amount of frozen bytecode. ARM Cortex-M builds without any frozen code increase by about 350 bytes. Signed-off-by: Damien George <damien@micropython.org>
1 parent 9e3e67b commit 538c3c0

14 files changed

Lines changed: 583 additions & 437 deletions

File tree

py/bc.c

Lines changed: 5 additions & 1 deletion

py/bc0.h

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,18 @@
2828

2929
// MicroPython bytecode opcodes, grouped based on the format of the opcode
3030

31+
// All opcodes are encoded as a byte with an optional argument. Arguments are
32+
// variable-length encoded so they can be as small as possible. The possible
33+
// encodings for arguments are (ip[0] is the opcode):
34+
//
35+
// - unsigned relative bytecode offset:
36+
// - if ip[1] high bit is clear then: arg = ip[1]
37+
// - if ip[1] high bit is set then: arg = (ip[1] & 0x7f) | (ip[2] << 7)
38+
//
39+
// - signed relative bytecode offset:
40+
// - if ip[1] high bit is clear then: arg = ip[1] - 0x40
41+
// - if ip[1] high bit is set then: arg = ((ip[1] & 0x7f) | (ip[2] << 7)) - 0x4000
42+
3143
#define MP_BC_MASK_FORMAT (0xf0)
3244
#define MP_BC_MASK_EXTRA_BYTE (0x9e)
3345

@@ -101,17 +113,17 @@
101113
#define MP_BC_ROT_TWO (MP_BC_BASE_BYTE_O + 0x0a)
102114
#define MP_BC_ROT_THREE (MP_BC_BASE_BYTE_O + 0x0b)
103115

104-
#define MP_BC_JUMP (MP_BC_BASE_JUMP_E + 0x02) // rel byte code offset, 16-bit signed, in excess
105-
#define MP_BC_POP_JUMP_IF_TRUE (MP_BC_BASE_JUMP_E + 0x03) // rel byte code offset, 16-bit signed, in excess
106-
#define MP_BC_POP_JUMP_IF_FALSE (MP_BC_BASE_JUMP_E + 0x04) // rel byte code offset, 16-bit signed, in excess
107-
#define MP_BC_JUMP_IF_TRUE_OR_POP (MP_BC_BASE_JUMP_E + 0x05) // rel byte code offset, 16-bit signed, in excess
108-
#define MP_BC_JUMP_IF_FALSE_OR_POP (MP_BC_BASE_JUMP_E + 0x06) // rel byte code offset, 16-bit signed, in excess
109-
#define MP_BC_UNWIND_JUMP (MP_BC_BASE_JUMP_E + 0x00) // rel byte code offset, 16-bit signed, in excess; then a byte
110-
#define MP_BC_SETUP_WITH (MP_BC_BASE_JUMP_E + 0x07) // rel byte code offset, 16-bit unsigned
111-
#define MP_BC_SETUP_EXCEPT (MP_BC_BASE_JUMP_E + 0x08) // rel byte code offset, 16-bit unsigned
112-
#define MP_BC_SETUP_FINALLY (MP_BC_BASE_JUMP_E + 0x09) // rel byte code offset, 16-bit unsigned
113-
#define MP_BC_POP_EXCEPT_JUMP (MP_BC_BASE_JUMP_E + 0x0a) // rel byte code offset, 16-bit unsigned
114-
#define MP_BC_FOR_ITER (MP_BC_BASE_JUMP_E + 0x0b) // rel byte code offset, 16-bit unsigned
116+
#define MP_BC_UNWIND_JUMP (MP_BC_BASE_JUMP_E + 0x00) // signed relative bytecode offset; then a byte
117+
#define MP_BC_JUMP (MP_BC_BASE_JUMP_E + 0x02) // signed relative bytecode offset
118+
#define MP_BC_POP_JUMP_IF_TRUE (MP_BC_BASE_JUMP_E + 0x03) // signed relative bytecode offset
119+
#define MP_BC_POP_JUMP_IF_FALSE (MP_BC_BASE_JUMP_E + 0x04) // signed relative bytecode offset
120+
#define MP_BC_JUMP_IF_TRUE_OR_POP (MP_BC_BASE_JUMP_E + 0x05) // signed relative bytecode offset
121+
#define MP_BC_JUMP_IF_FALSE_OR_POP (MP_BC_BASE_JUMP_E + 0x06) // signed relative bytecode offset
122+
#define MP_BC_SETUP_WITH (MP_BC_BASE_JUMP_E + 0x07) // unsigned relative bytecode offset
123+
#define MP_BC_SETUP_EXCEPT (MP_BC_BASE_JUMP_E + 0x08) // unsigned relative bytecode offset
124+
#define MP_BC_SETUP_FINALLY (MP_BC_BASE_JUMP_E + 0x09) // unsigned relative bytecode offset
125+
#define MP_BC_POP_EXCEPT_JUMP (MP_BC_BASE_JUMP_E + 0x0a) // unsigned relative bytecode offset
126+
#define MP_BC_FOR_ITER (MP_BC_BASE_JUMP_E + 0x0b) // unsigned relative bytecode offset
115127
#define MP_BC_WITH_CLEANUP (MP_BC_BASE_BYTE_O + 0x0c)
116128
#define MP_BC_END_FINALLY (MP_BC_BASE_BYTE_O + 0x0d)
117129
#define MP_BC_GET_ITER (MP_BC_BASE_BYTE_O + 0x0e)

py/compile.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ STATIC void mp_emit_common_start_pass(mp_emit_common_t *emit, pass_kind_t pass)
219219
} else if (pass > MP_PASS_STACK_SIZE) {
220220
emit->ct_cur_obj = emit->ct_cur_obj_base;
221221
}
222-
if (pass == MP_PASS_EMIT) {
222+
if (pass == MP_PASS_CODE_SIZE) {
223223
if (emit->ct_cur_child == 0) {
224224
emit->children = NULL;
225225
} else {
@@ -3020,7 +3020,7 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) {
30203020
#endif
30213021
}
30223022

3023-
STATIC void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
3023+
STATIC bool compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
30243024
comp->pass = pass;
30253025
comp->scope_cur = scope;
30263026
comp->next_label = 0;
@@ -3187,10 +3187,12 @@ STATIC void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
31873187
EMIT(return_value);
31883188
}
31893189

3190-
EMIT(end_pass);
3190+
bool pass_complete = EMIT(end_pass);
31913191

31923192
// make sure we match all the exception levels
31933193
assert(comp->cur_except_level == 0);
3194+
3195+
return pass_complete;
31943196
}
31953197

31963198
#if MICROPY_EMIT_INLINE_ASM
@@ -3600,8 +3602,10 @@ mp_compiled_module_t mp_compile_to_raw_code(mp_parse_tree_t *parse_tree, qstr so
36003602
}
36013603

36023604
// final pass: emit code
3605+
// the emitter can request multiple of these passes
36033606
if (comp->compile_error == MP_OBJ_NULL) {
3604-
compile_scope(comp, s, MP_PASS_EMIT);
3607+
while (!compile_scope(comp, s, MP_PASS_EMIT)) {
3608+
}
36053609
}
36063610
}
36073611
}

py/emit.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ typedef enum {
4343
MP_PASS_SCOPE = 1, // work out id's and their kind, and number of labels
4444
MP_PASS_STACK_SIZE = 2, // work out maximum stack size
4545
MP_PASS_CODE_SIZE = 3, // work out code size and label offsets
46-
MP_PASS_EMIT = 4, // emit code
46+
MP_PASS_EMIT = 4, // emit code (may be run multiple times if the emitter requests it)
4747
} pass_kind_t;
4848

4949
#define MP_EMIT_STAR_FLAG_SINGLE (0x01)
@@ -116,7 +116,7 @@ typedef struct _emit_method_table_t {
116116
#endif
117117

118118
void (*start_pass)(emit_t *emit, pass_kind_t pass, scope_t *scope);
119-
void (*end_pass)(emit_t *emit);
119+
bool (*end_pass)(emit_t *emit);
120120
bool (*last_emit_was_return_value)(emit_t *emit);
121121
void (*adjust_stack_size)(emit_t *emit, mp_int_t delta);
122122
void (*set_source_line)(emit_t *emit, mp_uint_t line);
@@ -233,7 +233,7 @@ void emit_native_xtensa_free(emit_t *emit);
233233
void emit_native_xtensawin_free(emit_t *emit);
234234

235235
void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope);
236-
void mp_emit_bc_end_pass(emit_t *emit);
236+
bool mp_emit_bc_end_pass(emit_t *emit);
237237
bool mp_emit_bc_last_emit_was_return_value(emit_t *emit);
238238
void mp_emit_bc_adjust_stack_size(emit_t *emit, mp_int_t delta);
239239
void mp_emit_bc_set_source_line(emit_t *emit, mp_uint_t line);

py/emitbc.c

Lines changed: 86 additions & 53 deletions

0 commit comments

Comments
 (0)