2828#include <stdint.h>
2929#include <stdio.h>
3030#include <string.h>
31+ #include <unistd.h>
3132#include <assert.h>
3233
3334#include "py/mpstate.h"
@@ -55,8 +56,8 @@ struct _emit_t {
5556 mp_uint_t last_source_line_offset ;
5657 mp_uint_t last_source_line ;
5758
58- mp_uint_t max_num_labels ;
59- mp_uint_t * label_offsets ;
59+ size_t max_num_labels ;
60+ size_t * label_offsets ;
6061
6162 size_t code_info_offset ;
6263 size_t code_info_size ;
@@ -76,11 +77,11 @@ emit_t *emit_bc_new(mp_emit_common_t *emit_common) {
7677
// Store the label count on the emitter and allocate one offset slot per label.
// The '-'/'+' pair below changes only the element type handed to m_new
// (mp_uint_t -> size_t), in line with the size_t * label_offsets field that
// this same diff introduces in struct _emit_t.
7778void emit_bc_set_max_num_labels (emit_t * emit , mp_uint_t max_num_labels ) {
// NOTE(review): the parameter is still mp_uint_t while the field is now size_t;
// presumably both have the same width on supported targets - confirm.
7879 emit -> max_num_labels = max_num_labels ;
79- emit -> label_offsets = m_new (mp_uint_t , emit -> max_num_labels );
80+ emit -> label_offsets = m_new (size_t , emit -> max_num_labels );
8081}
8182
// Release the label-offset table, then the emitter object itself.
// The element type and count given to m_del mirror the m_new call in
// emit_bc_set_max_num_labels (size_t on the '+' side of this diff).
8283void emit_bc_free (emit_t * emit ) {
83- m_del (mp_uint_t , emit -> label_offsets , emit -> max_num_labels );
84+ m_del (size_t , emit -> label_offsets , emit -> max_num_labels );
8485 m_del_obj (emit_t , emit );
8586}
8687
@@ -213,34 +214,55 @@ STATIC void emit_write_bytecode_byte_child(emit_t *emit, int stack_adj, byte b,
213214 #endif
214215}
215216
// Diff hunk overview: the two fixed-width 16-bit writers on the '-' lines
// (emit_write_bytecode_byte_unsigned_label and emit_write_bytecode_byte_signed_label)
// are replaced by a single variable-width writer on the '+' lines,
// emit_write_bytecode_byte_label.
//
// Parameters of the new writer:
//   emit      - emitter state; pass, bytecode_offset and label_offsets are read here
//   stack_adj - forwarded to mp_emit_bc_adjust_stack_size before any byte is written
//   b1        - the jump opcode byte; its value also selects signed vs unsigned encoding
//   label     - index into emit->label_offsets identifying the jump target
216- // unsigned labels are relative to ip following this instruction, stored as 16 bits
217- STATIC void emit_write_bytecode_byte_unsigned_label (emit_t * emit , int stack_adj , byte b1 , mp_uint_t label ) {
217+ // Emit a jump opcode to a destination label.
218+ // The offset to the label is relative to the ip following this instruction.
219+ // The offset is encoded as either 1 or 2 bytes, depending on how big it is.
220+ // The encoding of this jump opcode can change size from one pass to the next,
221+ // but it must only ever decrease in size on successive passes.
222+ STATIC void emit_write_bytecode_byte_label (emit_t * emit , int stack_adj , byte b1 , mp_uint_t label ) {
218223 mp_emit_bc_adjust_stack_size (emit , stack_adj );
219- mp_uint_t bytecode_offset ;
220- if (emit -> pass < MP_PASS_EMIT ) {
221- bytecode_offset = 0 ;
222- } else {
223- bytecode_offset = emit -> label_offsets [label ] - emit -> bytecode_offset - 3 ;
224+
225+ // Determine if the jump offset is signed or unsigned, based on the opcode.
    // NOTE(review): this relies on every signed-offset jump opcode being numbered
    // at or below MP_BC_JUMP_IF_FALSE_OR_POP - confirm against the opcode definitions.
226+ const bool is_signed = b1 <= MP_BC_JUMP_IF_FALSE_OR_POP ;
227+
228+ // Default to a 2-byte encoding (the largest) with an unknown jump offset.
    // jump_encoding_size counts the EXTRA offset bytes beyond the first one:
    // 1 selects the 2-byte offset form, 0 selects the 1-byte offset form. The
    // instruction length requested further down is 2 + jump_encoding_size.
229+ unsigned int jump_encoding_size = 1 ;
230+ ssize_t bytecode_offset = 0 ;
231+
232+ // Compute the jump size and offset only when code size is known.
233+ if (emit -> pass >= MP_PASS_CODE_SIZE ) {
234+ // The -2 accounts for this jump opcode taking 2 bytes (at least).
235+ bytecode_offset = emit -> label_offsets [label ] - emit -> bytecode_offset - 2 ;
236+
237+ // Check if the bytecode_offset is small enough to use a 1-byte encoding.
    // The unsigned test casts to size_t, so a negative offset becomes a large
    // value and fails the <= 127 comparison.
238+ if ((is_signed && -64 <= bytecode_offset && bytecode_offset <= 63 )
239+ || (!is_signed && (size_t )bytecode_offset <= 127 )) {
240+ // Use a 1-byte jump offset.
241+ jump_encoding_size = 0 ;
242+ }
243+
244+ // Adjust the offset depending on the size of the encoding of the offset.
    // In the 2-byte form the instruction is one byte longer, so the offset
    // measured from the following ip is one smaller.
245+ bytecode_offset -= jump_encoding_size ;
246+
247+ assert (is_signed || bytecode_offset >= 0 );
224248 }
225- byte * c = emit_get_cur_to_write_bytecode (emit , 3 );
226- c [0 ] = b1 ;
227- c [1 ] = bytecode_offset ;
228- c [2 ] = bytecode_offset >> 8 ;
229- }
230249
231- // signed labels are relative to ip following this instruction, stored as 16 bits, in excess
232- STATIC void emit_write_bytecode_byte_signed_label (emit_t * emit , int stack_adj , byte b1 , mp_uint_t label ) {
233- mp_emit_bc_adjust_stack_size (emit , stack_adj );
234- int bytecode_offset ;
235- if (emit -> pass < MP_PASS_EMIT ) {
236- bytecode_offset = 0 ;
250+ // Emit the opcode.
251+ byte * c = emit_get_cur_to_write_bytecode (emit , 2 + jump_encoding_size );
252+ c [0 ] = b1 ;
253+ if (jump_encoding_size == 0 ) {
    // 1-byte form: signed offsets are biased by 0x40 so that -64..63 is stored
    // as 0..0x7f; the assert below checks the stored value stays within 7 bits.
254+ if (is_signed ) {
255+ bytecode_offset += 0x40 ;
256+ }
257+ assert (0 <= bytecode_offset && bytecode_offset <= 0x7f );
258+ c [1 ] = bytecode_offset ;
237259 } else {
238- bytecode_offset = emit -> label_offsets [label ] - emit -> bytecode_offset - 3 + 0x8000 ;
    // 2-byte form: signed offsets are biased by 0x4000. The low 7 bits go in
    // c[1] with the 0x80 flag set, and the remaining bits go in c[2].
    // NOTE(review): unlike the 1-byte branch there is no range assert here. A
    // biased offset outside the range of the 7+8 bit split (assuming byte is
    // 8 bits) would be truncated when stored - confirm that oversized jumps are
    // rejected elsewhere.
260+ if (is_signed ) {
261+ bytecode_offset += 0x4000 ;
262+ }
263+ c [1 ] = 0x80 | (bytecode_offset & 0x7f );
264+ c [2 ] = bytecode_offset >> 7 ;
239265 }
240- byte * c = emit_get_cur_to_write_bytecode (emit , 3 );
241- c [0 ] = b1 ;
242- c [1 ] = bytecode_offset ;
243- c [2 ] = bytecode_offset >> 8 ;
244266}
245267
246268void mp_emit_bc_start_pass (emit_t * emit , pass_kind_t pass , scope_t * scope ) {
@@ -250,12 +272,6 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
250272 emit -> scope = scope ;
251273 emit -> last_source_line_offset = 0 ;
252274 emit -> last_source_line = 1 ;
253- #ifndef NDEBUG
254- // With debugging enabled labels are checked for unique assignment
255- if (pass < MP_PASS_EMIT && emit -> label_offsets != NULL ) {
256- memset (emit -> label_offsets , -1 , emit -> max_num_labels * sizeof (mp_uint_t ));
257- }
258- #endif
259275 emit -> bytecode_offset = 0 ;
260276 emit -> code_info_offset = 0 ;
261277
@@ -315,9 +331,9 @@ void mp_emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
315331 }
316332}
317333
318- void mp_emit_bc_end_pass (emit_t * emit ) {
334+ bool mp_emit_bc_end_pass (emit_t * emit ) {
319335 if (emit -> pass == MP_PASS_SCOPE ) {
320- return ;
336+ return true ;
321337 }
322338
323339 // check stack is back to zero size
@@ -344,6 +360,20 @@ void mp_emit_bc_end_pass(emit_t *emit) {
344360 emit -> code_base = m_new0 (byte , emit -> code_info_size + emit -> bytecode_size );
345361
346362 } else if (emit -> pass == MP_PASS_EMIT ) {
363+ // Code info and/or bytecode can shrink during this pass.
364+ assert (emit -> code_info_offset <= emit -> code_info_size );
365+ assert (emit -> bytecode_offset <= emit -> bytecode_size );
366+
367+ if (emit -> code_info_offset != emit -> code_info_size
368+ || emit -> bytecode_offset != emit -> bytecode_size ) {
369+ // Code info and/or bytecode changed size in this pass, so request the
370+ // compiler to do another pass with these updated sizes.
371+ emit -> code_info_size = emit -> code_info_offset ;
372+ emit -> bytecode_size = emit -> bytecode_offset ;
373+ return false;
374+ }
375+
376+ // Bytecode is finalised, assign it to the raw code object.
347377 mp_emit_glue_assign_bytecode (emit -> scope -> raw_code , emit -> code_base ,
348378 #if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_DEBUG_PRINTERS
349379 emit -> code_info_size + emit -> bytecode_size ,
@@ -354,6 +384,8 @@ void mp_emit_bc_end_pass(emit_t *emit) {
354384 #endif
355385 emit -> scope -> scope_flags );
356386 }
387+
388+ return true;
357389}
358390
359391bool mp_emit_bc_last_emit_was_return_value (emit_t * emit ) {
@@ -396,15 +428,16 @@ void mp_emit_bc_label_assign(emit_t *emit, mp_uint_t l) {
396428 if (emit -> pass == MP_PASS_SCOPE ) {
397429 return ;
398430 }
431+
432+ // Label offsets can change from one pass to the next, but they must only
433+ // decrease (ie code can only shrink). There will be multiple MP_PASS_EMIT
434+ // stages until the labels no longer change, which is when the code size
435+ // stays constant after a MP_PASS_EMIT.
399436 assert (l < emit -> max_num_labels );
400- if (emit -> pass < MP_PASS_EMIT ) {
401- // assign label offset
402- assert (emit -> label_offsets [l ] == (mp_uint_t )- 1 );
403- emit -> label_offsets [l ] = emit -> bytecode_offset ;
404- } else {
405- // ensure label offset has not changed from MP_PASS_CODE_SIZE to MP_PASS_EMIT
406- assert (emit -> label_offsets [l ] == emit -> bytecode_offset );
407- }
437+ assert (emit -> pass == MP_PASS_STACK_SIZE || emit -> bytecode_offset <= emit -> label_offsets [l ]);
438+
439+ // Assign label offset.
440+ emit -> label_offsets [l ] = emit -> bytecode_offset ;
408441}
409442
410443void mp_emit_bc_import (emit_t * emit , qstr qst , int kind ) {
@@ -552,22 +585,22 @@ void mp_emit_bc_rot_three(emit_t *emit) {
552585}
553586
// Emit an unconditional MP_BC_JUMP to `label` with a stack adjustment of 0.
// The '-'/'+' pair only renames the writer that is called: the dedicated
// signed-label writer is replaced by emit_write_bytecode_byte_label.
554587void mp_emit_bc_jump (emit_t * emit , mp_uint_t label ) {
555- emit_write_bytecode_byte_signed_label (emit , 0 , MP_BC_JUMP , label );
588+ emit_write_bytecode_byte_label (emit , 0 , MP_BC_JUMP , label );
556589}
557590
// Emit a conditional jump to `label`: MP_BC_POP_JUMP_IF_TRUE when `cond` is
// true, MP_BC_POP_JUMP_IF_FALSE otherwise. Both variants pass a stack
// adjustment of -1.
// The '-'/'+' pairs only switch from the signed-label writer to
// emit_write_bytecode_byte_label; opcode, stack adjustment and label are unchanged.
558591void mp_emit_bc_pop_jump_if (emit_t * emit , bool cond , mp_uint_t label ) {
559592 if (cond ) {
560- emit_write_bytecode_byte_signed_label (emit , -1 , MP_BC_POP_JUMP_IF_TRUE , label );
593+ emit_write_bytecode_byte_label (emit , -1 , MP_BC_POP_JUMP_IF_TRUE , label );
561594 } else {
562- emit_write_bytecode_byte_signed_label (emit , -1 , MP_BC_POP_JUMP_IF_FALSE , label );
595+ emit_write_bytecode_byte_label (emit , -1 , MP_BC_POP_JUMP_IF_FALSE , label );
563596 }
564597}
565598
// Emit a jump-or-pop instruction to `label`: MP_BC_JUMP_IF_TRUE_OR_POP when
// `cond` is true, MP_BC_JUMP_IF_FALSE_OR_POP otherwise. Both variants pass a
// stack adjustment of -1.
// The '-'/'+' pairs only switch from the signed-label writer to
// emit_write_bytecode_byte_label; opcode, stack adjustment and label are unchanged.
566599void mp_emit_bc_jump_if_or_pop (emit_t * emit , bool cond , mp_uint_t label ) {
567600 if (cond ) {
568- emit_write_bytecode_byte_signed_label (emit , -1 , MP_BC_JUMP_IF_TRUE_OR_POP , label );
601+ emit_write_bytecode_byte_label (emit , -1 , MP_BC_JUMP_IF_TRUE_OR_POP , label );
569602 } else {
570- emit_write_bytecode_byte_signed_label (emit , -1 , MP_BC_JUMP_IF_FALSE_OR_POP , label );
603+ emit_write_bytecode_byte_label (emit , -1 , MP_BC_JUMP_IF_FALSE_OR_POP , label );
571604 }
572605}
573606
@@ -581,9 +614,9 @@ void mp_emit_bc_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_dept
581614 emit_write_bytecode_raw_byte (emit , MP_BC_POP_TOP );
582615 }
583616 }
584- emit_write_bytecode_byte_signed_label (emit , 0 , MP_BC_JUMP , label & ~MP_EMIT_BREAK_FROM_FOR );
617+ emit_write_bytecode_byte_label (emit , 0 , MP_BC_JUMP , label & ~MP_EMIT_BREAK_FROM_FOR );
585618 } else {
586- emit_write_bytecode_byte_signed_label (emit , 0 , MP_BC_UNWIND_JUMP , label & ~MP_EMIT_BREAK_FROM_FOR );
619+ emit_write_bytecode_byte_label (emit , 0 , MP_BC_UNWIND_JUMP , label & ~MP_EMIT_BREAK_FROM_FOR );
587620 emit_write_bytecode_raw_byte (emit , ((label & MP_EMIT_BREAK_FROM_FOR ) ? 0x80 : 0 ) | except_depth );
588621 }
589622}
@@ -595,7 +628,7 @@ void mp_emit_bc_setup_block(emit_t *emit, mp_uint_t label, int kind) {
595628 // The SETUP_WITH opcode pops ctx_mgr from the top of the stack
596629 // and then pushes 3 entries: __exit__, ctx_mgr, as_value.
597630 int stack_adj = kind == MP_EMIT_SETUP_BLOCK_WITH ? 2 : 0 ;
598- emit_write_bytecode_byte_unsigned_label (emit , stack_adj , MP_BC_SETUP_WITH + kind , label );
631+ emit_write_bytecode_byte_label (emit , stack_adj , MP_BC_SETUP_WITH + kind , label );
599632}
600633
601634void mp_emit_bc_with_cleanup (emit_t * emit , mp_uint_t label ) {
@@ -617,7 +650,7 @@ void mp_emit_bc_get_iter(emit_t *emit, bool use_stack) {
617650}
618651
// Emit MP_BC_FOR_ITER with a jump to `label` and a stack adjustment of +1.
// Before this change the call went to the dedicated unsigned-label writer; it
// now goes to emit_write_bytecode_byte_label, which derives signedness from the
// opcode value.
// NOTE(review): presumably MP_BC_FOR_ITER is numbered above the signed-jump
// opcodes so the offset is still treated as unsigned - confirm.
619652void mp_emit_bc_for_iter (emit_t * emit , mp_uint_t label ) {
620- emit_write_bytecode_byte_unsigned_label (emit , 1 , MP_BC_FOR_ITER , label );
653+ emit_write_bytecode_byte_label (emit , 1 , MP_BC_FOR_ITER , label );
621654}
622655
623656void mp_emit_bc_for_iter_end (emit_t * emit ) {
@@ -626,7 +659,7 @@ void mp_emit_bc_for_iter_end(emit_t *emit) {
626659
// Emit MP_BC_POP_EXCEPT_JUMP with a jump to `label` and a stack adjustment of 0.
// `within_exc_handler` is not used by this emitter; the cast to void marks it
// as deliberately ignored.
// Before this change the call went to the dedicated unsigned-label writer; it
// now goes to emit_write_bytecode_byte_label.
// NOTE(review): presumably MP_BC_POP_EXCEPT_JUMP is numbered above the
// signed-jump opcodes so the offset is still treated as unsigned - confirm.
627660void mp_emit_bc_pop_except_jump (emit_t * emit , mp_uint_t label , bool within_exc_handler ) {
628661 (void )within_exc_handler ;
629- emit_write_bytecode_byte_unsigned_label (emit , 0 , MP_BC_POP_EXCEPT_JUMP , label );
662+ emit_write_bytecode_byte_label (emit , 0 , MP_BC_POP_EXCEPT_JUMP , label );
630663}
631664
632665void mp_emit_bc_unary_op (emit_t * emit , mp_unary_op_t op ) {
0 commit comments