Skip to content

Commit 1f5ba69

Browse files
agatti authored and dpgeorge committed
py/asmthumb: Extend load/store generators with ARMv7-M opcodes.
This commit lets the Thumb native code generator backend emit ARMv7-M specific opcodes for indexed load/store operations if possible. Now T3 opcode encodings are used if the generator backend is configured to allow emitting ARMv7-M opcodes and if the (unsigned) scaled index fits in 12 bits. Or, in other words, LDR{B,H}.W and STR{B,H}.W opcodes are now emitted if possible. Signed-off-by: Alessandro Gatti <[email protected]>
1 parent 78ee1ba commit 1f5ba69

File tree

3 files changed

+57
-79
lines changed

3 files changed

+57
-79
lines changed

py/asmthumb.c

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32)
4141
#define UNSIGNED_FIT7(x) ((uint32_t)(x) < 128)
4242
#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
43+
#define UNSIGNED_FIT12(x) (((x) & 0xfffff000) == 0)
4344
#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
4445
// True when the 32-bit value x is representable as a signed 8-bit integer,
// i.e. bits 31..7 of x are either all clear or all set.
// The whole expansion is parenthesised so that the macro composes correctly
// with surrounding operators such as ! and &&.
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
4546
// True when the 32-bit value x is representable as a signed 9-bit integer,
// i.e. bits 31..8 of x are either all clear or all set.
// The whole expansion is parenthesised so that the macro composes correctly
// with surrounding operators such as ! and &&.
#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00))
@@ -52,12 +53,6 @@
5253
#define OP_SUB_W_RRI_HI(reg_src) (0xf2a0 | (reg_src))
5354
// Low halfword of the wide SUB-immediate opcode (pairs with OP_SUB_W_RRI_HI):
// bits 10..8 of imm11 are moved to bits 14..12, bits 7..0 of imm11 stay in
// place, and reg_dest goes in bits 11..8.
// Every argument is parenthesised so expression arguments encode correctly.
#define OP_SUB_W_RRI_LO(reg_dest, imm11) ((((imm11) << 4) & 0x7000) | ((reg_dest) << 8) | ((imm11) & 0xff))
5455

55-
#define OP_LDR_W_HI(reg_base) (0xf8d0 | (reg_base))
56-
#define OP_LDR_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))
57-
58-
#define OP_LDRH_W_HI(reg_base) (0xf8b0 | (reg_base))
59-
#define OP_LDRH_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))
60-
6156
static inline byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int n) {
6257
return mp_asm_base_get_cur_to_write_bytes(&as->base, n);
6358
}
@@ -432,11 +427,6 @@ void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label) {
432427
asm_thumb_add_reg_reg(as, rlo_dest, ASM_THUMB_REG_R15); // 2 bytes
433428
}
434429

435-
// ARMv7-M only
436-
static inline void asm_thumb_ldr_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
437-
asm_thumb_op32(as, OP_LDR_W_HI(reg_base), OP_LDR_W_LO(reg_dest, word_offset * 4));
438-
}
439-
440430
// emits code for: reg_dest = reg_base + offset << offset_shift
441431
static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint offset_shift) {
442432
if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8) {
@@ -464,30 +454,44 @@ static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint re
464454
}
465455
}
466456

467-
void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
468-
if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(word_offset)) {
469-
asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_base, word_offset);
470-
} else if (asm_thumb_allow_armv7m(as)) {
471-
asm_thumb_ldr_reg_reg_i12(as, reg_dest, reg_base, word_offset);
457+
#define OP_LDR_STR_W_HI(shift, reg) ((0xf880 | (shift) << 5) | (reg))
458+
#define OP_LDR_STR_W_LO(reg, imm12) (((reg) << 12) | (imm12))
459+
460+
#define OP_LDR 0x01
461+
#define OP_STR 0x00
462+
463+
#define OP_LDR_W 0x10
464+
#define OP_STR_W 0x00
465+
466+
static const uint8_t OP_LDR_STR_TABLE[3] = {
467+
0x0E, 0x10, 0x0C
468+
};
469+
470+
// Emits code for: reg_dest = *(reg_base + (offset << shift)), picking the
// shortest encoding available.  shift selects the access width and must be
// 0 (byte), 1 (halfword) or 2 (word), as it is used to index OP_LDR_STR_TABLE.
//
// NOTE(review): the fallback sequence ends with a T1 opcode, whose register
// fields are only 3 bits wide, and computes offset - 31 on an unsigned value;
// presumably callers only reach it with a low reg_dest and offset >= 31 -
// confirm against the call sites.
void asm_thumb_load_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint shift) {
    if (UNSIGNED_FIT5(offset) && (reg_dest < ASM_THUMB_REG_R8) && (reg_base < ASM_THUMB_REG_R8)) {
        // Can use T1 encoding
        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_LDR) << 11) | (offset << 6) | (reg_base << 3) | reg_dest);
    } else if (asm_thumb_allow_armv7m(as) && UNSIGNED_FIT12(offset) && UNSIGNED_FIT12(offset << shift)) {
        // Can use T3 encoding.  The unscaled offset is checked first so that a
        // very large offset cannot wrap around in the shift and wrongly pass
        // the 12-bit test.
        asm_thumb_op32(as, (OP_LDR_STR_W_HI(shift, reg_base) | OP_LDR_W), OP_LDR_STR_W_LO(reg_dest, (offset << shift)));
    } else {
        // Must use the generic sequence: reg_dest = reg_base + ((offset - 31) << shift),
        // followed by a T1 load from [reg_dest, #31].
        asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, offset - 31, shift);
        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_LDR) << 11) | (31 << 6) | (reg_dest << 3) | (reg_dest));
    }
}
477483

478-
// ARMv7-M only
479-
static inline void asm_thumb_ldrh_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) {
480-
asm_thumb_op32(as, OP_LDRH_W_HI(reg_base), OP_LDRH_W_LO(reg_dest, uint16_offset * 2));
481-
}
482-
483-
void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) {
484-
if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(uint16_offset)) {
485-
asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_base, uint16_offset);
486-
} else if (asm_thumb_allow_armv7m(as)) {
487-
asm_thumb_ldrh_reg_reg_i12(as, reg_dest, reg_base, uint16_offset);
484+
// Emits code for: *(reg_base + (offset << shift)) = reg_src, picking the
// shortest encoding available.  shift selects the access width and must be
// 0 (byte), 1 (halfword) or 2 (word), as it is used to index OP_LDR_STR_TABLE.
//
// NOTE: when neither the T1 nor the T3 encoding can be used, the fallback
// sequence overwrites reg_base with the adjusted address, so the caller must
// not rely on reg_base being preserved.
// NOTE(review): the fallback ends with a T1 opcode, whose register fields are
// only 3 bits wide, and computes offset - 31 on an unsigned value; presumably
// callers only reach it with low registers and offset >= 31 - confirm against
// the call sites.
void asm_thumb_store_reg_reg_offset(asm_thumb_t *as, uint reg_src, uint reg_base, uint offset, uint shift) {
    if (UNSIGNED_FIT5(offset) && (reg_src < ASM_THUMB_REG_R8) && (reg_base < ASM_THUMB_REG_R8)) {
        // Can use T1 encoding
        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_STR) << 11) | (offset << 6) | (reg_base << 3) | reg_src);
    } else if (asm_thumb_allow_armv7m(as) && UNSIGNED_FIT12(offset) && UNSIGNED_FIT12(offset << shift)) {
        // Can use T3 encoding.  The unscaled offset is checked first so that a
        // very large offset cannot wrap around in the shift and wrongly pass
        // the 12-bit test.
        asm_thumb_op32(as, (OP_LDR_STR_W_HI(shift, reg_base) | OP_STR_W), OP_LDR_STR_W_LO(reg_src, (offset << shift)));
    } else {
        // Must use the generic sequence: reg_base = reg_base + ((offset - 31) << shift),
        // followed by a T1 store to [reg_base, #31].
        asm_thumb_add_reg_reg_offset(as, reg_base, reg_base, offset - 31, shift);
        asm_thumb_op16(as, ((OP_LDR_STR_TABLE[shift] | OP_STR) << 11) | (31 << 6) | (reg_base << 3) | reg_src);
    }
}
493497

@@ -569,7 +573,7 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel) {
569573

570574
void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp) {
571575
// Load ptr to function from table, indexed by fun_id, then call it
572-
asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id);
576+
asm_thumb_load_reg_reg_offset(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id, 2);
573577
asm_thumb_op16(as, OP_BLX(reg_temp));
574578
}
575579

py/asmthumb.h

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,10 @@ void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num); //
382382
void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num); // convenience
383383
void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label);
384384

385-
void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset); // convenience
386-
void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset); // convenience
385+
// Generate optimised load dest, [src, #offset] sequence
386+
void asm_thumb_load_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint shift);
387+
// Generate optimised store src, [dest, #offset] sequence
388+
void asm_thumb_store_reg_reg_offset(asm_thumb_t *as, uint reg_src, uint reg_base, uint offset, uint shift);
387389

388390
void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narrow or wide branch
389391
void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch
@@ -463,17 +465,20 @@ void asm_thumb_b_rel12(asm_thumb_t *as, int rel);
463465
#define ASM_MUL_REG_REG(as, reg_dest, reg_src) asm_thumb_format_4((as), ASM_THUMB_FORMAT_4_MUL, (reg_dest), (reg_src))
464466

465467
#define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), (word_offset))
466-
#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrb_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
467-
#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrh_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
468-
#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, uint16_offset) asm_thumb_ldrh_reg_reg_i12_optimised((as), (reg_dest), (reg_base), (uint16_offset))
469-
#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_thumb_ldr_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
470-
#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_ldr_reg_reg_i12_optimised((as), (reg_dest), (reg_base), (word_offset))
468+
#define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) ASM_LOAD8_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
469+
#define ASM_LOAD8_REG_REG_OFFSET(as, reg_dest, reg_base, byte_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (byte_offset), 0)
470+
#define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) ASM_LOAD16_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
471+
#define ASM_LOAD16_REG_REG_OFFSET(as, reg_dest, reg_base, halfword_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (halfword_offset), 1)
472+
#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) ASM_LOAD32_REG_REG_OFFSET((as), (reg_dest), (reg_base), 0)
473+
#define ASM_LOAD32_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_load_reg_reg_offset((as), (reg_dest), (reg_base), (word_offset), 2)
471474

472475
#define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), (word_offset))
473-
#define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_thumb_strb_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
474-
#define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_thumb_strh_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
475-
#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
476-
#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), (word_offset))
476+
#define ASM_STORE8_REG_REG(as, reg_src, reg_base) ASM_STORE8_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
477+
#define ASM_STORE8_REG_REG_OFFSET(as, reg_src, reg_base, byte_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (byte_offset), 0)
478+
#define ASM_STORE16_REG_REG(as, reg_src, reg_base) ASM_STORE16_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
479+
#define ASM_STORE16_REG_REG_OFFSET(as, reg_src, reg_base, halfword_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (halfword_offset), 1)
480+
#define ASM_STORE32_REG_REG(as, reg_src, reg_base) ASM_STORE32_REG_REG_OFFSET((as), (reg_src), (reg_base), 0)
481+
#define ASM_STORE32_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_store_reg_reg_offset((as), (reg_src), (reg_base), (word_offset), 2)
477482

478483
#define ASM_LOAD8_REG_REG_REG(as, reg_dest, reg_base, reg_index) asm_thumb_ldrb_rlo_rlo_rlo((as), (reg_dest), (reg_base), (reg_index))
479484
#define ASM_LOAD16_REG_REG_REG(as, reg_dest, reg_base, reg_index) \

py/emitnative.c

Lines changed: 6 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1540,12 +1540,7 @@ static void emit_native_load_subscr(emit_t *emit) {
15401540
#ifdef ASM_LOAD8_REG_REG_OFFSET
15411541
ASM_LOAD8_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value);
15421542
#else
1543-
#if N_THUMB
1544-
if (index_value >= 0 && index_value < 32) {
1545-
asm_thumb_ldrb_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
1546-
break;
1547-
}
1548-
#elif N_RV32
1543+
#if N_RV32
15491544
if (FIT_SIGNED(index_value, 12)) {
15501545
asm_rv32_opcode_lbu(emit->as, REG_RET, reg_base, index_value);
15511546
break;
@@ -1572,12 +1567,7 @@ static void emit_native_load_subscr(emit_t *emit) {
15721567
#ifdef ASM_LOAD16_REG_REG_OFFSET
15731568
ASM_LOAD16_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value);
15741569
#else
1575-
#if N_THUMB
1576-
if (index_value >= 0 && index_value < 32) {
1577-
asm_thumb_ldrh_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
1578-
break;
1579-
}
1580-
#elif N_RV32
1570+
#if N_RV32
15811571
if (FIT_SIGNED(index_value, 11)) {
15821572
asm_rv32_opcode_lhu(emit->as, REG_RET, reg_base, index_value << 1);
15831573
break;
@@ -1604,12 +1594,7 @@ static void emit_native_load_subscr(emit_t *emit) {
16041594
#ifdef ASM_LOAD32_REG_REG_OFFSET
16051595
ASM_LOAD32_REG_REG_OFFSET(emit->as, REG_RET, reg_base, index_value);
16061596
#else
1607-
#if N_THUMB
1608-
if (index_value >= 0 && index_value < 32) {
1609-
asm_thumb_ldr_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
1610-
break;
1611-
}
1612-
#elif N_RV32
1597+
#if N_RV32
16131598
if (FIT_SIGNED(index_value, 10)) {
16141599
asm_rv32_opcode_lw(emit->as, REG_RET, reg_base, index_value << 2);
16151600
break;
@@ -1824,13 +1809,7 @@ static void emit_native_store_subscr(emit_t *emit) {
18241809
#ifdef ASM_STORE8_REG_REG_OFFSET
18251810
ASM_STORE8_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value);
18261811
#else
1827-
// TODO optimise to use thumb strb r1, [r2, r3]
1828-
#if N_THUMB
1829-
if (index_value >= 0 && index_value < 32) {
1830-
asm_thumb_strb_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
1831-
break;
1832-
}
1833-
#elif N_RV32
1812+
#if N_RV32
18341813
if (FIT_SIGNED(index_value, 12)) {
18351814
asm_rv32_opcode_sb(emit->as, reg_value, reg_base, index_value);
18361815
break;
@@ -1860,12 +1839,7 @@ static void emit_native_store_subscr(emit_t *emit) {
18601839
#ifdef ASM_STORE16_REG_REG_OFFSET
18611840
ASM_STORE16_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value);
18621841
#else
1863-
#if N_THUMB
1864-
if (index_value >= 0 && index_value < 32) {
1865-
asm_thumb_strh_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
1866-
break;
1867-
}
1868-
#elif N_RV32
1842+
#if N_RV32
18691843
if (FIT_SIGNED(index_value, 11)) {
18701844
asm_rv32_opcode_sh(emit->as, reg_value, reg_base, index_value << 1);
18711845
break;
@@ -1891,12 +1865,7 @@ static void emit_native_store_subscr(emit_t *emit) {
18911865
#ifdef ASM_STORE32_REG_REG_OFFSET
18921866
ASM_STORE32_REG_REG_OFFSET(emit->as, reg_value, reg_base, index_value);
18931867
#else
1894-
#if N_THUMB
1895-
if (index_value >= 0 && index_value < 32) {
1896-
asm_thumb_str_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
1897-
break;
1898-
}
1899-
#elif N_RV32
1868+
#if N_RV32
19001869
if (FIT_SIGNED(index_value, 10)) {
19011870
asm_rv32_opcode_sw(emit->as, reg_value, reg_base, index_value << 2);
19021871
break;

0 commit comments

Comments
 (0)