1212
1313if 1
1414
15- ; Optimized for the no-overlap case
15+ ; Optimized for when src != dst
1616_memmove:
17- ; no-overlap | LDIR | 32F + 12R + 1
18- ; src > dst | LDIR | 32F + 12R + 1
19- ; src < dst | LDDR | 36F + 9R + 2
20- ; src = dst | LDDR | 36F + 9R + 2
21- ; zero size | | 24F + 9R + 2
17+ ; src > dst | LDIR | 32F + 12R + 1
18+ ; src < dst | LDDR | 35F + 9R + 2
19+ ; src = dst | LDDR | 35F + 9R + 2
20+ ; zero size | | 24F + 9R + 2
2221
2322 ld iy, -1
2423 add iy, sp
@@ -34,35 +33,31 @@ _memmove:
3433; .copy_forwards:
3534 add hl, de
3635 inc hl
37- ; src > dst or no-overlap
36+ ; src > dst
3837 ldir
3938.zero:
4039 ld hl, (iy + 4)
4140 ret
4241.copy_backwards:
4342 ; move HL and DE to the last byte of src and dst, because LDDR copies from the end downwards
44- add hl, de ; HL = src - 1
45- dec de ; DE = dst - 1
46- add hl, bc
4743 ex de, hl
4844 add hl, bc
49- ex de, hl
50- ; HL = src + size - 1
51- ; DE = dst + size - 1
45+ ex de, hl ; HL = src - dst - 1, DE = dst + size
46+ add hl, de ; HL = src + size - 1
47+ dec de ; DE = dst + size - 1
5248 lddr ; copies BC (= size) bytes downwards; DE ends at dst - 1
5349 ex de, hl ; HL = dst - 1
5450 inc hl ; HL = dst (presumably the return value, matching the reload at .zero - confirm)
5551 ret
5652
5753else
5854
59- ; Optimized for the overlapping case
55+ ; Optimized for when src = dst
6056_memmove:
61- ; no overlap | LDIR | 34F + 12R + 2
62- ; src > dst | LDIR | 34F + 12R + 2
63- ; src < dst | LDDR | 36F + 9R + 2
64- ; src = dst | | 30F + 9R + 2
65- ; zero size | | 24F + 9R + 2
57+ ; src > dst | LDIR | 34F + 12R + 2
58+ ; src < dst | LDDR | 36F + 9R + 2
59+ ; src = dst | | 30F + 9R + 2
60+ ; zero size | | 24F + 9R + 2
6661
6762 ld iy, -1
6863 add iy, sp
0 commit comments