1 file changed: +106 -93 lines changed
Original file line number | Diff line number | Diff line change
66
77; BC:UDE:UHL = ((uint128_t)BC:UDE:UHL * (uint128_t)(SP64)) >> 64
88__llmulhu:
9+ ; modified version of __llmulu that uses exx to obtain the upper 64 bits of the result.
10+ ; __llmulhu runs slightly faster than two calls to __llmulu, and is much faster
11+ ; than the naive implementation of __llmulhu that calls __llmulu four times.
12+ push af
13+ ld a, i
14+ di
15+ push af
16+
917 push ix
1018 push iy
11- ld ix, -36
12- add ix, sp
13- ld sp, ix
14- lea ix, ix + 36
15-
16- ld (ix - 3), bc
17- ld (ix - 6), de
18- ld (ix - 9), hl
19-
20- ld bc, 0
21- ld (ix - 10), b
22- ld (ix - 13), bc
23- ld (ix - 30), bc
24- ld c, (ix + 12)
25- ld (ix - 33), bc
26- ld iy, (ix + 9)
27- ld (ix - 36), iy
28-
29- ; x_lo * y_lo
30- ld c, b
31- ld d, b
32- inc de
33- dec.s de
34- call __llmulu
35- inc bc
36- dec.s bc
37- ld (ix - 16), bc
38- ld (ix - 19), de
39- ld b, 0
40- ld c, b
41-
42- ; x_hi * y_lo
43- inc.s de
44- ld d, b
45- ld e, (ix - 2)
46- ld hl, (ix - 5)
47- call __llmulu
48- inc bc
49- dec.s bc
50- ld (ix - 21), bc
51- ld (ix - 24), de
52- ld (ix - 27), hl
53-
54- ld c, (ix + 16)
55- ld (ix - 33), c
56- ld iy, (ix + 13)
57- ld (ix - 36), iy
58-
59- ; x_lo * y_hi
60- ld b, 0
61- ld c, b
62- inc.s de
63- ld d, b
64- ld e, (ix - 6)
65- ld hl, (ix - 9)
66- call __llmulu
67- inc bc
68- dec.s bc
69- lea iy, ix - 27
70- call __llmulhu_i72add
71- lea iy, ix - 18
72- call __llmulhu_i72add
73- ld (ix - 16), bc
74- ld (ix - 19), de
75- ld bc, 0
76-
77- ; x_hi * y_hi
78- inc.s de
79- ld d, b
80- ld e, (ix - 2)
81- ld hl, (ix - 5)
82- call __llmulu
83- inc bc
84- dec.s bc
85- lea iy, ix - 18
86- call __llmulhu_i72add
87- ld sp, ix
88- pop iy
89- pop ix
90- ret
9119
92- __llmulhu_i72add:
93- ; similar to __lladd, except iy points to the stack and is destroyed
94- push bc
95- ld bc, (iy + 0)
96- add hl, bc
20+ ld ix, 0
21+ lea iy, ix - 6
22+ add iy, sp ; cf=1
23+
24+ push de
25+ push hl
26+ ld l, c
27+ ld h, b
28+ ld.s sp, hl
29+
30+ lea hl, iy + 21
31+ ld b, 8
32+ .push_loop:
33+ push af
34+ ld a, (hl)
35+ inc hl
36+ or a, a ; cf=0
37+ djnz .push_loop
38+
39+ sbc hl, hl
40+ ld e, l
41+ ld d, h
42+
43+ exx
44+ sbc hl, hl
45+ ex de, hl
46+ sbc hl, hl
47+ ld c, l
48+ ld b, l
49+ exx
50+
51+ .byte_loop:
52+ scf
53+ adc a, a
54+
55+ .bit_loop:
56+ ex af, af'
57+
58+ add ix, ix
59+ adc hl, hl
9760 ex de, hl
61+ adc.s hl, hl
62+ ex de, hl
63+
64+ exx
65+ adc hl, hl
66+ ex de, hl
67+ adc hl, hl
68+ ex de, hl
69+ rl c
70+ rl b
71+ exx
72+
73+ ex af, af'
74+
75+ jr nc, .add_end
76+ ld bc, (iy)
77+ add ix, bc
9878 ld bc, (iy + 3)
9979 adc hl, bc
10080 ex de, hl
101- pop bc
102- jr nc, .no_carry48
81+ adc.s hl, sp
82+ ex de, hl
83+ jr nc, .add_end
84+ exx
85+ inc hl
86+ add hl, de
87+ or a, a
88+ sbc hl, de
89+ jr nz, .add_end_exx
90+ inc de
91+ sbc hl, de
92+ add hl, de
93+ jr nz, .add_end_exx
10394 inc bc
104- .no_carry48:
105- ld iy, (iy + 6)
106- add iy, bc
107- lea bc, iy
108- ret
95+ .add_end_exx:
96+ exx
97+ .add_end:
98+
99+ add a, a
100+ jr nz, .bit_loop
101+
102+ pop af
103+ jr nc, .byte_loop
104+
105+ ; ld b, d
106+ ; ld c, e
107+ ; ex de, hl
108+ ; lea hl, ix
109+ ; BC:UDE:UHL = lower 64 bits
110+ ; shadow BC:UDE:UHL = upper 64 bits
111+ exx
109112
110- extern __llmulu
113+ pop af ; reset SP
114+ pop af ; reset SP
115+ pop iy
116+ pop ix
117+
118+ pop af
119+ jp po, .skipEI
120+ ei
121+ .skipEI:
122+ pop af
123+ ret
You can’t perform that action at this time.
0 commit comments