Skip to content

Commit 77c4617

Browse files
committed
Don't have loop < 4
1 parent 57fa1d2 commit 77c4617

File tree

1 file changed

+56
-15
lines changed

1 file changed

+56
-15
lines changed

src/day25.rs

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
6666
"je 2f", // Jump on empty
6767
"mov {i}, {max_i}",
6868
"cmp {i}, 16",
69-
"jb 6f", // Jump to < 16 case
69+
"jb 7f", // Jump to < 16 case
7070

7171
"4:",
7272
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
@@ -96,26 +96,47 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
9696
"add {sum},{t}",
9797
"cmp {i}, 16",
9898
"jae 4b", // Loop
99-
"6:",
99+
"7:",
100+
// i < 16
100101
"cmp {i}, 4",
101102
"jb 3f", // Is < 4
102-
// Is >= 4 and < 16
103+
"cmp {i}, 8",
104+
"jb 5f", // Is < 8
105+
"cmp {i}, 12",
106+
"jb 6f", // Is < 12
107+
108+
// 12 <= i < 16
109+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
110+
"add {i}, -4",
111+
"vpand {vt}, {vt}, {msb}",
112+
"vpcmpeqq {vt}, {vt}, {zero}",
113+
"vpmovmskb {t}, {vt}",
114+
"popcnt {t}, {t}",
115+
"add {sum},{t}",
116+
117+
"6:",
118+
// i < 12
119+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
120+
"add {i}, -4",
121+
"vpand {vt}, {vt}, {msb}",
122+
"vpcmpeqq {vt}, {vt}, {zero}",
123+
"vpmovmskb {t}, {vt}",
124+
"popcnt {t}, {t}",
125+
"add {sum},{t}",
103126

104127
"5:",
128+
// i < 8
105129
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
106130
"add {i}, -4",
107131
"vpand {vt}, {vt}, {msb}",
108132
"vpcmpeqq {vt}, {vt}, {zero}",
109133
"vpmovmskb {t}, {vt}",
110134
"popcnt {t}, {t}",
111135
"add {sum},{t}",
112-
"cmp {i}, 4",
113-
"jae 5b", // Loop
136+
114137
"3:",
115138
"test {i}, {i}",
116139
"je 2f", // Is zero
117-
118-
// Is > 0 and < 4
119140
"vpaddq {vt}, {d}, ymmword ptr [{os}]",
120141
"vpand {vt}, {vt}, {msb}",
121142
"vpcmpeqq {vt}, {vt}, {zero}",
@@ -140,12 +161,11 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
140161
keys_i += 1;
141162
} else {
142163
std::arch::asm!(
143-
144164
"test {max_i}, {max_i}",
145165
"je 2f", // Jump on empty
146166
"mov {i}, {max_i}",
147167
"cmp {i}, 16",
148-
"jb 6f", // Jump to < 16 case
168+
"jb 7f", // Jump to < 16 case
149169

150170
"4:",
151171
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
@@ -175,26 +195,47 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
175195
"add {sum},{t}",
176196
"cmp {i}, 16",
177197
"jae 4b", // Loop
178-
"6:",
198+
"7:",
199+
// i < 16
179200
"cmp {i}, 4",
180201
"jb 3f", // Is < 4
181-
// Is >= 4 and < 16
202+
"cmp {i}, 8",
203+
"jb 5f", // Is < 8
204+
"cmp {i}, 12",
205+
"jb 6f", // Is < 12
206+
207+
// 12 <= i < 16
208+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
209+
"add {i}, -4",
210+
"vpand {vt}, {vt}, {msb}",
211+
"vpcmpeqq {vt}, {vt}, {zero}",
212+
"vpmovmskb {t}, {vt}",
213+
"popcnt {t}, {t}",
214+
"add {sum},{t}",
215+
216+
"6:",
217+
// i < 12
218+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
219+
"add {i}, -4",
220+
"vpand {vt}, {vt}, {msb}",
221+
"vpcmpeqq {vt}, {vt}, {zero}",
222+
"vpmovmskb {t}, {vt}",
223+
"popcnt {t}, {t}",
224+
"add {sum},{t}",
182225

183226
"5:",
227+
// i < 8
184228
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
185229
"add {i}, -4",
186230
"vpand {vt}, {vt}, {msb}",
187231
"vpcmpeqq {vt}, {vt}, {zero}",
188232
"vpmovmskb {t}, {vt}",
189233
"popcnt {t}, {t}",
190234
"add {sum},{t}",
191-
"cmp {i}, 4",
192-
"jae 5b", // Loop
235+
193236
"3:",
194237
"test {i}, {i}",
195238
"je 2f", // Is zero
196-
197-
// Is > 0 and < 4
198239
"vpaddq {vt}, {d}, ymmword ptr [{os}]",
199240
"vpand {vt}, {vt}, {msb}",
200241
"vpcmpeqq {vt}, {vt}, {zero}",

0 commit comments

Comments
 (0)