Skip to content

Commit f7b965f

Browse files
committed
Revert "Dont have loop < 4"
This reverts commit 77c4617.
1 parent 77c4617 commit f7b965f

File tree

1 file changed

+15
-56
lines changed

1 file changed

+15
-56
lines changed

src/day25.rs

Lines changed: 15 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
6666
"je 2f", // Jump on empty
6767
"mov {i}, {max_i}",
6868
"cmp {i}, 16",
69-
"jb 7f", // Jump to < 16 case
69+
"jb 6f", // Jump to < 16 case
7070

7171
"4:",
7272
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
@@ -96,47 +96,26 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
9696
"add {sum},{t}",
9797
"cmp {i}, 16",
9898
"jae 4b", // Loop
99-
"7:",
100-
// i < 16
99+
"6:",
101100
"cmp {i}, 4",
102101
"jb 3f", // Is < 4
103-
"cmp {i}, 8",
104-
"jb 5f", // Is < 8
105-
"cmp {i}, 12",
106-
"jb 6f", // Is < 8
107-
108-
// i < 16
109-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
110-
"add {i}, -4",
111-
"vpand {vt}, {vt}, {msb}",
112-
"vpcmpeqq {vt}, {vt}, {zero}",
113-
"vpmovmskb {t}, {vt}",
114-
"popcnt {t}, {t}",
115-
"add {sum},{t}",
116-
117-
"6:",
118-
// i < 12
119-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
120-
"add {i}, -4",
121-
"vpand {vt}, {vt}, {msb}",
122-
"vpcmpeqq {vt}, {vt}, {zero}",
123-
"vpmovmskb {t}, {vt}",
124-
"popcnt {t}, {t}",
125-
"add {sum},{t}",
102+
// Is >= 4 and < 16
126103

127104
"5:",
128-
// i < 8
129105
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
130106
"add {i}, -4",
131107
"vpand {vt}, {vt}, {msb}",
132108
"vpcmpeqq {vt}, {vt}, {zero}",
133109
"vpmovmskb {t}, {vt}",
134110
"popcnt {t}, {t}",
135111
"add {sum},{t}",
136-
112+
"cmp {i}, 4",
113+
"jae 5b", // Loop
137114
"3:",
138115
"test {i}, {i}",
139116
"je 2f", // Is zero
117+
118+
// Is > 0 and < 4
140119
"vpaddq {vt}, {d}, ymmword ptr [{os}]",
141120
"vpand {vt}, {vt}, {msb}",
142121
"vpcmpeqq {vt}, {vt}, {zero}",
@@ -161,11 +140,12 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
161140
keys_i += 1;
162141
} else {
163142
std::arch::asm!(
143+
164144
"test {max_i}, {max_i}",
165145
"je 2f", // Jump on empty
166146
"mov {i}, {max_i}",
167147
"cmp {i}, 16",
168-
"jb 7f", // Jump to < 16 case
148+
"jb 6f", // Jump to < 16 case
169149

170150
"4:",
171151
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
@@ -195,47 +175,26 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
195175
"add {sum},{t}",
196176
"cmp {i}, 16",
197177
"jae 4b", // Loop
198-
"7:",
199-
// i < 16
178+
"6:",
200179
"cmp {i}, 4",
201180
"jb 3f", // Is < 4
202-
"cmp {i}, 8",
203-
"jb 5f", // Is < 8
204-
"cmp {i}, 12",
205-
"jb 6f", // Is < 8
206-
207-
// i < 16
208-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
209-
"add {i}, -4",
210-
"vpand {vt}, {vt}, {msb}",
211-
"vpcmpeqq {vt}, {vt}, {zero}",
212-
"vpmovmskb {t}, {vt}",
213-
"popcnt {t}, {t}",
214-
"add {sum},{t}",
215-
216-
"6:",
217-
// i < 12
218-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
219-
"add {i}, -4",
220-
"vpand {vt}, {vt}, {msb}",
221-
"vpcmpeqq {vt}, {vt}, {zero}",
222-
"vpmovmskb {t}, {vt}",
223-
"popcnt {t}, {t}",
224-
"add {sum},{t}",
181+
// Is >= 4 and < 16
225182

226183
"5:",
227-
// i < 8
228184
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
229185
"add {i}, -4",
230186
"vpand {vt}, {vt}, {msb}",
231187
"vpcmpeqq {vt}, {vt}, {zero}",
232188
"vpmovmskb {t}, {vt}",
233189
"popcnt {t}, {t}",
234190
"add {sum},{t}",
235-
191+
"cmp {i}, 4",
192+
"jae 5b", // Loop
236193
"3:",
237194
"test {i}, {i}",
238195
"je 2f", // Is zero
196+
197+
// Is > 0 and < 4
239198
"vpaddq {vt}, {d}, ymmword ptr [{os}]",
240199
"vpand {vt}, {vt}, {msb}",
241200
"vpcmpeqq {vt}, {vt}, {zero}",

0 commit comments

Comments
 (0)