Skip to content

Commit 91bd74c

Browse files
committed
Better instruction layout?
1 parent 4790430 commit 91bd74c

File tree

1 file changed

+17
-16
lines changed

1 file changed

+17
-16
lines changed

src/day25.rs

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -72,27 +72,27 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
7272
"jb 6f", // Jump to < 16 case
7373

7474
"4:",
75+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
7576
"add {i}, -16",
76-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*3]",
7777
"vpand {vt}, {vt}, {msb}",
7878
"vpcmpeqq {vt}, {vt}, {zero}",
7979
"vpmovmskb {t}, {vt}",
80-
"popcnt {t}, {t}",
81-
"add {sum},{t}",
8280
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*2]",
81+
"popcnt {t}, {t}",
8382
"vpand {vt}, {vt}, {msb}",
83+
"add {sum},{t}",
8484
"vpcmpeqq {vt}, {vt}, {zero}",
8585
"vpmovmskb {t}, {vt}",
86-
"popcnt {t}, {t}",
87-
"add {sum},{t}",
8886
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*1]",
87+
"popcnt {t}, {t}",
8988
"vpand {vt}, {vt}, {msb}",
89+
"add {sum},{t}",
9090
"vpcmpeqq {vt}, {vt}, {zero}",
9191
"vpmovmskb {t}, {vt}",
92-
"popcnt {t}, {t}",
93-
"add {sum},{t}",
9492
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*0]",
93+
"popcnt {t}, {t}",
9594
"vpand {vt}, {vt}, {msb}",
95+
"add {sum},{t}",
9696
"vpcmpeqq {vt}, {vt}, {zero}",
9797
"vpmovmskb {t}, {vt}",
9898
"popcnt {t}, {t}",
@@ -105,8 +105,8 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
105105
// Is >= 4 and < 16
106106

107107
"5:",
108+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
108109
"add {i}, -4",
109-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i}]",
110110
"vpand {vt}, {vt}, {msb}",
111111
"vpcmpeqq {vt}, {vt}, {zero}",
112112
"vpmovmskb {t}, {vt}",
@@ -143,34 +143,35 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
143143
keys_i += 1;
144144
} else {
145145
std::arch::asm!(
146+
146147
"test {max_i}, {max_i}",
147148
"je 2f", // Jump on empty
148149
"mov {i}, {max_i}",
149150
"cmp {i}, 16",
150151
"jb 6f", // Jump to < 16 case
151152

152153
"4:",
154+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
153155
"add {i}, -16",
154-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*3]",
155156
"vpand {vt}, {vt}, {msb}",
156157
"vpcmpeqq {vt}, {vt}, {zero}",
157158
"vpmovmskb {t}, {vt}",
158-
"popcnt {t}, {t}",
159-
"add {sum},{t}",
160159
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*2]",
160+
"popcnt {t}, {t}",
161161
"vpand {vt}, {vt}, {msb}",
162+
"add {sum},{t}",
162163
"vpcmpeqq {vt}, {vt}, {zero}",
163164
"vpmovmskb {t}, {vt}",
164-
"popcnt {t}, {t}",
165-
"add {sum},{t}",
166165
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*1]",
166+
"popcnt {t}, {t}",
167167
"vpand {vt}, {vt}, {msb}",
168+
"add {sum},{t}",
168169
"vpcmpeqq {vt}, {vt}, {zero}",
169170
"vpmovmskb {t}, {vt}",
170-
"popcnt {t}, {t}",
171-
"add {sum},{t}",
172171
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*0]",
172+
"popcnt {t}, {t}",
173173
"vpand {vt}, {vt}, {msb}",
174+
"add {sum},{t}",
174175
"vpcmpeqq {vt}, {vt}, {zero}",
175176
"vpmovmskb {t}, {vt}",
176177
"popcnt {t}, {t}",
@@ -183,8 +184,8 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
183184
// Is >= 4 and < 16
184185

185186
"5:",
187+
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} - 32]",
186188
"add {i}, -4",
187-
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i}]",
188189
"vpand {vt}, {vt}, {msb}",
189190
"vpcmpeqq {vt}, {vt}, {zero}",
190191
"vpmovmskb {t}, {vt}",

0 commit comments

Comments
 (0)