@@ -74,6 +74,7 @@ pub const Feature = enum {
74
74
scalarize_int_from_float ,
75
75
scalarize_int_from_float_optimized ,
76
76
scalarize_float_from_int ,
77
+ scalarize_select ,
77
78
scalarize_mul_add ,
78
79
79
80
/// Legalize (shift lhs, (splat rhs)) -> (shift lhs, rhs)
@@ -167,6 +168,7 @@ pub const Feature = enum {
167
168
.int_from_float = > .scalarize_int_from_float ,
168
169
.int_from_float_optimized = > .scalarize_int_from_float_optimized ,
169
170
.float_from_int = > .scalarize_float_from_int ,
171
+ .select = > .scalarize_select ,
170
172
.mul_add = > .scalarize_mul_add ,
171
173
};
172
174
}
@@ -520,7 +522,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
520
522
},
521
523
.splat ,
522
524
.shuffle ,
525
+ = > {},
523
526
.select ,
527
+ = > if (l .features .contains (.scalarize_select )) continue :inst try l .scalarize (inst , .select_pl_op_bin ),
524
528
.memset ,
525
529
.memset_safe ,
526
530
.memcpy ,
@@ -568,7 +572,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
568
572
}
569
573
}
570
574
571
- const ScalarizeDataTag = enum { un_op , ty_op , bin_op , ty_pl_vector_cmp , pl_op_bin };
575
+ const ScalarizeDataTag = enum { un_op , ty_op , bin_op , ty_pl_vector_cmp , pl_op_bin , select_pl_op_bin }; // select_pl_op_bin: `pl_op` data whose payload is an `Air.Bin`, used when scalarizing `.select`
572
576
inline fn scalarize (l : * Legalize , orig_inst : Air.Inst.Index , comptime data_tag : ScalarizeDataTag ) Error ! Air.Inst.Tag { // data_tag is comptime and is forwarded unchanged to scalarizeBlockPayload
573
577
return l .replaceInst (orig_inst , .block , try l .scalarizeBlockPayload (orig_inst , data_tag )); // orig_inst becomes a `.block` using the payload built for it — NOTE(review): replaceInst itself is not visible here; confirm what tag it returns
574
578
}
@@ -584,6 +588,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
584
588
.un_op , .ty_op = > 1 ,
585
589
.bin_op , .ty_pl_vector_cmp = > 2 ,
586
590
.pl_op_bin = > 3 ,
591
+ .select_pl_op_bin = > 6 ,
587
592
} + 9
588
593
]Air .Inst .Index = undefined ;
589
594
try l .air_instructions .ensureUnusedCapacity (zcu .gpa , inst_buf .len );
@@ -722,23 +727,67 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
722
727
} },
723
728
});
724
729
},
730
+ .select_pl_op_bin = > {
731
+ const extra = l .extraData (Air .Bin , orig .data .pl_op .payload ).data ;
732
+ var res_elem : Result = .init (l , l .typeOf (extra .lhs ).scalarType (zcu ), & loop .block );
733
+ res_elem .block = .init (loop .block .stealCapacity (6 ));
734
+ {
735
+ var select_cond_br : CondBr = .init (l , res_elem .block .add (l , .{
736
+ .tag = .array_elem_val ,
737
+ .data = .{ .bin_op = .{
738
+ .lhs = orig .data .pl_op .operand ,
739
+ .rhs = cur_index_inst .toRef (),
740
+ } },
741
+ }).toRef (), & res_elem .block , .{});
742
+ select_cond_br .then_block = .init (res_elem .block .stealRemainingCapacity ());
743
+ {
744
+ _ = select_cond_br .then_block .add (l , .{
745
+ .tag = .br ,
746
+ .data = .{ .br = .{
747
+ .block_inst = res_elem .inst ,
748
+ .operand = select_cond_br .then_block .add (l , .{
749
+ .tag = .array_elem_val ,
750
+ .data = .{ .bin_op = .{
751
+ .lhs = extra .lhs ,
752
+ .rhs = cur_index_inst .toRef (),
753
+ } },
754
+ }).toRef (),
755
+ } },
756
+ });
757
+ }
758
+ select_cond_br .else_block = .init (select_cond_br .then_block .stealRemainingCapacity ());
759
+ {
760
+ _ = select_cond_br .else_block .add (l , .{
761
+ .tag = .br ,
762
+ .data = .{ .br = .{
763
+ .block_inst = res_elem .inst ,
764
+ .operand = select_cond_br .else_block .add (l , .{
765
+ .tag = .array_elem_val ,
766
+ .data = .{ .bin_op = .{
767
+ .lhs = extra .rhs ,
768
+ .rhs = cur_index_inst .toRef (),
769
+ } },
770
+ }).toRef (),
771
+ } },
772
+ });
773
+ }
774
+ try select_cond_br .finish (l );
775
+ }
776
+ try res_elem .finish (l );
777
+ break :res_elem res_elem .inst ;
778
+ },
725
779
}.toRef (),
726
780
}),
727
781
} },
728
782
});
729
783
730
- var loop_cond_br : CondBr = .init (
784
+ var loop_cond_br : CondBr = .init (l , ( try loop . block . addCmp (
731
785
l ,
732
- (try loop .block .addCmp (
733
- l ,
734
- .lt ,
735
- cur_index_inst .toRef (),
736
- try pt .intRef (.usize , res_ty .vectorLen (zcu ) - 1 ),
737
- .{},
738
- )).toRef (),
739
- & loop .block ,
786
+ .lt ,
787
+ cur_index_inst .toRef (),
788
+ try pt .intRef (.usize , res_ty .vectorLen (zcu ) - 1 ),
740
789
.{},
741
- );
790
+ )). toRef (), & loop . block , .{}) ;
742
791
loop_cond_br .then_block = .init (loop .block .stealRemainingCapacity ());
743
792
{
744
793
_ = loop_cond_br .then_block .add (l , .{
@@ -1138,9 +1187,21 @@ const Block = struct {
1138
1187
/// This is useful when you've provided a buffer big enough for all your instructions, but you are
1139
1188
/// now starting a new block and some of them need to live there instead.
1140
1189
fn stealRemainingCapacity (b : * Block ) []Air.Inst.Index {
1141
- const remaining = b .instructions [b .len .. ];
1142
- b .instructions = b .instructions [0.. b .len ];
1143
- return remaining ;
1190
+ return b .stealFrom (b .len ); // hand over every slot from index `b.len` onward; stealFrom truncates `b.instructions` to `b.len`
1191
+ }
1192
+
1193
+ /// Returns the last `len` elements of `b.instructions`, which must all be unused capacity, and shrinks
1194
+ /// `b.instructions` down to not include them anymore.
1195
+ /// This is useful when you've provided a buffer big enough for all your instructions, but you are
1196
+ /// now starting a new block and some of them need to live there instead.
1197
+ fn stealCapacity (b : * Block , len : usize ) []Air.Inst.Index {
1198
+ return b .stealFrom (b .instructions .len - len ); // start index of the trailing `len` slots; stealFrom asserts it is not below `b.len`
1199
+ }
1200
+
1201
+ fn stealFrom (b : * Block , start : usize ) []Air.Inst.Index { // shared helper: splits `b.instructions` at `start` and returns the tail
1202
+ assert (start >= b .len ); // the split point must not fall below index `b.len`
1203
+ defer b .instructions .len = start ; // runs after the return value below has been computed, so the tail slice is taken from the untruncated buffer
1204
+ return b .instructions [start .. ];
1144
1205
}
1145
1206
1146
1207
fn body (b : * const Block ) []const Air.Inst.Index {
@@ -1149,6 +1210,31 @@ const Block = struct {
1149
1210
}
1150
1211
};
1151
1212
1213
+ const Result = struct { // a `.block` instruction under construction, paired with the Block that collects its body
1214
+ inst : Air.Inst.Index , // the `.block` instruction that `init` adds to the parent block
1215
+ block : Block , // body instructions; `finish` writes them out as the payload of `inst`
1216
+
1217
+ /// Adds a `.block` instruction of type `ty` to `parent_block`. The return value has `block`
1218
+ /// initialized to `undefined`; it is the caller's responsibility to initialize it before calling `finish`.
1219
+ fn init (l : * Legalize , ty : Type , parent_block : * Block ) Result {
1220
+ return .{
1221
+ .inst = parent_block .add (l , .{
1222
+ .tag = .block ,
1223
+ .data = .{ .ty_pl = .{
1224
+ .ty = Air .internedToRef (ty .toIntern ()),
1225
+ .payload = undefined , // patched later by `finish`
1226
+ } },
1227
+ }),
1228
+ .block = undefined ,
1229
+ };
1230
+ }
1231
+
1232
+ fn finish (res : Result , l : * Legalize ) Error ! void { // stores the body of `res.block` as the payload of `res.inst`
1233
+ const data = & l .air_instructions .items (.data )[@intFromEnum (res .inst )]; // NOTE(review): pointer into air_instructions is taken before addBlockBody runs; assumes addBlockBody does not grow that list — confirm
1234
+ data .ty_pl .payload = try l .addBlockBody (res .block .body ());
1235
+ }
1236
+ };
1237
+
1152
1238
const Loop = struct {
1153
1239
inst : Air.Inst.Index ,
1154
1240
block : Block ,
0 commit comments