@@ -875,3 +875,115 @@ if.end:
875875 %exitcond.not = icmp eq i64 %indvars.iv.next , %wide.trip.count
876876 br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
877877}
878+
; External function with no body in this module; @outer_latch_heuristic calls
; it from its inner loop, passing the previous result back in as the first
; argument.
879+ declare i64 @payload (i64 , ptr , ptr , i64 )
880+
; Test function: a two-level loop nest whose only select sits in the block that
; forms the outer-loop latch.
;   outer.loop: loads src[%i] and src[%j], then enters the inner loop.
;   inner.loop: calls @payload repeatedly, feeding the previous result back in;
;               %lsr.iv starts at %dim, is decremented each iteration, and the
;               loop exits once it reaches 0.
;   latch:      uses the sign of the last @payload result to advance %i
;               (negative result) or %j (non-negative result), selects the
;               matching src slot, reloads the pointer from it and stores it
;               to dst[%k].
; Both check prefixes below expect the select in latch to remain a select.
; NOTE(review): the RUN lines that define the CHECKOO / CHECKII prefixes are
; outside this view - confirm which configurations they correspond to.
881+ define void @outer_latch_heuristic (ptr %dst , ptr %src , i64 %p , i64 %dim ) {
882+ ; CHECKOO-LABEL: @outer_latch_heuristic(
883+ ; CHECKOO-NEXT: entry:
884+ ; CHECKOO-NEXT: br label [[OUTER_LOOP:%.*]]
885+ ; CHECKOO: outer.loop:
886+ ; CHECKOO-NEXT: [[K_020_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
887+ ; CHECKOO-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
888+ ; CHECKOO-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
889+ ; CHECKOO-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
890+ ; CHECKOO-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
891+ ; CHECKOO-NEXT: [[ARRAYIDX1_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
892+ ; CHECKOO-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX1_US]], align 8
893+ ; CHECKOO-NEXT: br label [[INNER_LOOP:%.*]]
894+ ; CHECKOO: inner.loop:
895+ ; CHECKOO-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[DIM:%.*]], [[OUTER_LOOP]] ], [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ]
896+ ; CHECKOO-NEXT: [[DIFF_04_I_US:%.*]] = phi i64 [ [[CALL_I_US:%.*]], [[INNER_LOOP]] ], [ 0, [[OUTER_LOOP]] ]
897+ ; CHECKOO-NEXT: [[CALL_I_US]] = tail call i64 @payload(i64 [[DIFF_04_I_US]], ptr [[TMP0]], ptr [[TMP1]], i64 [[P:%.*]])
898+ ; CHECKOO-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
899+ ; CHECKOO-NEXT: [[EXITCOND_NOT_I_US:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
900+ ; CHECKOO-NEXT: br i1 [[EXITCOND_NOT_I_US]], label [[LATCH]], label [[INNER_LOOP]]
901+ ; CHECKOO: latch:
902+ ; CHECKOO-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[CALL_I_US]], -1
903+ ; CHECKOO-NEXT: [[DIFF_0_LCSSA_I_LOBIT_US:%.*]] = lshr i64 [[CALL_I_US]], 63
904+ ; CHECKOO-NEXT: [[I_NEXT]] = add nsw i64 [[DIFF_0_LCSSA_I_LOBIT_US]], [[I]]
905+ ; CHECKOO-NEXT: [[INC4_US:%.*]] = zext i1 [[CMP2_US]] to i64
906+ ; CHECKOO-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[INC4_US]]
907+ ; CHECKOO-NEXT: [[COND_IN_US:%.*]] = select i1 [[CMP2_US]], ptr [[ARRAYIDX1_US]], ptr [[ARRAYIDX_US]]
908+ ; CHECKOO-NEXT: [[COND_US:%.*]] = load ptr, ptr [[COND_IN_US]], align 8
909+ ; CHECKOO-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[K_020_US]]
910+ ; CHECKOO-NEXT: store ptr [[COND_US]], ptr [[ARRAYIDX6_US]], align 8
911+ ; CHECKOO-NEXT: [[INC7_US]] = add i64 [[K_020_US]], 1
912+ ; CHECKOO-NEXT: [[EXITCOND23_NOT:%.*]] = icmp eq i64 [[K_020_US]], 1000
913+ ; CHECKOO-NEXT: br i1 [[EXITCOND23_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
914+ ; CHECKOO: exit:
915+ ; CHECKOO-NEXT: ret void
916+ ;
917+ ; CHECKII-LABEL: @outer_latch_heuristic(
918+ ; CHECKII-NEXT: entry:
919+ ; CHECKII-NEXT: br label [[OUTER_LOOP:%.*]]
920+ ; CHECKII: outer.loop:
921+ ; CHECKII-NEXT: [[K_020_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
922+ ; CHECKII-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
923+ ; CHECKII-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
924+ ; CHECKII-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
925+ ; CHECKII-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
926+ ; CHECKII-NEXT: [[ARRAYIDX1_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
927+ ; CHECKII-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX1_US]], align 8
928+ ; CHECKII-NEXT: br label [[INNER_LOOP:%.*]]
929+ ; CHECKII: inner.loop:
930+ ; CHECKII-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[DIM:%.*]], [[OUTER_LOOP]] ], [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ]
931+ ; CHECKII-NEXT: [[DIFF_04_I_US:%.*]] = phi i64 [ [[CALL_I_US:%.*]], [[INNER_LOOP]] ], [ 0, [[OUTER_LOOP]] ]
932+ ; CHECKII-NEXT: [[CALL_I_US]] = tail call i64 @payload(i64 [[DIFF_04_I_US]], ptr [[TMP0]], ptr [[TMP1]], i64 [[P:%.*]])
933+ ; CHECKII-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
934+ ; CHECKII-NEXT: [[EXITCOND_NOT_I_US:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
935+ ; CHECKII-NEXT: br i1 [[EXITCOND_NOT_I_US]], label [[LATCH]], label [[INNER_LOOP]]
936+ ; CHECKII: latch:
937+ ; CHECKII-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[CALL_I_US]], -1
938+ ; CHECKII-NEXT: [[DIFF_0_LCSSA_I_LOBIT_US:%.*]] = lshr i64 [[CALL_I_US]], 63
939+ ; CHECKII-NEXT: [[I_NEXT]] = add nsw i64 [[DIFF_0_LCSSA_I_LOBIT_US]], [[I]]
940+ ; CHECKII-NEXT: [[INC4_US:%.*]] = zext i1 [[CMP2_US]] to i64
941+ ; CHECKII-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[INC4_US]]
942+ ; CHECKII-NEXT: [[COND_IN_US:%.*]] = select i1 [[CMP2_US]], ptr [[ARRAYIDX1_US]], ptr [[ARRAYIDX_US]]
943+ ; CHECKII-NEXT: [[COND_US:%.*]] = load ptr, ptr [[COND_IN_US]], align 8
944+ ; CHECKII-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[K_020_US]]
945+ ; CHECKII-NEXT: store ptr [[COND_US]], ptr [[ARRAYIDX6_US]], align 8
946+ ; CHECKII-NEXT: [[INC7_US]] = add i64 [[K_020_US]], 1
947+ ; CHECKII-NEXT: [[EXITCOND23_NOT:%.*]] = icmp eq i64 [[K_020_US]], 1000
948+ ; CHECKII-NEXT: br i1 [[EXITCOND23_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
949+ ; CHECKII: exit:
950+ ; CHECKII-NEXT: ret void
951+ ;
952+ entry:
953+ br label %outer.loop
954+
; Outer loop header: %k.020.us indexes dst, %i and %j index src; all start at 0.
955+ outer.loop:
956+ %k.020.us = phi i64 [ %inc7.us , %latch ], [ 0 , %entry ]
957+ %j = phi i64 [ %j.next , %latch ], [ 0 , %entry ]
958+ %i = phi i64 [ %i.next , %latch ], [ 0 , %entry ]
959+ %arrayidx.us = getelementptr inbounds ptr , ptr %src , i64 %i
960+ %4 = load ptr , ptr %arrayidx.us , align 8
961+ %arrayidx1.us = getelementptr inbounds ptr , ptr %src , i64 %j
962+ %5 = load ptr , ptr %arrayidx1.us , align 8
963+ br label %inner.loop
964+
; Inner loop: a single self-looping block. %diff.04.i.us carries the previous
; @payload result (0 on entry); %lsr.iv counts down from %dim.
965+ inner.loop:
966+ %lsr.iv = phi i64 [ %dim , %outer.loop ], [ %lsr.iv.next , %inner.loop ]
967+ %diff.04.i.us = phi i64 [ %call.i.us , %inner.loop ], [ 0 , %outer.loop ]
968+ %call.i.us = tail call i64 @payload (i64 %diff.04.i.us , ptr %4 , ptr %5 , i64 %p )
969+ %lsr.iv.next = add i64 %lsr.iv , -1
970+ %exitcond.not.i.us = icmp eq i64 %lsr.iv.next , 0
971+ br i1 %exitcond.not.i.us , label %latch , label %inner.loop
972+
; Outer loop latch: %cmp2.us is true when the last @payload result is >= 0;
; the lshr by 63 extracts that result's sign bit, so exactly one of %i / %j
; advances by 1. The select below is the instruction the CHECK lines focus on.
973+ latch:
974+ %cmp2.us = icmp sgt i64 %call.i.us , -1
975+ %diff.0.lcssa.i.lobit.us = lshr i64 %call.i.us , 63
976+ %i.next = add nsw i64 %diff.0.lcssa.i.lobit.us , %i
977+ %inc4.us = zext i1 %cmp2.us to i64
978+ %j.next = add nsw i64 %j , %inc4.us
979+ %cond.in.us = select i1 %cmp2.us , ptr %arrayidx1.us , ptr %arrayidx.us
980+ %cond.us = load ptr , ptr %cond.in.us , align 8
981+ %arrayidx6.us = getelementptr inbounds ptr , ptr %dst , i64 %k.020.us
982+ store ptr %cond.us , ptr %arrayidx6.us , align 8
983+ %inc7.us = add i64 %k.020.us , 1
; The exit test compares the pre-increment %k.020.us, so the iteration with
; %k.020.us == 1000 still executes the store before leaving the loop.
984+ %exitcond23.not = icmp eq i64 %k.020.us , 1000
985+ br i1 %exitcond23.not , label %exit , label %outer.loop
986+
987+ exit:
988+ ret void
989+ }
0 commit comments