@@ -21,74 +21,39 @@ fn square(x: &f32) -> f32 {
2121 x * x
2222}
2323
24- // d_square2
25- // CHECK: define internal fastcc [4 x float] @fwddiffe4square(float %x.0.val, [4 x ptr] %"x'")
26- // CHECK-NEXT: start:
27- // CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0
28- // CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4
29- // CHECK-NEXT: %1 = extractvalue [4 x ptr] %"x'", 1
30- // CHECK-NEXT: %"_2'ipl1" = load float, ptr %1, align 4
31- // CHECK-NEXT: %2 = extractvalue [4 x ptr] %"x'", 2
32- // CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4
33- // CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3
34- // CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4
35- // CHECK-NEXT: %4 = fadd fast float %"_2'ipl", %"_2'ipl"
36- // CHECK-NEXT: %5 = fmul fast float %4, %x.0.val
37- // CHECK-NEXT: %6 = insertvalue [4 x float] undef, float %5, 0
38- // CHECK-NEXT: %7 = fadd fast float %"_2'ipl1", %"_2'ipl1"
39- // CHECK-NEXT: %8 = fmul fast float %7, %x.0.val
40- // CHECK-NEXT: %9 = insertvalue [4 x float] %6, float %8, 1
41- // CHECK-NEXT: %10 = fadd fast float %"_2'ipl2", %"_2'ipl2"
42- // CHECK-NEXT: %11 = fmul fast float %10, %x.0.val
43- // CHECK-NEXT: %12 = insertvalue [4 x float] %9, float %11, 2
44- // CHECK-NEXT: %13 = fadd fast float %"_2'ipl3", %"_2'ipl3"
45- // CHECK-NEXT: %14 = fmul fast float %13, %x.0.val
46- // CHECK-NEXT: %15 = insertvalue [4 x float] %12, float %14, 3
47- // CHECK-NEXT: ret [4 x float] %15
48- // CHECK-NEXT: }
49-
50- // d_square3, the extra float is the original return value (x * x)
51- // CHECK: define internal fastcc { float, [4 x float] } @fwddiffe4square.1(float %x.0.val, [4 x ptr] %"x'")
52- // CHECK-NEXT: start:
53- // CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0
54- // CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4
55- // CHECK-NEXT: %1 = extractvalue [4 x ptr] %"x'", 1
56- // CHECK-NEXT: %"_2'ipl1" = load float, ptr %1, align 4
57- // CHECK-NEXT: %2 = extractvalue [4 x ptr] %"x'", 2
58- // CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4
59- // CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3
60- // CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4
61- // CHECK-NEXT: %_0 = fmul float %x.0.val, %x.0.val
62- // CHECK-NEXT: %4 = fadd fast float %"_2'ipl", %"_2'ipl"
63- // CHECK-NEXT: %5 = fmul fast float %4, %x.0.val
64- // CHECK-NEXT: %6 = insertvalue [4 x float] undef, float %5, 0
65- // CHECK-NEXT: %7 = fadd fast float %"_2'ipl1", %"_2'ipl1"
66- // CHECK-NEXT: %8 = fmul fast float %7, %x.0.val
67- // CHECK-NEXT: %9 = insertvalue [4 x float] %6, float %8, 1
68- // CHECK-NEXT: %10 = fadd fast float %"_2'ipl2", %"_2'ipl2"
69- // CHECK-NEXT: %11 = fmul fast float %10, %x.0.val
70- // CHECK-NEXT: %12 = insertvalue [4 x float] %9, float %11, 2
71- // CHECK-NEXT: %13 = fadd fast float %"_2'ipl3", %"_2'ipl3"
72- // CHECK-NEXT: %14 = fmul fast float %13, %x.0.val
73- // CHECK-NEXT: %15 = insertvalue [4 x float] %12, float %14, 3
74- // CHECK-NEXT: %16 = insertvalue { float, [4 x float] } undef, float %_0, 0
75- // CHECK-NEXT: %17 = insertvalue { float, [4 x float] } %16, [4 x float] %15, 1
76- // CHECK-NEXT: ret { float, [4 x float] } %17
77- // CHECK-NEXT: }
78-
7924fn main ( ) {
8025 let x = std:: hint:: black_box ( 3.0 ) ;
26+
27+ // square(&x)
28+ // CHECK: %_0.i = fmul float %_2.i, %_2.i
29+ // CHECK-NEXT: store float %_0.i, ptr %output, align 4
8130 let output = square ( & x) ;
8231 dbg ! ( & output) ;
8332 assert_eq ! ( 9.0 , output) ;
33+
34+ // square(&x)
35+ // CHECK: %_2.i26 = load float, ptr %x, align 4
36+ // CHECK-NEXT: %_0.i27 = fmul float %_2.i26, %_2.i26
8437 dbg ! ( square( & x) ) ;
8538
8639 let mut df_dx1 = 1.0 ;
8740 let mut df_dx2 = 2.0 ;
8841 let mut df_dx3 = 3.0 ;
8942 let mut df_dx4 = 0.0 ;
43+
44+ // [o1, o2, o3, o4] (o4 is being optimized away as it's something * 0.0)
45+ // CHECK: %x.val = load float, ptr %x, align 4
46+ // CHECK-NEXT: %13 = fmul fast float %x.val, 2.000000e+00
47+ // CHECK-NEXT: %14 = fmul fast float %x.val, 4.000000e+00
48+ // CHECK-NEXT: %15 = fmul fast float %x.val, 6.000000e+00
9049 let [ o1, o2, o3, o4] = d_square2 ( & x, & mut df_dx1, & mut df_dx2, & mut df_dx3, & mut df_dx4) ;
9150 dbg ! ( o1, o2, o3, o4) ;
51+
52+ // [output2, o1, o2, o3, o4] (o4 is being optimized away as it's something * 0.0)
53+ // CHECK: %_0.i45 = fmul float %x.val35, %x.val35
54+ // CHECK-NEXT: %40 = fmul fast float %x.val35, 2.000000e+00
55+ // CHECK-NEXT: %41 = fmul fast float %x.val35, 4.000000e+00
56+ // CHECK-NEXT: %42 = fmul fast float %x.val35, 6.000000e+00
9257 let [ output2, o1, o2, o3, o4] =
9358 d_square1 ( & x, & mut df_dx1, & mut df_dx2, & mut df_dx3, & mut df_dx4) ;
9459 dbg ! ( o1, o2, o3, o4) ;
@@ -101,8 +66,22 @@ fn main() {
10166 assert_eq ! ( 2.0 , df_dx2) ;
10267 assert_eq ! ( 3.0 , df_dx3) ;
10368 assert_eq ! ( 0.0 , df_dx4) ;
69+
70+ // d_square3(&x, &mut df_dx1)
71+ // CHECK: %x.val39 = load float, ptr %x, align 4
72+ // CHECK-NEXT: %72 = fmul fast float %x.val39, 2.000000e+00
10473 assert_eq ! ( d_square3( & x, & mut df_dx1) , 2.0 * o1) ;
74+
75+ // d_square3(&x, &mut df_dx2)
76+ // CHECK: %74 = fmul fast float %x.val39, 4.000000e+00
77+ // CHECK-NEXT: store float %74, ptr %_191, align 4
10578 assert_eq ! ( d_square3( & x, & mut df_dx2) , 2.0 * o2) ;
79+
80+ // d_square3(&x, &mut df_dx3)
81+ // CHECK: %76 = fmul fast float %x.val39, 6.000000e+00
82+ // CHECK-NEXT: store float %76, ptr %_200, align 4
10683 assert_eq ! ( d_square3( & x, & mut df_dx3) , 2.0 * o3) ;
84+
85+ // d_square3(&x, &mut df_dx4) is being optimized away as it's something * 0.0
10786 assert_eq ! ( d_square3( & x, & mut df_dx4) , 2.0 * o4) ;
10887}
0 commit comments