@@ -28,6 +28,7 @@ declare i8 @llvm.ctpop.i8(i8)
2828declare i16 @llvm.ctpop.i16 (i16 )
2929declare i32 @llvm.ctpop.i32 (i32 )
3030declare i64 @llvm.ctpop.i64 (i64 )
31+ declare i128 @llvm.ctpop.i128 (i128 )
3132
3233define i8 @test_cttz_i8 (i8 %a ) nounwind {
3334; RV32_NOZBB-LABEL: test_cttz_i8:
@@ -2094,6 +2095,154 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
20942095 ret i64 %1
20952096}
20962097
; Codegen test for the 128-bit population count intrinsic: the function body
; passes its i128 argument to @llvm.ctpop.i128 and returns the result.
;
; What the expectations below show, per group of check prefixes:
;  * RV32_NOZBB, RV64NOZBB, RV32XTHEADBB and RV64XTHEADBB expect two calls to
;    the __popcountdi2 library routine, whose results are then added together.
;  * RV32ZBB and RV64ZBB expect inline cpop instructions (four on RV32, two on
;    RV64) whose results are added together, with no library call.
;  * In all three RV32 sequences the argument words are loaded through a1 and
;    the result is stored through a0, with the two upper result words stored
;    as zero. In the RV64 sequences the result is left in a0 and a1 is set to 0.
;
; NOTE(review): the llc options behind each check prefix come from the RUN
; lines at the top of the file, which are not visible in this excerpt; confirm
; there before relying on the prefix names.
2098+ define i128 @test_ctpop_i128 (i128 %a ) nounwind {
2099+ ; RV32_NOZBB-LABEL: test_ctpop_i128:
2100+ ; RV32_NOZBB: # %bb.0:
2101+ ; RV32_NOZBB-NEXT: addi sp, sp, -32
2102+ ; RV32_NOZBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2103+ ; RV32_NOZBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2104+ ; RV32_NOZBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2105+ ; RV32_NOZBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2106+ ; RV32_NOZBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2107+ ; RV32_NOZBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
2108+ ; RV32_NOZBB-NEXT: lw s0, 0(a1)
2109+ ; RV32_NOZBB-NEXT: lw s1, 4(a1)
2110+ ; RV32_NOZBB-NEXT: lw a2, 8(a1)
2111+ ; RV32_NOZBB-NEXT: lw a1, 12(a1)
2112+ ; RV32_NOZBB-NEXT: mv s2, a0
2113+ ; RV32_NOZBB-NEXT: mv a0, a2
2114+ ; RV32_NOZBB-NEXT: call __popcountdi2
2115+ ; RV32_NOZBB-NEXT: mv s3, a0
2116+ ; RV32_NOZBB-NEXT: mv s4, a1
2117+ ; RV32_NOZBB-NEXT: mv a0, s0
2118+ ; RV32_NOZBB-NEXT: mv a1, s1
2119+ ; RV32_NOZBB-NEXT: call __popcountdi2
2120+ ; RV32_NOZBB-NEXT: add a1, a1, s4
2121+ ; RV32_NOZBB-NEXT: add s3, a0, s3
2122+ ; RV32_NOZBB-NEXT: sltu a0, s3, a0
2123+ ; RV32_NOZBB-NEXT: add a0, a1, a0
2124+ ; RV32_NOZBB-NEXT: sw zero, 12(s2)
2125+ ; RV32_NOZBB-NEXT: sw zero, 8(s2)
2126+ ; RV32_NOZBB-NEXT: sw s3, 0(s2)
2127+ ; RV32_NOZBB-NEXT: sw a0, 4(s2)
2128+ ; RV32_NOZBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2129+ ; RV32_NOZBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2130+ ; RV32_NOZBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2131+ ; RV32_NOZBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2132+ ; RV32_NOZBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2133+ ; RV32_NOZBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
2134+ ; RV32_NOZBB-NEXT: addi sp, sp, 32
2135+ ; RV32_NOZBB-NEXT: ret
2136+ ;
2137+ ; RV64NOZBB-LABEL: test_ctpop_i128:
2138+ ; RV64NOZBB: # %bb.0:
2139+ ; RV64NOZBB-NEXT: addi sp, sp, -32
2140+ ; RV64NOZBB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
2141+ ; RV64NOZBB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
2142+ ; RV64NOZBB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
2143+ ; RV64NOZBB-NEXT: mv s0, a0
2144+ ; RV64NOZBB-NEXT: mv a0, a1
2145+ ; RV64NOZBB-NEXT: call __popcountdi2
2146+ ; RV64NOZBB-NEXT: mv s1, a0
2147+ ; RV64NOZBB-NEXT: mv a0, s0
2148+ ; RV64NOZBB-NEXT: call __popcountdi2
2149+ ; RV64NOZBB-NEXT: add a0, a0, s1
2150+ ; RV64NOZBB-NEXT: li a1, 0
2151+ ; RV64NOZBB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
2152+ ; RV64NOZBB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
2153+ ; RV64NOZBB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
2154+ ; RV64NOZBB-NEXT: addi sp, sp, 32
2155+ ; RV64NOZBB-NEXT: ret
2156+ ;
2157+ ; RV32ZBB-LABEL: test_ctpop_i128:
2158+ ; RV32ZBB: # %bb.0:
2159+ ; RV32ZBB-NEXT: lw a2, 12(a1)
2160+ ; RV32ZBB-NEXT: lw a3, 8(a1)
2161+ ; RV32ZBB-NEXT: lw a4, 0(a1)
2162+ ; RV32ZBB-NEXT: lw a1, 4(a1)
2163+ ; RV32ZBB-NEXT: cpop a2, a2
2164+ ; RV32ZBB-NEXT: cpop a3, a3
2165+ ; RV32ZBB-NEXT: add a2, a3, a2
2166+ ; RV32ZBB-NEXT: cpop a1, a1
2167+ ; RV32ZBB-NEXT: cpop a3, a4
2168+ ; RV32ZBB-NEXT: add a1, a3, a1
2169+ ; RV32ZBB-NEXT: add a2, a1, a2
2170+ ; RV32ZBB-NEXT: sltu a1, a2, a1
2171+ ; RV32ZBB-NEXT: sw zero, 12(a0)
2172+ ; RV32ZBB-NEXT: sw zero, 8(a0)
2173+ ; RV32ZBB-NEXT: sw a2, 0(a0)
2174+ ; RV32ZBB-NEXT: sw a1, 4(a0)
2175+ ; RV32ZBB-NEXT: ret
2176+ ;
2177+ ; RV64ZBB-LABEL: test_ctpop_i128:
2178+ ; RV64ZBB: # %bb.0:
2179+ ; RV64ZBB-NEXT: cpop a1, a1
2180+ ; RV64ZBB-NEXT: cpop a0, a0
2181+ ; RV64ZBB-NEXT: add a0, a0, a1
2182+ ; RV64ZBB-NEXT: li a1, 0
2183+ ; RV64ZBB-NEXT: ret
2184+ ;
2185+ ; RV32XTHEADBB-LABEL: test_ctpop_i128:
2186+ ; RV32XTHEADBB: # %bb.0:
2187+ ; RV32XTHEADBB-NEXT: addi sp, sp, -32
2188+ ; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2189+ ; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2190+ ; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2191+ ; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2192+ ; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2193+ ; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
2194+ ; RV32XTHEADBB-NEXT: lw s0, 0(a1)
2195+ ; RV32XTHEADBB-NEXT: lw s1, 4(a1)
2196+ ; RV32XTHEADBB-NEXT: lw a2, 8(a1)
2197+ ; RV32XTHEADBB-NEXT: lw a1, 12(a1)
2198+ ; RV32XTHEADBB-NEXT: mv s2, a0
2199+ ; RV32XTHEADBB-NEXT: mv a0, a2
2200+ ; RV32XTHEADBB-NEXT: call __popcountdi2
2201+ ; RV32XTHEADBB-NEXT: mv s3, a0
2202+ ; RV32XTHEADBB-NEXT: mv s4, a1
2203+ ; RV32XTHEADBB-NEXT: mv a0, s0
2204+ ; RV32XTHEADBB-NEXT: mv a1, s1
2205+ ; RV32XTHEADBB-NEXT: call __popcountdi2
2206+ ; RV32XTHEADBB-NEXT: add a1, a1, s4
2207+ ; RV32XTHEADBB-NEXT: add s3, a0, s3
2208+ ; RV32XTHEADBB-NEXT: sltu a0, s3, a0
2209+ ; RV32XTHEADBB-NEXT: add a0, a1, a0
2210+ ; RV32XTHEADBB-NEXT: sw zero, 12(s2)
2211+ ; RV32XTHEADBB-NEXT: sw zero, 8(s2)
2212+ ; RV32XTHEADBB-NEXT: sw s3, 0(s2)
2213+ ; RV32XTHEADBB-NEXT: sw a0, 4(s2)
2214+ ; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2215+ ; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2216+ ; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2217+ ; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2218+ ; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2219+ ; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
2220+ ; RV32XTHEADBB-NEXT: addi sp, sp, 32
2221+ ; RV32XTHEADBB-NEXT: ret
2222+ ;
2223+ ; RV64XTHEADBB-LABEL: test_ctpop_i128:
2224+ ; RV64XTHEADBB: # %bb.0:
2225+ ; RV64XTHEADBB-NEXT: addi sp, sp, -32
2226+ ; RV64XTHEADBB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
2227+ ; RV64XTHEADBB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
2228+ ; RV64XTHEADBB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
2229+ ; RV64XTHEADBB-NEXT: mv s0, a0
2230+ ; RV64XTHEADBB-NEXT: mv a0, a1
2231+ ; RV64XTHEADBB-NEXT: call __popcountdi2
2232+ ; RV64XTHEADBB-NEXT: mv s1, a0
2233+ ; RV64XTHEADBB-NEXT: mv a0, s0
2234+ ; RV64XTHEADBB-NEXT: call __popcountdi2
2235+ ; RV64XTHEADBB-NEXT: add a0, a0, s1
2236+ ; RV64XTHEADBB-NEXT: li a1, 0
2237+ ; RV64XTHEADBB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
2238+ ; RV64XTHEADBB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
2239+ ; RV64XTHEADBB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
2240+ ; RV64XTHEADBB-NEXT: addi sp, sp, 32
2241+ ; RV64XTHEADBB-NEXT: ret
2242+ %1 = call i128 @llvm.ctpop.i128 (i128 %a )
2243+ ret i128 %1
2244+ }
2245+
20972246define i8 @test_parity_i8 (i8 %a ) {
20982247; RV32_NOZBB-LABEL: test_parity_i8:
20992248; RV32_NOZBB: # %bb.0:
0 commit comments