Skip to content

Commit 1381277

Browse files
committed
Some initial benchmarks for simd integer arithmetic.
1 parent 8c2a040 commit 1381277

File tree

4 files changed

+248
-1
lines changed

4 files changed

+248
-1
lines changed

benchmark/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ set(SWIFT_BENCH_MODULES
158158
single-source/RemoveWhere
159159
single-source/ReversedCollections
160160
single-source/RomanNumbers
161+
single-source/SIMDReduceInteger
161162
single-source/SequenceAlgos
162163
single-source/SetTests
163164
single-source/SevenBoom
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
//===--- SIMDReduceInteger.swift ------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
import TestsUtils
14+
15+
/// Benchmarks that compute a wrapping sum of squares over a fixed buffer.
///
/// The `x1` variants are scalar loops. The `_init` variants build each SIMD
/// vector from a slice of the buffer; the `_cast` variants read vectors
/// through a pointer reinterpreted as the vector type.
///
/// Every `setUpFunction` touches the buffer that its own benchmark reads, so
/// the lazily-initialized global is created during setup rather than inside
/// the run function. The Int8 benchmarks therefore warm `int8Data`, not
/// `int32Data`.
public let SIMDReduceInteger = [
  BenchmarkInfo(
    name: "SIMDReduceInt32x1",
    runFunction: run_SIMDReduceInt32x1,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt32x4_init",
    runFunction: run_SIMDReduceInt32x4_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt32x4_cast",
    runFunction: run_SIMDReduceInt32x4_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt32x16_init",
    runFunction: run_SIMDReduceInt32x16_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt32x16_cast",
    runFunction: run_SIMDReduceInt32x16_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt8x1",
    runFunction: run_SIMDReduceInt8x1,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt8x16_init",
    runFunction: run_SIMDReduceInt8x16_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt8x16_cast",
    runFunction: run_SIMDReduceInt8x16_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt8x64_init",
    runFunction: run_SIMDReduceInt8x64_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduceInt8x64_cast",
    runFunction: run_SIMDReduceInt8x64_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  )
]
77+
78+
/// Input shared by the Int32 benchmarks: 64 pseudo-random `Int32` values
/// stored in a 16-byte-aligned allocation. The allocation is never released.
let int32Data: UnsafeBufferPointer<Int32> = {
  let elementCount = 64
  // Reserve raw storage for `elementCount` Int32s, aligned so that it can
  // also be viewed as SIMD vector types.
  let rawStorage = UnsafeMutableRawBufferPointer.allocate(
    byteCount: MemoryLayout<Int32>.size * elementCount, alignment: 16
  )
  // Initialize the storage as zeroed Int32s, then overwrite each element
  // with a value drawn from a fixed-seed generator so every run sees the
  // same data.
  let elements = rawStorage.initializeMemory(as: Int32.self, repeating: 0)
  var generator = SplitMix64(seed: 0)
  for index in elements.indices {
    elements[index] = Int32.random(
      in: Int32.min ... Int32.max, using: &generator
    )
  }
  return UnsafeBufferPointer(elements)
}()
93+
94+
/// Scalar baseline: accumulates the wrapping sum of squares of `int32Data`
/// one element at a time.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt32x1(_ N: Int) {
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var sumOfSquares: Int32 = 0
    for element in int32Data {
      sumOfSquares &+= element &* element
    }
    // Hand the result to blackHole so the reduction is not optimized away.
    blackHole(sumOfSquares)
  }
}
104+
105+
/// Four-lane reduction where each `SIMD4<Int32>` is constructed from a
/// four-element slice of `int32Data`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt32x4_init(_ N: Int) {
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD4<Int32>()
    for start in stride(from: 0, to: int32Data.count, by: 4) {
      // The slice-based initializer is the construct under test here.
      let chunk = SIMD4(int32Data[start ..< start+4])
      laneSums &+= chunk &* chunk
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
116+
117+
/// Four-lane reduction that reads `int32Data` through a pointer
/// reinterpreted as `SIMD4<Int32>`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt32x4_cast(_ N: Int) {
  // Morally, withMemoryRebound to SIMD4<Int32> "should" be the tool for
  // this, but that function requires the sizes to match in debug builds.
  // Going through a raw pointer and assumingMemoryBound "works" for now,
  // though it is probably in violation of the formal model (the exact
  // rules for "assumingMemoryBound" are not clearly documented). A better
  // solution is needed.
  let vectorBase = UnsafeRawPointer(int32Data.baseAddress!)
    .assumingMemoryBound(to: SIMD4<Int32>.self)
  let vectors = UnsafeBufferPointer<SIMD4<Int32>>(
    start: vectorBase,
    count: int32Data.count / 4
  )
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD4<Int32>()
    for vector in vectors {
      laneSums &+= vector &* vector
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
137+
138+
/// Sixteen-lane reduction where each `SIMD16<Int32>` is constructed from a
/// sixteen-element slice of `int32Data`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt32x16_init(_ N: Int) {
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD16<Int32>()
    for start in stride(from: 0, to: int32Data.count, by: 16) {
      // The slice-based initializer is the construct under test here.
      let chunk = SIMD16(int32Data[start ..< start+16])
      laneSums &+= chunk &* chunk
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
149+
150+
/// Sixteen-lane reduction that reads `int32Data` through a pointer
/// reinterpreted as `SIMD16<Int32>`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt32x16_cast(_ N: Int) {
  // View the Int32 storage as whole vectors by going through a raw pointer
  // and assumingMemoryBound; sixteen scalars make up one vector.
  let vectorBase = UnsafeRawPointer(int32Data.baseAddress!)
    .assumingMemoryBound(to: SIMD16<Int32>.self)
  let vectors = UnsafeBufferPointer<SIMD16<Int32>>(
    start: vectorBase,
    count: int32Data.count / 16
  )
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD16<Int32>()
    for vector in vectors {
      laneSums &+= vector &* vector
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
164+
165+
/// Input shared by the Int8 benchmarks: 256 pseudo-random `Int8` values
/// stored in a 16-byte-aligned allocation. The allocation is never released.
let int8Data: UnsafeBufferPointer<Int8> = {
  let elementCount = 256
  // Reserve raw storage for `elementCount` Int8s, aligned so that it can
  // also be viewed as SIMD vector types.
  let rawStorage = UnsafeMutableRawBufferPointer.allocate(
    byteCount: MemoryLayout<Int8>.size * elementCount, alignment: 16
  )
  // Initialize the storage as zeroed Int8s, then overwrite each element
  // with a value drawn from a fixed-seed generator so every run sees the
  // same data.
  let elements = rawStorage.initializeMemory(as: Int8.self, repeating: 0)
  var generator = SplitMix64(seed: 0)
  for index in elements.indices {
    elements[index] = Int8.random(
      in: Int8.min ... Int8.max, using: &generator
    )
  }
  return UnsafeBufferPointer(elements)
}()
180+
181+
/// Scalar baseline: accumulates the wrapping sum of squares of `int8Data`
/// one element at a time.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt8x1(_ N: Int) {
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var sumOfSquares: Int8 = 0
    for element in int8Data {
      sumOfSquares &+= element &* element
    }
    // Hand the result to blackHole so the reduction is not optimized away.
    blackHole(sumOfSquares)
  }
}
191+
192+
/// Sixteen-lane reduction where each `SIMD16<Int8>` is constructed from a
/// sixteen-element slice of `int8Data`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt8x16_init(_ N: Int) {
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD16<Int8>()
    for start in stride(from: 0, to: int8Data.count, by: 16) {
      // The slice-based initializer is the construct under test here.
      let chunk = SIMD16(int8Data[start ..< start+16])
      laneSums &+= chunk &* chunk
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
203+
204+
/// Sixteen-lane reduction that reads `int8Data` through a pointer
/// reinterpreted as `SIMD16<Int8>`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt8x16_cast(_ N: Int) {
  // View the Int8 storage as whole vectors by going through a raw pointer
  // and assumingMemoryBound; sixteen scalars make up one vector.
  let vectorBase = UnsafeRawPointer(int8Data.baseAddress!)
    .assumingMemoryBound(to: SIMD16<Int8>.self)
  let vectors = UnsafeBufferPointer<SIMD16<Int8>>(
    start: vectorBase,
    count: int8Data.count / 16
  )
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD16<Int8>()
    for vector in vectors {
      laneSums &+= vector &* vector
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
218+
219+
/// Sixty-four-lane reduction where each `SIMD64<Int8>` is constructed from
/// a sixty-four-element slice of `int8Data`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt8x64_init(_ N: Int) {
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD64<Int8>()
    for start in stride(from: 0, to: int8Data.count, by: 64) {
      // The slice-based initializer is the construct under test here.
      let chunk = SIMD64(int8Data[start ..< start+64])
      laneSums &+= chunk &* chunk
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}
230+
231+
/// Sixty-four-lane reduction that reads `int8Data` through a pointer
/// reinterpreted as `SIMD64<Int8>`.
///
/// - Parameter N: Scale factor; the reduction is repeated `1000*N` times.
@inline(never)
public func run_SIMDReduceInt8x64_cast(_ N: Int) {
  // View the Int8 storage as whole vectors by going through a raw pointer
  // and assumingMemoryBound; sixty-four scalars make up one vector.
  let vectorBase = UnsafeRawPointer(int8Data.baseAddress!)
    .assumingMemoryBound(to: SIMD64<Int8>.self)
  let vectors = UnsafeBufferPointer<SIMD64<Int8>>(
    start: vectorBase,
    count: int8Data.count / 64
  )
  let repetitions = 1000*N
  for _ in 0 ..< repetitions {
    var laneSums = SIMD64<Int8>()
    for vector in vectors {
      laneSums &+= vector &* vector
    }
    // Collapse the per-lane sums into a single wrapping total.
    blackHole(laneSums.wrappedSum())
  }
}

benchmark/utils/TestsUtils.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public enum BenchmarkCategory : String {
2323
// we know is important to measure.
2424
case validation
2525
// subsystems to validate and their subcategories.
26-
case api, Array, String, Dictionary, Codable, Set, Data, IndexPath
26+
case api, Array, String, Dictionary, Codable, Set, Data, IndexPath, SIMD
2727
case sdk
2828
case runtime, refcount, metadata
2929
// Other general areas of compiled code validation.

benchmark/utils/main.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ import ReduceInto
156156
import RemoveWhere
157157
import ReversedCollections
158158
import RomanNumbers
159+
import SIMDReduceInteger
159160
import SequenceAlgos
160161
import SetTests
161162
import SevenBoom
@@ -352,6 +353,7 @@ registerBenchmark(ReduceInto)
352353
registerBenchmark(RemoveWhere)
353354
registerBenchmark(ReversedCollections)
354355
registerBenchmark(RomanNumbers)
356+
registerBenchmark(SIMDReduceInteger)
355357
registerBenchmark(SequenceAlgos)
356358
registerBenchmark(SetTests)
357359
registerBenchmark(SevenBoom)

0 commit comments

Comments
 (0)