Skip to content

Commit 4f9a7d8

Browse files
Enable tuner name (#1071)
1 parent 477dc3b commit 4f9a7d8

File tree

18 files changed

+403
-145
lines changed

18 files changed

+403
-145
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ serde_json = { version = "1.0.119", default-features = false }
3535
toml = "0.9.1"
3636
variadics_please = "1"
3737

38-
# no_std compatiblity
38+
# no_std compatibility
3939
dashmap = "6.1.0"
4040
foldhash = { version = "0.1.2", default-features = false }
4141
hashbrown = "0.15.5"

crates/cubecl-attention/src/components/tile/fragments.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::components::tile::RowWise;
77

88
#[cube]
99
/// Describes how a fragment is fragmented across units
10-
/// The layout is independant of the data and data types
10+
/// The layout is independent of the data and data types
1111
pub trait FragmentLayout: CubeType {
1212
/// Maps the (row, col) of the registers of a single unit to the position within the whole tile
1313
///

crates/cubecl-attention/src/components/tile/rowwise.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::components::tile::FULLY_MASKED_ROW_THRESHOLD;
1010
/// every unit holds 8 values in the tile.
1111
///
1212
/// In the following layout, values are held contiguously, and num_rows=1 because
13-
/// every two occurences of the same plane id are in the same row
13+
/// every two occurrences of the same plane id are in the same row
1414
/// 0, 0, 1, 1, 2, 2, 3, 3,
1515
/// 4, 4, 5, 5, 6, 6, 7, 7,
1616
/// 8, 8, 9, 9, 10, 10, 11, 11,
@@ -21,7 +21,7 @@ use crate::components::tile::FULLY_MASKED_ROW_THRESHOLD;
2121
/// 28, 28, 29, 29, 30, 30, 31, 31,
2222
///
2323
/// In the following layout, values are held disjointly, and num_rows=2 because
24-
/// the two occurences of the same plane id are not in the same row
24+
/// the two occurrences of the same plane id are not in the same row
2525
/// 0, 1, 2, 3, 4, 5, 6, 7,
2626
/// 8, 9, 10, 11, 12, 13, 14, 15,
2727
/// 16, 17, 18, 19, 20, 21, 22, 23,

crates/cubecl-convolution/src/components/error.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
use std::fmt::Debug;
2-
3-
use cubecl_core::tune::AutotuneError;
41
use cubecl_matmul::components::{MatmulAvailabilityError, MatmulSetupError};
2+
use std::fmt::Debug;
53

64
#[allow(clippy::large_enum_variant)]
75
pub enum ConvSetupError {
@@ -40,8 +38,8 @@ impl From<MatmulAvailabilityError> for ConvSetupError {
4038
}
4139

4240
#[allow(clippy::from_over_into)]
43-
impl Into<AutotuneError> for ConvSetupError {
44-
fn into(self) -> AutotuneError {
45-
AutotuneError::Unknown(format!("{self:?}"))
41+
impl Into<String> for ConvSetupError {
42+
fn into(self) -> String {
43+
format!("{self:?}")
4644
}
4745
}

crates/cubecl-core/src/frontend/synchronization.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub mod sync_storage {
5454

5555
/// `sync_async_proxy_shared` is a synchronization fence for the experimental SM 9.0+ copy
5656
/// functions, applying bidirectionally between the async proxy (i.e. TMA) and shared memory.
57-
/// Should be used after intializing the barriers, and before the copy operation.
57+
/// Should be used after initializing the barriers, and before the copy operation.
5858
/// PTX: `fence.proxy.async.shared::cta`
5959
/// Experimental and subject to change.
6060
pub fn sync_async_proxy_shared() {

crates/cubecl-cpp/src/shared/unary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ function!(Cos, "cos");
155155
function!(Tan, "tan", false);
156156
function!(Sinh, "sinh", false);
157157
function!(Cosh, "cosh", false);
158-
// Tanh is separete below, idk why
158+
// Tanh is separate below, idk why
159159
function!(ArcCos, "acos", false);
160160
function!(ArcSin, "asin", false);
161161
function!(ArcTan, "atan", false);

crates/cubecl-matmul/src/base.rs

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::fmt::Display;
2+
13
use cubecl_common::quant::scheme::{QuantScheme, QuantStore, QuantValue};
24
use cubecl_core::{
35
Runtime,
@@ -83,6 +85,119 @@ pub enum Strategy {
8385
Auto,
8486
}
8587

88+
impl Display for Strategy {
89+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90+
match self {
91+
Strategy::Simple {
92+
read_strategy,
93+
selection,
94+
tile_kind,
95+
} => {
96+
f.write_fmt(format_args!("matmul_simple_{read_strategy}_{tile_kind}"))?;
97+
98+
match selection {
99+
Selection::Forced(_) => f.write_str("_forced_selection")?,
100+
Selection::Inferred(args) => {
101+
if args.multi_rows {
102+
f.write_str("_multirows")?;
103+
}
104+
}
105+
};
106+
}
107+
Strategy::DoubleBuffering {
108+
read_strategy,
109+
selection,
110+
tile_kind,
111+
} => {
112+
f.write_fmt(format_args!(
113+
"matmul_double_buffering_{read_strategy}_{tile_kind}"
114+
))?;
115+
116+
match selection {
117+
Selection::Forced(_) => f.write_str("_forced_selection")?,
118+
Selection::Inferred(args) => {
119+
if args.specialized {
120+
f.write_str("_specialized")?;
121+
}
122+
}
123+
};
124+
}
125+
Strategy::Specialized {
126+
selection,
127+
tile_kind,
128+
} => {
129+
f.write_fmt(format_args!("matmul_specialized_{tile_kind}"))?;
130+
131+
match selection {
132+
Selection::Forced(_) => f.write_str("_forced_selection")?,
133+
Selection::Inferred(_) => {}
134+
};
135+
}
136+
Strategy::SimpleUnit(selection) => {
137+
f.write_fmt(format_args!("matmul_simple_unit"))?;
138+
139+
match selection {
140+
Selection::Forced(_) => f.write_str("_forced_selection")?,
141+
Selection::Inferred(args) => {
142+
f.write_fmt(format_args!("_{}", args.tile_size))?;
143+
}
144+
};
145+
}
146+
Strategy::DoubleUnit(selection) => {
147+
f.write_str("matmul_double_buffering_unit")?;
148+
149+
match selection {
150+
Selection::Forced(_) => f.write_str("_forced_selection")?,
151+
Selection::Inferred(args) => {
152+
f.write_fmt(format_args!("_{}", args.tile_size))?;
153+
}
154+
};
155+
}
156+
Strategy::SimpleVecMat(selection) => {
157+
f.write_str("vecmat_simple")?;
158+
159+
match selection {
160+
Selection::Forced(_) => f.write_str("_forced_selection")?,
161+
Selection::Inferred(_) => {}
162+
};
163+
}
164+
Strategy::DoubleVecMat(selection) => {
165+
f.write_str("vecmat_double_buffering")?;
166+
167+
match selection {
168+
Selection::Forced(_) => f.write_str("_forced_selection")?,
169+
Selection::Inferred(_) => {}
170+
};
171+
}
172+
Strategy::OrderedDoubleBuffering {
173+
selection,
174+
tile_kind,
175+
} => {
176+
f.write_fmt(format_args!("matmul_double_buffering_ordered_{tile_kind}"))?;
177+
178+
match selection {
179+
Selection::Forced(_) => f.write_str("_forced_selection")?,
180+
Selection::Inferred(args) => {
181+
if let Some(k) = args.partition_k {
182+
f.write_fmt(format_args!("_partition_k{}", k))?;
183+
}
184+
if let Some(r) = args.row_count {
185+
f.write_fmt(format_args!("_row_count{}", r))?;
186+
}
187+
if let Some(r) = args.rows_per_plane {
188+
f.write_fmt(format_args!("_row_per_plane{}", r))?;
189+
}
190+
}
191+
};
192+
}
193+
Strategy::Naive => f.write_str("matmul_naive")?,
194+
Strategy::Auto => f.write_str("matmul_auto")?,
195+
};
196+
197+
Ok(())
198+
}
199+
}
200+
86201
#[derive(Debug, Clone, Copy)]
87202
/// Which reader to use in simple algorithms
88203
pub enum ReadingStrategy {
@@ -113,6 +228,43 @@ pub enum AcceleratedTileKind {
113228
Mma,
114229
}
115230

231+
// Display implementations are used to combine and save names when autotuning.
232+
233+
impl Display for AcceleratedTileKind {
234+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235+
match self {
236+
AcceleratedTileKind::Cmma => f.write_str("cmma"),
237+
AcceleratedTileKind::Mma => f.write_str("mma"),
238+
}
239+
}
240+
}
241+
242+
impl Display for ReadingStrategy {
243+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
244+
match self {
245+
ReadingStrategy::Cyclic => f.write_str("cyclic"),
246+
ReadingStrategy::Strided => f.write_str("strided"),
247+
ReadingStrategy::Tilewise => f.write_str("tilewise"),
248+
ReadingStrategy::AsyncCooperative => f.write_str("async_cooperative"),
249+
ReadingStrategy::AsyncCyclic => f.write_str("async_cyclic"),
250+
ReadingStrategy::AsyncMaximizeSliceLength => f.write_str("async_maximize_slice_length"),
251+
ReadingStrategy::AsyncMaximizeUnitCount => f.write_str("async_maximize_unit_count"),
252+
ReadingStrategy::Tma => f.write_str("tma"),
253+
}
254+
}
255+
}
256+
257+
impl Display for PartialReadingStrategy {
258+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
259+
match self {
260+
PartialReadingStrategy::Cyclic => f.write_str("cyclic"),
261+
PartialReadingStrategy::Tilewise => f.write_str("tilewise"),
262+
PartialReadingStrategy::Hybrid => f.write_str("hybrid"),
263+
PartialReadingStrategy::Tma => f.write_str("tma"),
264+
}
265+
}
266+
}
267+
116268
macro_rules! with_tile_kind {
117269
($kind: expr, $T: ident, $launch: expr) => {
118270
match $kind {

crates/cubecl-matmul/src/kernels/layered/selector/unit.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::fmt::Display;
2+
13
use crate::components::{
24
MatmulElems, MatmulKind, MatmulLineSizes, MatmulProblem, MatmulSelection, MatrixLayout,
35
SwizzleConfig, TilingScheme,
@@ -15,6 +17,15 @@ pub enum TileSizeSelection {
1517
MaxTileSize,
1618
}
1719

20+
impl Display for TileSizeSelection {
21+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22+
match self {
23+
TileSizeSelection::MinTileSize => f.write_str("min_tile_size"),
24+
TileSizeSelection::MaxTileSize => f.write_str("max_tile_size"),
25+
}
26+
}
27+
}
28+
1829
#[derive(Default, Clone, Copy, Debug)]
1930
pub enum PartitionScaling {
2031
#[default]

crates/cubecl-runtime/src/client.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ impl<R: Runtime> ComputeClient<R> {
411411
self.do_empty(descriptors).unwrap()
412412
}
413413

414-
/// Marks the given [Bytes] as being a staging buffer, maybe transfering it to pinned memory
414+
/// Marks the given [Bytes] as being a staging buffer, maybe transferring it to pinned memory
415415
/// for faster data transfer with compute device.
416416
pub fn staging<'a, I>(&self, bytes: I, file_only: bool)
417417
where

crates/cubecl-runtime/src/memory_management/memory_pool/memory_page.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use alloc::vec::Vec;
1111
use core::fmt::{Debug, Display};
1212
use hashbrown::HashMap;
1313

14-
/// A memory page is responsable to reserve [slices](Slice) of data based on a fixed [storage buffer](StorageHandle).
14+
/// A memory page is responsible for reserving [slices](Slice) of data based on a fixed [storage buffer](StorageHandle).
1515
pub struct MemoryPage {
1616
storage: StorageHandle,
1717
slices: Vec<Slice>,
@@ -117,7 +117,7 @@ impl MemoryPage {
117117
continue;
118118
}
119119

120-
let can_be_splitted = slice.storage.utilization.size > effective_size;
120+
let can_be_split = slice.storage.utilization.size > effective_size;
121121
let handle = slice.handle.clone();
122122

123123
let storage_old = slice.storage.clone();
@@ -126,7 +126,7 @@ impl MemoryPage {
126126
slice.storage.utilization.size = size;
127127
slice.padding = padding;
128128

129-
if can_be_splitted {
129+
if can_be_split {
130130
let new_slice = Slice {
131131
handle: SliceHandle::new(),
132132
storage: storage_old.offset_start(effective_size),

0 commit comments

Comments
 (0)