Skip to content

Commit 4689eae

Browse files
authored
Bump Mojmelo for Mojo 0.25.7 (#183)
* Bump Mojmelo for Mojo 0.25.7
* trailing whitespace fix
1 parent 4dbd830 commit 4689eae

File tree

5 files changed

+104
-86
lines changed

5 files changed

+104
-86
lines changed

recipes/mojmelo/recipe.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
context:
2-
version: "0.0.9"
2+
version: "0.1.0"
33

44
package:
55
name: "mojmelo"
66
version: ${{ version }}
77

88
source:
99
- git: https://github.com/yetalit/mojmelo.git
10-
rev: dd4c87ee4d28d242ce57006182b248c6f95ad37a
10+
rev: 01692d2078e55f4dbeac981240edc620d0dc96af
1111

1212
build:
1313
number: 0
@@ -16,7 +16,7 @@ build:
1616
- mojo package pixi/mojmelo -o ${{ PREFIX }}/lib/mojo/mojmelo.mojopkg
1717
requirements:
1818
host:
19-
- mojo-compiler =0.25.6
19+
- mojo-compiler =0.25.7
2020
run:
2121
- ${{ pin_compatible('mojo-compiler') }}
2222

@@ -27,7 +27,7 @@ tests:
2727
- mojo tests/setup.mojo
2828
requirements:
2929
run:
30-
- mojo-compiler =0.25.6
30+
- mojo-compiler =0.25.7
3131
files:
3232
recipe:
3333
- tests/setup.mojo

recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,36 +6,40 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized):
66
var height: Int
77
var width: Int
88
var size: Int
9-
var data: UnsafePointer[Float32]
9+
var data: UnsafePointer[Float32, MutAnyOrigin]
1010
var order: String
1111

1212
# initialize from UnsafePointer
1313
@always_inline
14-
fn __init__(out self, data: UnsafePointer[Float32], height: Int, width: Int, order: String = 'c'):
14+
fn __init__[src: DType = DType.float32](out self, data: UnsafePointer[Scalar[src], MutAnyOrigin], height: Int, width: Int, order: String = 'c'):
1515
self.height = height
1616
self.width = width
1717
self.size = height * width
18-
self.data = data
18+
if src == DType.float32:
19+
self.data = data.bitcast[Float32]()
20+
else:
21+
self.data = cast[src=src, des=DType.float32, width=self.simd_width](data, self.size)
22+
data.free()
1923
self.order = order.lower()
2024

2125
# initialize by copying from UnsafePointer
2226
@always_inline
23-
fn __init__(out self, height: Int, width: Int, data: UnsafePointer[Float32] = UnsafePointer[Float32](), order: String = 'c'):
27+
fn __init__(out self, height: Int, width: Int, data: UnsafePointer[Float32, MutAnyOrigin] = UnsafePointer[Float32, MutAnyOrigin](), order: String = 'c'):
2428
self.height = height
2529
self.width = width
2630
self.size = height * width
27-
self.data = UnsafePointer[Float32].alloc(self.size)
31+
self.data = alloc[Float32](self.size)
2832
self.order = order.lower()
2933
if data:
30-
memcpy(self.data, data, self.size)
34+
memcpy(dest=self.data, src=data, count=self.size)
3135

3236
fn __copyinit__(out self, other: Self):
3337
self.height = other.height
3438
self.width = other.width
3539
self.size = other.size
36-
self.data = UnsafePointer[Float32].alloc(self.size)
40+
self.data = alloc[Float32](self.size)
3741
self.order = other.order
38-
memcpy(self.data, other.data, self.size)
42+
memcpy(dest=self.data, src=other.data, count=self.size)
3943

4044
fn __moveinit__(out self, deinit existing: Self):
4145
self.height = existing.height
@@ -45,7 +49,7 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized):
4549
self.order = existing.order
4650
#existing.height = existing.width = existing.size = 0
4751
#existing.order = ''
48-
#existing.data = UnsafePointer[Float32]()
52+
#existing.data = UnsafePointer[Float32, MutAnyOrigin]()
4953

5054
# access an element
5155
@always_inline
@@ -56,7 +60,7 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized):
5660
else:
5761
loc = (column * self.height) + row
5862
if loc > self.size - 1 or loc < 0:
59-
raise Error("Error: Location is out of range!")
63+
raise Error("Location is out of range!")
6064
return self.data[loc]
6165

6266
@always_inline
@@ -72,6 +76,24 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized):
7276
fn __mul__(self, rhs: Self) raises -> Self:
7377
if self.width != rhs.height:
7478
raise Error('Error: Cannot multiply matrices with shapes (' + String(self.height) + ', ' + String(self.width) + ') and (' + String(rhs.height) + ', ' + String(rhs.width) + ')')
79+
80+
if self.height == 1 and rhs.width == 1:
81+
# Dot product
82+
var mat = Self(1, 1)
83+
mat.data[0] = self.ele_mul(rhs.T()).sum()
84+
return mat^
85+
86+
if self.height * self.width * rhs.width <= 4096:
87+
# matmul naive
88+
var mat = Self(self.height, rhs.width)
89+
for i in range(self.size):
90+
var rhsr = i % self.width
91+
for j in range(rhsr * rhs.width, rhsr * rhs.width + rhs.width):
92+
if rhsr != 0:
93+
mat.data[(Int(i / self.width) * mat.width) + (j % rhs.width)] += self.data[i] * rhs.data[j]
94+
else:
95+
mat.data[(Int(i / self.width) * mat.width) + (j % rhs.width)] = self.data[i] * rhs.data[j]
96+
return mat^
7597
var A = matmul.Matrix[DType.float32](self.data, (self.height, self.width))
7698
var B = matmul.Matrix[DType.float32](rhs.data, (rhs.height, rhs.width))
7799
var C = matmul.Matrix[DType.float32]((self.height, rhs.width))
@@ -91,7 +113,6 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized):
91113
return mat^
92114

93115
@staticmethod
94-
@always_inline
95116
fn random(height: Int, width: Int, order: String = 'c') -> Matrix:
96117
random.seed()
97118
var mat = Matrix(height, width, order= order)

recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/matmul.mojo

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
from algorithm import vectorize, parallelize
44
from memory.memory import _malloc, stack_allocation
55
from sys import CompilationTarget, num_performance_cores, simd_width_of, size_of
6-
import benchmark
7-
from testing import assert_equal
86
from utils import IndexList
97
import random
108
from .params import *
@@ -37,11 +35,11 @@ struct Layout(Copyable, Movable, Writable):
3735
var shape: IndexList[2]
3836
var strides: IndexList[2]
3937

40-
fn __init__(out self, shape: (Int, Int), strides: (Int, Int)):
38+
fn __init__(out self, shape: Tuple[Int, Int], strides: Tuple[Int, Int]):
4139
self.shape = IndexList[2](shape[0], shape[1])
4240
self.strides = IndexList[2](strides[0], strides[1])
4341

44-
fn __init__(out self, shape: (Int, Int)):
42+
fn __init__(out self, shape: Tuple[Int, Int]):
4543
self.strides = IndexList[2](shape[1], 1)
4644
self.shape = IndexList[2](shape[0], shape[1])
4745

@@ -59,31 +57,31 @@ struct Layout(Copyable, Movable, Writable):
5957

6058

6159
struct Matrix[Type: DType]:
62-
var data: UnsafePointer[Scalar[Type]]
60+
var data: UnsafePointer[Scalar[Type], MutAnyOrigin]
6361
var layout: Layout
6462

65-
fn __init__(out self, shape: (Int, Int)):
66-
self.data = UnsafePointer[Scalar[Type]].alloc(shape[0] * shape[1])
63+
fn __init__(out self, shape: Tuple[Int, Int]):
64+
self.data = alloc[Scalar[Type]](shape[0] * shape[1])
6765
self.layout = Layout(shape)
6866

6967
@always_inline("nodebug")
7068
fn __init__(
71-
out self, data: UnsafePointer[Scalar[Type]], var layout: Layout
69+
out self, data: UnsafePointer[Scalar[Type], MutAnyOrigin], var layout: Layout
7270
):
73-
self.data = UnsafePointer[Scalar[Type]](data)
71+
self.data = data
7472
self.layout = layout
7573

7674
@always_inline("nodebug")
7775
fn __init__(
78-
out self, data: UnsafePointer[Scalar[Type]], shape: (Int, Int)
76+
out self, data: UnsafePointer[Scalar[Type], MutAnyOrigin], shape: Tuple[Int, Int]
7977
):
8078
self.data = data
8179
self.layout = Layout(shape)
8280

8381
@always_inline("nodebug")
8482
fn __getitem__(
8583
ref [_]self, i: Int, j: Int
86-
) -> ref [__origin_of(self)] Scalar[Type]:
84+
) -> ref [origin_of(self)] Scalar[Type]:
8785
var offset = self.layout(i, j)
8886
return (self.data + offset)[]
8987

@@ -146,7 +144,7 @@ struct Matrix[Type: DType]:
146144
@always_inline
147145
fn pack_A[
148146
Type: DType, //, mr: Int
149-
](mc: Int, Ac_buffer: UnsafePointer[Scalar[Type]], Ac: Matrix[Type]) -> Matrix[Type]:
147+
](mc: Int, Ac_buffer: UnsafePointer[Scalar[Type], MutAnyOrigin], Ac: Matrix[Type]) -> Matrix[Type]:
150148
@parameter
151149
fn pack_panel(idx: Int):
152150
var i = idx * mr
@@ -184,7 +182,7 @@ fn pack_A[
184182
@always_inline
185183
fn pack_B[
186184
Type: DType, //, kc: Int, nr: Int
187-
](Bc_buffer: UnsafePointer[Scalar[Type]], Bc: Matrix[Type]) -> Matrix[Type]:
185+
](Bc_buffer: UnsafePointer[Scalar[Type], MutAnyOrigin], Bc: Matrix[Type]) -> Matrix[Type]:
188186
var dst_ptr = Bc_buffer
189187
for i in range(0, Bc.shape[1](), nr):
190188
var src_ptr = Bc.data + i
@@ -267,7 +265,7 @@ fn loop_n[
267265

268266
@parameter
269267
fn parallelize_balanced_part(idx: Int):
270-
var Bc_buffer = UnsafePointer[Scalar[Type]](
268+
var Bc_buffer = UnsafePointer[Scalar[Type], MutAnyOrigin](
271269
_malloc[Scalar[Type]](
272270
kc * nc_per_thread * size_of[Type](), alignment=64
273271
)
@@ -290,7 +288,7 @@ fn loop_n[
290288

291289
@parameter
292290
fn parallelize_remainder(idx: Int):
293-
var Bc_buffer = UnsafePointer[Scalar[Type]](
291+
var Bc_buffer = UnsafePointer[Scalar[Type], MutAnyOrigin](
294292
_malloc[Scalar[Type]](
295293
kc * remainder_per_thread * size_of[Type](), alignment=64
296294
)
@@ -348,7 +346,7 @@ fn macro_kernel[
348346
fn micro_kernel[
349347
Type: DType, //, mr: Int, nr: Int, padding: Bool
350348
](mut Cr: Matrix[Type], Ar: Matrix[Type], Br: Matrix[Type]):
351-
alias simd_width = simd_width_of[Type]()
349+
comptime simd_width = simd_width_of[Type]()
352350
constrained[nr % simd_width == 0, "nr must be multiple of simd_width"]()
353351

354352
var Ar_ptr = Ar.data
@@ -440,31 +438,31 @@ fn micro_kernel[
440438

441439
@always_inline
442440
fn matmul_params[Type: DType]() -> IndexList[5]:
443-
alias mc = 8192 // size_of[Type]() # fix this for simplicity
444-
alias N = simd_width_of[Type]()
441+
comptime mc = 8192 // size_of[Type]() # fix this for simplicity
442+
comptime N = simd_width_of[Type]()
445443

446-
alias Vectors = 32 if CompilationTarget.has_avx512f() else 16
444+
comptime Vectors = 32 if CompilationTarget.has_avx512f() else 16
447445

448446
@parameter
449447
fn compute_kc[mr: Int, nr: Int]() -> Int:
450-
alias CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr))
448+
comptime CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr))
451449
return (CBr * L1_CACHE_SIZE) // (nr * size_of[Type]() * L1_ASSOCIATIVITY)
452450

453451
@parameter
454452
fn compute_params[C: Int]() -> IndexList[5]:
455-
alias p = C // (intsqrt[C]() + 1)
456-
alias mr = C // p - 1
457-
alias nr = p * N
458-
alias CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr))
459-
alias kc = compute_kc[mr, nr]()
460-
alias nc = (L2_ASSOCIATIVITY - 1) * L2_CACHE_SIZE // (
453+
comptime p = C // (intsqrt[C]() + 1)
454+
comptime mr = C // p - 1
455+
comptime nr = p * N
456+
comptime CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr))
457+
comptime kc = compute_kc[mr, nr]()
458+
comptime nc = (L2_ASSOCIATIVITY - 1) * L2_CACHE_SIZE // (
461459
kc * size_of[Type]() * L2_ASSOCIATIVITY
462460
) - mr
463461
return IndexList[5](mc, nc, kc, mr, nr)
464462

465463
@parameter
466464
if Type.is_floating_point():
467-
alias TempVectors = 1
465+
comptime TempVectors = 1
468466
return compute_params[Vectors - TempVectors]()
469467
else:
470468

@@ -473,25 +471,25 @@ fn matmul_params[Type: DType]() -> IndexList[5]:
473471

474472
@parameter
475473
if CompilationTarget.has_avx512f():
476-
alias TempVectors = 2
474+
comptime TempVectors = 2
477475
return compute_params[Vectors - TempVectors]()
478476
else:
479-
alias TempVectors = 3
477+
comptime TempVectors = 3
480478
return compute_params[Vectors - TempVectors]()
481479
else:
482-
alias TempVectors = 2
480+
comptime TempVectors = 2
483481
return compute_params[Vectors - TempVectors]()
484482

485483

486484
fn matmul[
487485
Type: DType
488486
](m: Int, n: Int, k: Int, mut C: Matrix[Type], A: Matrix[Type], B: Matrix[Type]):
489-
alias params = matmul_params[Type]()
490-
alias mc = params[0]
491-
alias nc = params[1]
492-
alias kc = params[2]
493-
alias mr = params[3]
494-
alias nr = params[4]
487+
comptime params = matmul_params[Type]()
488+
comptime mc = params[0]
489+
comptime nc = params[1]
490+
comptime kc = params[2]
491+
comptime mr = params[3]
492+
comptime nr = params[4]
495493
var resized_mc = roundup(min(mc, m), mr)
496494
var resized_nc = roundup(min(nc, n), nr)
497495
matmul_impl[kc, mr, nr](resized_mc, resized_nc, C, A, B)
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
alias L1_CACHE_SIZE = 32768
2-
alias L1_ASSOCIATIVITY = 8
3-
alias L2_CACHE_SIZE = 262144
4-
alias L2_ASSOCIATIVITY = 4
1+
comptime L1_CACHE_SIZE = 32768
2+
comptime L1_ASSOCIATIVITY = 8
3+
comptime L2_CACHE_SIZE = 262144
4+
comptime L2_ASSOCIATIVITY = 4

0 commit comments

Comments
 (0)