From 1dacb76cc17738760b918c69394dfd1904e21cd6 Mon Sep 17 00:00:00 2001 From: Yetalit <98420273+yetalit@users.noreply.github.com> Date: Thu, 20 Nov 2025 21:23:08 +0300 Subject: [PATCH 1/2] Bump Mojmelo for Mojo 0.25.7 --- recipes/mojmelo/recipe.yaml | 8 +-- .../mojmelo/tests/mojmelo/utils/Matrix.mojo | 43 +++++++++--- .../mojmelo/utils/mojmelo_matmul/matmul.mojo | 70 +++++++++---------- .../mojmelo/utils/mojmelo_matmul/params.mojo | 8 +-- recipes/mojmelo/tests/setup.mojo | 61 ++++++++-------- 5 files changed, 104 insertions(+), 86 deletions(-) diff --git a/recipes/mojmelo/recipe.yaml b/recipes/mojmelo/recipe.yaml index 48b019c6..1d9a8e47 100644 --- a/recipes/mojmelo/recipe.yaml +++ b/recipes/mojmelo/recipe.yaml @@ -1,5 +1,5 @@ context: - version: "0.0.9" + version: "0.1.0" package: name: "mojmelo" @@ -7,7 +7,7 @@ package: source: - git: https://github.com/yetalit/mojmelo.git - rev: dd4c87ee4d28d242ce57006182b248c6f95ad37a + rev: 01692d2078e55f4dbeac981240edc620d0dc96af build: number: 0 @@ -16,7 +16,7 @@ build: - mojo package pixi/mojmelo -o ${{ PREFIX }}/lib/mojo/mojmelo.mojopkg requirements: host: - - mojo-compiler =0.25.6 + - mojo-compiler =0.25.7 run: - ${{ pin_compatible('mojo-compiler') }} @@ -27,7 +27,7 @@ tests: - mojo tests/setup.mojo requirements: run: - - mojo-compiler =0.25.6 + - mojo-compiler =0.25.7 files: recipe: - tests/setup.mojo diff --git a/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo b/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo index 5813013b..6ca751dc 100644 --- a/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo +++ b/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo @@ -6,36 +6,40 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized): var height: Int var width: Int var size: Int - var data: UnsafePointer[Float32] + var data: UnsafePointer[Float32, MutAnyOrigin] var order: String # initialize from UnsafePointer @always_inline - fn __init__(out self, data: UnsafePointer[Float32], height: Int, width: Int, order: String = 'c'): + fn __init__[src: DType = DType.float32](out self, data: UnsafePointer[Scalar[src], MutAnyOrigin], height: Int, width: Int, order: String = 'c'): self.height = height self.width = width self.size = height * width - self.data = data + if src == DType.float32: + self.data = data.bitcast[Float32]() + else: + self.data = cast[src=src, des=DType.float32, width=self.simd_width](data, self.size) + data.free() self.order = order.lower() # initialize by copying from UnsafePointer @always_inline - fn __init__(out self, height: Int, width: Int, data: UnsafePointer[Float32] = UnsafePointer[Float32](), order: String = 'c'): + fn __init__(out self, height: Int, width: Int, data: UnsafePointer[Float32, MutAnyOrigin] = UnsafePointer[Float32, MutAnyOrigin](), order: String = 'c'): self.height = height self.width = width self.size = height * width - self.data = UnsafePointer[Float32].alloc(self.size) + self.data = alloc[Float32](self.size) self.order = order.lower() if data: - memcpy(self.data, data, self.size) + memcpy(dest=self.data, src=data, count=self.size) fn __copyinit__(out self, other: Self): self.height = other.height self.width = other.width self.size = other.size - self.data = UnsafePointer[Float32].alloc(self.size) + self.data = alloc[Float32](self.size) self.order = other.order - memcpy(self.data, other.data, self.size) + memcpy(dest=self.data, src=other.data, count=self.size) fn __moveinit__(out self, deinit existing: Self): self.height = existing.height @@ -45,7 +49,7 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized): self.order = existing.order #existing.height = existing.width = existing.size = 0 #existing.order = '' - #existing.data = UnsafePointer[Float32]() + #existing.data = UnsafePointer[Float32, MutAnyOrigin]() # access an element @always_inline @@ -56,7 +60,7 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized): else: loc = (column * self.height) + row if loc > self.size - 1 or loc < 0: - raise Error("Error: Location is out of range!") + raise Error("Location is out of range!") return self.data[loc] @always_inline @@ -72,6 +76,24 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized): fn __mul__(self, rhs: Self) raises -> Self: if self.width != rhs.height: raise Error('Error: Cannot multiply matrices with shapes (' + String(self.height) + ', ' + String(self.width) + ') and (' + String(rhs.height) + ', ' + String(rhs.width) + ')') + + if self.height == 1 and rhs.width == 1: + # Dot product + var mat = Self(1, 1) + mat.data[0] = self.ele_mul(rhs.T()).sum() + return mat^ + + if self.height * self.width * rhs.width <= 4096: + # matmul naive + var mat = Self(self.height, rhs.width) + for i in range(self.size): + var rhsr = i % self.width + for j in range(rhsr * rhs.width, rhsr * rhs.width + rhs.width): + if rhsr != 0: + mat.data[(Int(i / self.width) * mat.width) + (j % rhs.width)] += self.data[i] * rhs.data[j] + else: + mat.data[(Int(i / self.width) * mat.width) + (j % rhs.width)] = self.data[i] * rhs.data[j] + return mat^ var A = matmul.Matrix[DType.float32](self.data, (self.height, self.width)) var B = matmul.Matrix[DType.float32](rhs.data, (rhs.height, rhs.width)) var C = matmul.Matrix[DType.float32]((self.height, rhs.width)) @@ -91,7 +113,6 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized): return mat^ @staticmethod - @always_inline fn random(height: Int, width: Int, order: String = 'c') -> Matrix: random.seed() var mat = Matrix(height, width, order= order) diff --git a/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/matmul.mojo b/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/matmul.mojo index 5161d061..00cccc69 100644 --- a/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/matmul.mojo +++ b/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/matmul.mojo @@ -3,8 +3,6 @@ from algorithm import vectorize, parallelize from memory.memory import _malloc, stack_allocation from sys import CompilationTarget, num_performance_cores, simd_width_of, size_of -import benchmark -from testing import assert_equal from utils import IndexList import random from .params import * @@ -37,11 +35,11 @@ struct Layout(Copyable, Movable, Writable): var shape: IndexList[2] var strides: IndexList[2] - fn __init__(out self, shape: (Int, Int), strides: (Int, Int)): + fn __init__(out self, shape: Tuple[Int, Int], strides: Tuple[Int, Int]): self.shape = IndexList[2](shape[0], shape[1]) self.strides = IndexList[2](strides[0], strides[1]) - fn __init__(out self, shape: (Int, Int)): + fn __init__(out self, shape: Tuple[Int, Int]): self.strides = IndexList[2](shape[1], 1) self.shape = IndexList[2](shape[0], shape[1]) @@ -59,23 +57,23 @@ struct Layout(Copyable, Movable, Writable): struct Matrix[Type: DType]: - var data: UnsafePointer[Scalar[Type]] + var data: UnsafePointer[Scalar[Type], MutAnyOrigin] var layout: Layout - fn __init__(out self, shape: (Int, Int)): - self.data = UnsafePointer[Scalar[Type]].alloc(shape[0] * shape[1]) + fn __init__(out self, shape: Tuple[Int, Int]): + self.data = alloc[Scalar[Type]](shape[0] * shape[1]) self.layout = Layout(shape) @always_inline("nodebug") fn __init__( - out self, data: UnsafePointer[Scalar[Type]], var layout: Layout + out self, data: UnsafePointer[Scalar[Type], MutAnyOrigin], var layout: Layout ): - self.data = UnsafePointer[Scalar[Type]](data) + self.data = data self.layout = layout @always_inline("nodebug") fn __init__( - out self, data: UnsafePointer[Scalar[Type]], shape: (Int, Int) + out self, data: UnsafePointer[Scalar[Type], MutAnyOrigin], shape: Tuple[Int, Int] ): self.data = data self.layout = Layout(shape) @@ -83,7 +81,7 @@ struct Matrix[Type: DType]: @always_inline("nodebug") fn __getitem__( ref [_]self, i: Int, j: Int - ) -> ref [__origin_of(self)] Scalar[Type]: + ) -> ref [origin_of(self)] Scalar[Type]: var offset = self.layout(i, j) return (self.data + offset)[] @@ -146,7 +144,7 @@ struct Matrix[Type: DType]: @always_inline fn pack_A[ Type: DType, //, mr: Int -](mc: Int, Ac_buffer: UnsafePointer[Scalar[Type]], Ac: Matrix[Type]) -> Matrix[Type]: +](mc: Int, Ac_buffer: UnsafePointer[Scalar[Type], MutAnyOrigin], Ac: Matrix[Type]) -> Matrix[Type]: @parameter fn pack_panel(idx: Int): var i = idx * mr @@ -184,7 +182,7 @@ fn pack_A[ @always_inline fn pack_B[ Type: DType, //, kc: Int, nr: Int -](Bc_buffer: UnsafePointer[Scalar[Type]], Bc: Matrix[Type]) -> Matrix[Type]: +](Bc_buffer: UnsafePointer[Scalar[Type], MutAnyOrigin], Bc: Matrix[Type]) -> Matrix[Type]: var dst_ptr = Bc_buffer for i in range(0, Bc.shape[1](), nr): var src_ptr = Bc.data + i @@ -267,7 +265,7 @@ fn loop_n[ @parameter fn parallelize_balanced_part(idx: Int): - var Bc_buffer = UnsafePointer[Scalar[Type]]( + var Bc_buffer = UnsafePointer[Scalar[Type], MutAnyOrigin]( _malloc[Scalar[Type]]( kc * nc_per_thread * size_of[Type](), alignment=64 ) @@ -290,7 +288,7 @@ fn loop_n[ @parameter fn parallelize_remainder(idx: Int): - var Bc_buffer = UnsafePointer[Scalar[Type]]( + var Bc_buffer = UnsafePointer[Scalar[Type], MutAnyOrigin]( _malloc[Scalar[Type]]( kc * remainder_per_thread * size_of[Type](), alignment=64 ) @@ -348,7 +346,7 @@ fn macro_kernel[ fn micro_kernel[ Type: DType, //, mr: Int, nr: Int, padding: Bool ](mut Cr: Matrix[Type], Ar: Matrix[Type], Br: Matrix[Type]): - alias simd_width = simd_width_of[Type]() + comptime simd_width = simd_width_of[Type]() constrained[nr % simd_width == 0, "nr must be multiple of simd_width"]() var Ar_ptr = Ar.data @@ -440,31 +438,31 @@ fn micro_kernel[ @always_inline fn matmul_params[Type: DType]() -> IndexList[5]: - alias mc = 8192 // size_of[Type]() # fix this for simplicity - alias N = simd_width_of[Type]() + comptime mc = 8192 // size_of[Type]() # fix this for simplicity + comptime N = simd_width_of[Type]() - alias Vectors = 32 if CompilationTarget.has_avx512f() else 16 + comptime Vectors = 32 if CompilationTarget.has_avx512f() else 16 @parameter fn compute_kc[mr: Int, nr: Int]() -> Int: - alias CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr)) + comptime CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr)) return (CBr * L1_CACHE_SIZE) // (nr * size_of[Type]() * L1_ASSOCIATIVITY) @parameter fn compute_params[C: Int]() -> IndexList[5]: - alias p = C // (intsqrt[C]() + 1) - alias mr = C // p - 1 - alias nr = p * N - alias CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr)) - alias kc = compute_kc[mr, nr]() - alias nc = (L2_ASSOCIATIVITY - 1) * L2_CACHE_SIZE // ( + comptime p = C // (intsqrt[C]() + 1) + comptime mr = C // p - 1 + comptime nr = p * N + comptime CBr = Int((L1_ASSOCIATIVITY - 1) / (1 + mr / nr)) + comptime kc = compute_kc[mr, nr]() + comptime nc = (L2_ASSOCIATIVITY - 1) * L2_CACHE_SIZE // ( kc * size_of[Type]() * L2_ASSOCIATIVITY ) - mr return IndexList[5](mc, nc, kc, mr, nr) @parameter if Type.is_floating_point(): - alias TempVectors = 1 + comptime TempVectors = 1 return compute_params[Vectors - TempVectors]() else: @@ -473,25 +471,25 @@ fn matmul_params[Type: DType]() -> IndexList[5]: @parameter if CompilationTarget.has_avx512f(): - alias TempVectors = 2 + comptime TempVectors = 2 return compute_params[Vectors - TempVectors]() else: - alias TempVectors = 3 + comptime TempVectors = 3 return compute_params[Vectors - TempVectors]() else: - alias TempVectors = 2 + comptime TempVectors = 2 return compute_params[Vectors - TempVectors]() fn matmul[ Type: DType ](m: Int, n: Int, k: Int, mut C: Matrix[Type], A: Matrix[Type], B: Matrix[Type]): - alias params = matmul_params[Type]() - alias mc = params[0] - alias nc = params[1] - alias kc = params[2] - alias mr = params[3] - alias nr = params[4] + comptime params = matmul_params[Type]() + comptime mc = params[0] + comptime nc = params[1] + comptime kc = params[2] + comptime mr = params[3] + comptime nr = params[4] var resized_mc = roundup(min(mc, m), mr) var resized_nc = roundup(min(nc, n), nr) matmul_impl[kc, mr, nr](resized_mc, resized_nc, C, A, B) diff --git a/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/params.mojo b/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/params.mojo index 6e7e91b6..292404ae 100644 --- a/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/params.mojo +++ b/recipes/mojmelo/tests/mojmelo/utils/mojmelo_matmul/params.mojo @@ -1,4 +1,4 @@ -alias L1_CACHE_SIZE = 32768 -alias L1_ASSOCIATIVITY = 8 -alias L2_CACHE_SIZE = 262144 -alias L2_ASSOCIATIVITY = 4 +comptime L1_CACHE_SIZE = 32768 +comptime L1_ASSOCIATIVITY = 8 +comptime L2_CACHE_SIZE = 262144 +comptime L2_ASSOCIATIVITY = 4 diff --git a/recipes/mojmelo/tests/setup.mojo b/recipes/mojmelo/tests/setup.mojo index f4565547..c373f74a 100644 --- a/recipes/mojmelo/tests/setup.mojo +++ b/recipes/mojmelo/tests/setup.mojo @@ -3,11 +3,11 @@ from sys.ffi import * fn cachel1() -> Int32: var l1_cache_size: c_int = 0 - alias length: c_size_t = 4 + comptime length: c_size_t = 4 # Get L1 Cache Size - if external_call["sysctlbyname", c_int]("hw.perflevel0.l1dcachesize".unsafe_cstr_ptr(), UnsafePointer(to=l1_cache_size), UnsafePointer(to=length), OpaquePointer(), 0) == 0: + if external_call["sysctlbyname", c_int]("hw.perflevel0.l1dcachesize".unsafe_cstr_ptr(), UnsafePointer(to=l1_cache_size), UnsafePointer(to=length), OpaquePointer[MutOrigin.external](), 0) == 0: if l1_cache_size <= 1: - if external_call["sysctlbyname", c_int]("hw.l1dcachesize".unsafe_cstr_ptr(), UnsafePointer(to=l1_cache_size), UnsafePointer(to=length), OpaquePointer(), 0) == 0: + if external_call["sysctlbyname", c_int]("hw.l1dcachesize".unsafe_cstr_ptr(), UnsafePointer(to=l1_cache_size), UnsafePointer(to=length), OpaquePointer[MutOrigin.external](), 0) == 0: if l1_cache_size <= 1: return 65536 return l1_cache_size @@ -15,7 +15,7 @@ fn cachel1() -> Int32: return 65536 return l1_cache_size else: - if external_call["sysctlbyname", c_int]("hw.l1dcachesize".unsafe_cstr_ptr(), UnsafePointer(to=l1_cache_size), UnsafePointer(to=length), OpaquePointer(), 0) == 0: + if external_call["sysctlbyname", c_int]("hw.l1dcachesize".unsafe_cstr_ptr(), UnsafePointer(to=l1_cache_size), UnsafePointer(to=length), OpaquePointer[MutOrigin.external](), 0) == 0: if l1_cache_size <= 1: return 65536 return l1_cache_size @@ -25,11 +25,11 @@ fn cachel1() -> Int32: fn cachel2() -> Int32: var l2_cache_size: c_int = 0 - alias length: c_size_t = 4 + comptime length: c_size_t = 4 # Get L2 Cache Size - if external_call["sysctlbyname", c_int]("hw.perflevel0.l2cachesize".unsafe_cstr_ptr(), UnsafePointer(to=l2_cache_size), UnsafePointer(to=length), OpaquePointer(), 0) == 0: + if external_call["sysctlbyname", c_int]("hw.perflevel0.l2cachesize".unsafe_cstr_ptr(), UnsafePointer(to=l2_cache_size), UnsafePointer(to=length), OpaquePointer[MutOrigin.external](), 0) == 0: if l2_cache_size <= 1: - if external_call["sysctlbyname", c_int]("hw.l2cachesize".unsafe_cstr_ptr(), UnsafePointer(to=l2_cache_size), UnsafePointer(to=length), OpaquePointer(), 0) == 0: + if external_call["sysctlbyname", c_int]("hw.l2cachesize".unsafe_cstr_ptr(), UnsafePointer(to=l2_cache_size), UnsafePointer(to=length), OpaquePointer[MutOrigin.external](), 0) == 0: if l2_cache_size <= 1: return 4194304 return l2_cache_size @@ -37,7 +37,7 @@ fn cachel2() -> Int32: return 4194304 return l2_cache_size else: - if external_call["sysctlbyname", c_int]("hw.l2cachesize".unsafe_cstr_ptr(), UnsafePointer(to=l2_cache_size), UnsafePointer(to=length), OpaquePointer(), 0) == 0: + if external_call["sysctlbyname", c_int]("hw.l2cachesize".unsafe_cstr_ptr(), UnsafePointer(to=l2_cache_size), UnsafePointer(to=length), OpaquePointer[MutOrigin.external](), 0) == 0: if l2_cache_size <= 1: return 4194304 return l2_cache_size @@ -62,25 +62,25 @@ fn initialize(cache_l1_size: Int, cache_l1_associativity: Int, cache_l2_size: In possible_l2_associativities[1] = possible_l2_associativities[0] * 2 possible_l2_associativities[2] = possible_l2_associativities[0] * 4 with open("./mojmelo/utils/mojmelo_matmul/params.mojo", "w") as f: - code = 'alias L1_CACHE_SIZE = ' + String(cache_l1_size) + '\n' - code += 'alias L1_ASSOCIATIVITY = ' + String(possible_l1_associativities[0]) + '\n' - code += 'alias L2_CACHE_SIZE = ' + String(cache_l2_size) + '\n' - code += 'alias L2_ASSOCIATIVITY = ' + String(possible_l2_associativities[0]) + '\n' + code = 'comptime L1_CACHE_SIZE = ' + String(cache_l1_size) + '\n' + code += 'comptime L1_ASSOCIATIVITY = ' + String(possible_l1_associativities[0]) + '\n' + code += 'comptime L2_CACHE_SIZE = ' + String(cache_l2_size) + '\n' + code += 'comptime L2_ASSOCIATIVITY = ' + String(possible_l2_associativities[0]) + '\n' f.write(code) for i in range(3): for j in range(1, 4): with open("./param" + String(i * 3 + j), "w") as f: - code = 'alias L1_CACHE_SIZE = ' + String(cache_l1_size) + '\n' - code += 'alias L1_ASSOCIATIVITY = ' + String(possible_l1_associativities[i]) + '\n' - code += 'alias L2_CACHE_SIZE = ' + String(cache_l2_size) + '\n' - code += 'alias L2_ASSOCIATIVITY = ' + String(possible_l2_associativities[j - 1]) + '\n' + code = 'comptime L1_CACHE_SIZE = ' + String(cache_l1_size) + '\n' + code += 'comptime L1_ASSOCIATIVITY = ' + String(possible_l1_associativities[i]) + '\n' + code += 'comptime L2_CACHE_SIZE = ' + String(cache_l2_size) + '\n' + code += 'comptime L2_ASSOCIATIVITY = ' + String(possible_l2_associativities[j - 1]) + '\n' f.write(code) else: with open("./mojmelo/utils/mojmelo_matmul/params.mojo", "w") as f: - code = 'alias L1_CACHE_SIZE = ' + String(cache_l1_size) + '\n' - code += 'alias L1_ASSOCIATIVITY = ' + String(cache_l1_associativity) + '\n' - code += 'alias L2_CACHE_SIZE = ' + String(cache_l2_size) + '\n' - code += 'alias L2_ASSOCIATIVITY = ' + String(cache_l2_associativity) + '\n' + code = 'comptime L1_CACHE_SIZE = ' + String(cache_l1_size) + '\n' + code += 'comptime L1_ASSOCIATIVITY = ' + String(cache_l1_associativity) + '\n' + code += 'comptime L2_CACHE_SIZE = ' + String(cache_l2_size) + '\n' + code += 'comptime L2_ASSOCIATIVITY = ' + String(cache_l2_associativity) + '\n' f.write(code) with open("./done", "w") as f: f.write("done") @@ -122,21 +122,20 @@ fn main() raises: else: command = String(argv()[1]) - from python import Python - os_py = Python.import_module("os") - os_path_py = Python.import_module("os.path") - if os_path_py.isfile('./done'): + import os + + if os.path.isfile('./done'): if command != '9': print('Setup', command + '/8', 'skipped!') else: - os_py.remove("./done") + os.remove("./done") print('Setup done!') return from mojmelo.utils.Matrix import Matrix import time - alias NUM_ITER = 16 + comptime NUM_ITER = 16 results = InlineArray[Int, 3](fill=0) var junk: Float32 = 0.0 a = Matrix.random(512, 4096) @@ -147,7 +146,7 @@ fn main() raises: finish = time.perf_counter_ns() junk += c[0, 0] if i != 0: - results[0] += (finish - start) // (NUM_ITER - 1) + results[0] += Int(finish - start) // (NUM_ITER - 1) a = Matrix.random(4096, 4096) b = Matrix.random(4096, 4096) for i in range(NUM_ITER): @@ -156,7 +155,7 @@ fn main() raises: finish = time.perf_counter_ns() junk += c[0, 0] if i != 0: - results[1] += (finish - start) // (NUM_ITER - 1) + results[1] += Int(finish - start) // (NUM_ITER - 1) a = Matrix.random(4096, 512) b = Matrix.random(512, 4096) for i in range(NUM_ITER): @@ -165,7 +164,7 @@ fn main() raises: finish = time.perf_counter_ns() junk += c[0, 0] if i != 0: - results[2] += (finish - start) // (NUM_ITER - 1) + results[2] += Int(finish - start) // (NUM_ITER - 1) if command != '9': with open("./results" + command, "w") as f: f.write(String(results[0]) + ',' + String(results[1]) + ',' + String(results[2]) + ',' + String(junk)) @@ -205,7 +204,7 @@ fn main() raises: f.write(code) for i in range(1, 10): - os_py.remove("./param" + String(i)) + os.remove("./param" + String(i)) if i != 9: - os_py.remove("./results" + String(i)) + os.remove("./results" + String(i)) print('Setup done!') From ef1c15ad98a4c4d63d2c4eab6fa709e213eb5ba8 Mon Sep 17 00:00:00 2001 From: Yetalit <98420273+yetalit@users.noreply.github.com> Date: Thu, 20 Nov 2025 21:26:36 +0300 Subject: [PATCH 2/2] trailing whitespace fix --- recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo b/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo index 6ca751dc..56882fee 100644 --- a/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo +++ b/recipes/mojmelo/tests/mojmelo/utils/Matrix.mojo @@ -76,13 +76,13 @@ struct Matrix(Copyable, Movable, ImplicitlyCopyable, Sized): fn __mul__(self, rhs: Self) raises -> Self: if self.width != rhs.height: raise Error('Error: Cannot multiply matrices with shapes (' + String(self.height) + ', ' + String(self.width) + ') and (' + String(rhs.height) + ', ' + String(rhs.width) + ')') - + if self.height == 1 and rhs.width == 1: # Dot product var mat = Self(1, 1) mat.data[0] = self.ele_mul(rhs.T()).sum() return mat^ - + if self.height * self.width * rhs.width <= 4096: # matmul naive var mat = Self(self.height, rhs.width)