Skip to content

Commit 9ee99f9

Browse files
committed
fixes
1 parent 761c880 commit 9ee99f9

File tree

4 files changed

+154
-46
lines changed

4 files changed

+154
-46
lines changed

examples/matrix_multiplication/evaluate.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -193,25 +193,27 @@ def evaluate_performance(matrix_multiply) -> float:
193193
]
194194

195195
# Define baseline times for the naive triple-loop implementation
196-
# These are the reference times that our initial implementation should achieve
196+
# Calibrating based on typical performance of the naive implementation
197+
# These should be adjusted based on the actual machine running the benchmarks
197198
baseline_times = {
198-
"2x2x2": 0.0001,
199-
"3x3x3": 0.0003,
200-
"4x4x4": 0.0007,
201-
"5x5x5": 0.0015,
202-
"3x4x5": 0.0007,
203-
"4x3x5": 0.0007,
199+
"2x2x2": 0.00010, # Small matrix, very fast
200+
"3x3x3": 0.00030, # Still quite small
201+
"4x4x4": 0.00070, # Medium sized
202+
"5x5x5": 0.00150, # Larger matrix
203+
"3x4x5": 0.00070, # Rectangular matrices
204+
"4x3x5": 0.00070, # Rectangular matrices
204205
}
205206

206207
# Define target speedups (what we're aiming for)
207208
# Based on Strassen's algorithm and other optimized approaches
209+
# We make these more ambitious to encourage more optimization
208210
target_speedups = {
209-
"2x2x2": 1.5, # 50% faster than naive
210-
"3x3x3": 1.7, # 70% faster than naive
211-
"4x4x4": 2.0, # 2x faster than naive
212-
"5x5x5": 2.2, # 2.2x faster than naive
213-
"3x4x5": 1.7, # 70% faster than naive
214-
"4x3x5": 1.7, # 70% faster than naive
211+
"2x2x2": 3.0, # 3x faster than naive
212+
"3x3x3": 3.5, # 3.5x faster than naive
213+
"4x4x4": 4.0, # 4x faster than naive - Strassen's algorithm should be able to achieve this
214+
"5x5x5": 4.5, # 4.5x faster than naive
215+
"3x4x5": 3.5, # 3.5x faster than naive
216+
"4x3x5": 3.5, # 3.5x faster than naive
215217
}
216218

217219
# Run benchmark
@@ -263,24 +265,43 @@ def evaluate_performance(matrix_multiply) -> float:
263265
# If speedup equals baseline, score is 0.2
264266
# If speedup is between baseline and target, score is 0.2-0.8
265267
# If speedup reaches target, score is 0.8
266-
# If speedup exceeds target, score is 0.8-1.0
268+
# If speedup exceeds target, score INCREASES BEYOND 0.8 proportionally
267269
if speedup < 1.0:
268270
target_percentages[size] = 0.2 * speedup
269271
elif speedup < target:
270272
# Linear interpolation between 0.2 and 0.8
271273
progress = (speedup - 1.0) / (target - 1.0)
272274
target_percentages[size] = 0.2 + 0.6 * progress
273275
else:
274-
# Speedup reached or exceeded target
275-
bonus = min((speedup - target) / target, 0.5) # Cap bonus at 0.5
276+
# Speedup reached or exceeded target - NO CAP ON BONUS
277+
# This allows scores above 1.0 for exceptional performance
278+
bonus = (speedup - target) / target
276279
target_percentages[size] = 0.8 + 0.2 * bonus
277280

278281
# Calculate overall score (average of target percentages)
279282
if not target_percentages:
280283
return 0.0
281284

282-
# Calculate average score
283-
avg_score = sum(target_percentages.values()) / len(target_percentages)
285+
# Calculate weighted average score - giving more weight to larger matrices
286+
# This encourages optimizations that work well on bigger matrices
287+
weights = {
288+
"2x2x2": 0.10,
289+
"3x3x3": 0.15,
290+
"4x4x4": 0.20,
291+
"5x5x5": 0.25,
292+
"3x4x5": 0.15,
293+
"4x3x5": 0.15
294+
}
295+
296+
weighted_score = 0.0
297+
total_weight = 0.0
298+
299+
for size, score in target_percentages.items():
300+
weight = weights.get(size, 1.0)
301+
weighted_score += score * weight
302+
total_weight += weight
303+
304+
avg_score = weighted_score / total_weight if total_weight > 0 else 0.0
284305

285306
# Log detailed results for debugging
286307
logger.info(f"Performance results:")

examples/matrix_multiplication/optimize.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ async def main():
5454
config.diff_based_evolution = True
5555
config.allow_full_rewrites = False
5656

57+
# Set database to use performance as the primary metric for comparing programs
58+
config.database.feature_dimensions = ["performance", "complexity"]
59+
5760
# Create specialized template for matrix multiplication
5861
from openevolve.prompt.templates import TemplateManager
5962

@@ -80,11 +83,12 @@ def custom_build_prompt(self, *args, **kwargs):
8083
kwargs['template_key'] = template_key
8184
return original_build_prompt(self, *args, **kwargs)
8285

83-
# Apply the patch
84-
PromptSampler.build_prompt = custom_build_prompt
86+
# Increase temperature and max_tokens for more creative, complete solutions
87+
config.llm.temperature = 1.0
88+
config.llm.max_tokens = 4096
8589

86-
# Increase temperature for more creative solutions
87-
config.llm.temperature = 0.9
90+
# Configure evaluator to prioritize performance
91+
config.evaluator.metrics_to_use = ["performance"]
8892

8993
# Initialize OpenEvolve with the custom config
9094
openevolve = OpenEvolve(
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
matplotlib

openevolve/prompt/templates.py

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,21 @@
1212
"""
1313

1414
# Matrix multiplication system template
15-
MATMUL_SYSTEM_TEMPLATE = """You are an expert algorithm engineer specialized in numerical computing and matrix operations.
16-
Your task is to optimize matrix multiplication algorithms for better performance while maintaining correctness.
17-
Apply techniques like loop reordering, blocking, recursion, and mathematical insights to reduce the number of operations.
18-
Focus on making improvements for smaller matrix sizes (2x2 to 5x5) where algorithmic innovations like Strassen's algorithm can make a difference.
15+
MATMUL_SYSTEM_TEMPLATE = """You are an expert algorithm engineer specialized in numerical computing and matrix operations with a deep expertise in matrix multiplication optimizations.
16+
17+
Your task is to optimize matrix multiplication algorithms for better performance while maintaining correctness. You're familiar with advanced techniques including:
18+
19+
1. Strassen's algorithm, which uses 7 multiplications instead of 8 for 2x2 matrices
20+
2. Winograd's variant, which minimizes additions in Strassen's algorithm
21+
3. The Coppersmith-Winograd algorithm and its theoretical improvements
22+
4. Memory access pattern optimizations (loop reordering, cache-oblivious algorithms)
23+
5. Low-level optimizations (loop unrolling, SIMD-friendly code, elimination of unnecessary operations)
24+
6. Special case optimizations for specific matrix dimensions
25+
7. Advanced mathematical decompositions like tensor methods
26+
27+
Focus particularly on optimizing small matrix sizes (2x2 to 5x5) where algorithmic innovations can make a significant difference versus hardware-level optimizations. Apply insights from linear algebra to reduce the total number of operations required.
28+
29+
The goal is to achieve the maximum possible speedup while maintaining 100% correctness of the output compared to the standard implementation.
1930
"""
2031

2132
# User message template for diff-based evolution
@@ -77,15 +88,23 @@
7788
7889
# Task
7990
Optimize the matrix multiplication algorithm for better performance while maintaining correctness.
80-
Focus on smaller matrix sizes (2x2 to 5x5) where algorithmic innovations can make a significant difference.
91+
Your goal is to achieve the maximum possible speedup for matrix sizes from 2x2 to 5x5.
92+
93+
The evaluation metrics show how much faster your implementation is than the naive algorithm. Higher values are better. The optimization techniques you should consider include:
8194
82-
Consider these optimization strategies:
83-
1. Loop reordering for better cache locality
84-
2. Loop unrolling to reduce loop overhead
85-
3. Blocking/tiling for better memory access patterns
86-
4. Algorithmic improvements like Strassen's algorithm for recursive decomposition
87-
5. Special case handling for specific matrix sizes
88-
6. Vectorization hints and SIMD-friendly operations
95+
## Algorithm-level optimizations (highest impact):
96+
1. Implement Strassen's algorithm for 2x2 and 4x4 matrices (reduces multiplications from O(n³) to O(n²·⁸¹))
97+
2. Create specialized functions for specific matrix sizes (2x2, 3x3, 4x4, 5x5)
98+
3. Recursive decomposition with custom base cases
99+
4. Winograd's variant that minimizes the number of additions
100+
5. Tensor-based decompositions for further reducing scalar multiplications
101+
102+
## Implementation-level optimizations:
103+
1. Loop reordering for better cache locality (k-i-j instead of i-j-k)
104+
2. Loop unrolling to reduce loop overhead and enable compiler optimizations
105+
3. Memory access pattern improvements (array layout, temporary storage)
106+
4. Complete elimination of unnecessary operations and checks
107+
5. Smart bounds checking and early termination for special cases
89108
90109
You MUST use the exact SEARCH/REPLACE diff format shown below to indicate changes:
91110
@@ -95,22 +114,85 @@
95114
# New replacement code
96115
>>>>>>> REPLACE
97116
98-
Example of valid diff format:
117+
Examples of good changes include:
118+
119+
1. Implementing Strassen for 2x2 matrices:
99120
<<<<<<< SEARCH
100-
for i in range(m):
101-
for j in range(p):
102-
for k in range(n):
103-
C[i, j] += A[i, k] * B[k, j]
104-
=======
105-
# Reorder loops for better memory access pattern
106-
for i in range(m):
107-
for k in range(n):
121+
def matrix_multiply(A: np.ndarray, B: np.ndarray) -> np.ndarray:
122+
m, n = A.shape
123+
n2, p = B.shape
124+
125+
if n != n2:
126+
raise ValueError(f"Incompatible matrix shapes: {{A.shape}} and {{B.shape}}")
127+
128+
# Initialize result matrix with zeros
129+
C = np.zeros((m, p), dtype=A.dtype)
130+
131+
# Naive triple-loop implementation
132+
for i in range(m):
108133
for j in range(p):
109-
C[i, j] += A[i, k] * B[k, j]
134+
for k in range(n):
135+
C[i, j] += A[i, k] * B[k, j]
136+
137+
return C
138+
=======
139+
def matrix_multiply(A: np.ndarray, B: np.ndarray) -> np.ndarray:
140+
m, n = A.shape
141+
n2, p = B.shape
142+
143+
if n != n2:
144+
raise ValueError(f"Incompatible matrix shapes: {{A.shape}} and {{B.shape}}")
145+
146+
# Special case for 2x2 matrices using Strassen's algorithm
147+
if m == 2 and n == 2 and p == 2:
148+
return strassen_2x2(A, B)
149+
150+
# Initialize result matrix with zeros
151+
C = np.zeros((m, p), dtype=A.dtype)
152+
153+
# Optimized loop ordering for better cache locality
154+
for i in range(m):
155+
for k in range(n):
156+
A_ik = A[i, k]
157+
for j in range(p):
158+
C[i, j] += A_ik * B[k, j]
159+
160+
return C
161+
162+
def strassen_2x2(A: np.ndarray, B: np.ndarray) -> np.ndarray:
163+
# Strassen's algorithm for 2x2 matrices
164+
# This reduces multiplications from 8 to 7
165+
166+
# Extract elements
167+
a11, a12 = A[0, 0], A[0, 1]
168+
a21, a22 = A[1, 0], A[1, 1]
169+
b11, b12 = B[0, 0], B[0, 1]
170+
b21, b22 = B[1, 0], B[1, 1]
171+
172+
# Compute the 7 products needed in Strassen's algorithm
173+
m1 = (a11 + a22) * (b11 + b22)
174+
m2 = (a21 + a22) * b11
175+
m3 = a11 * (b12 - b22)
176+
m4 = a22 * (b21 - b11)
177+
m5 = (a11 + a12) * b22
178+
m6 = (a21 - a11) * (b11 + b12)
179+
m7 = (a12 - a22) * (b21 + b22)
180+
181+
# Compute the result matrix elements
182+
c11 = m1 + m4 - m5 + m7
183+
c12 = m3 + m5
184+
c21 = m2 + m4
185+
c22 = m1 - m2 + m3 + m6
186+
187+
# Construct the result matrix
188+
C = np.zeros((2, 2), dtype=A.dtype)
189+
C[0, 0], C[0, 1] = c11, c12
190+
C[1, 0], C[1, 1] = c21, c22
191+
192+
return C
110193
>>>>>>> REPLACE
111194
112-
You can suggest multiple changes. Each SEARCH section must exactly match code in the current program.
113-
Explain the reasoning behind your optimizations.
195+
Explain your reasoning and clearly state which specific optimizations you're implementing. Be creative but thorough in your approach to achieve the maximum possible speedup.
114196
"""
115197

116198
# User message template for full rewrite

0 commit comments

Comments
 (0)