Skip to content

Commit d302fa3

Browse files
committed
Optimizer: Ensure weight budget is fully used up
1 parent b400394 commit d302fa3

File tree

1 file changed

+24
-9
lines changed

1 file changed

+24
-9
lines changed

exllamav2/conversion/optimize.py

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,7 @@ def optimize(job, save_fn, model):
105105
last_update = 0
106106
m = float("inf")
107107
p = float("inf")
108+
c = weight_budget
108109
for i in range(anneal_stages * anneal_samples):
109110
if time.time() - last_update > 1 or i == anneal_samples - 1:
110111
print(f" -- Optimizing: {i + 1:4}/{anneal_stages * anneal_samples:4}")
@@ -125,27 +126,41 @@ def optimize(job, save_fn, model):
125126
else:
126127
norm = bestnorm
127128

128-
s_, si_, p_, c_, m_ = ext_c.sim_anneal(slots,
129-
weight_budget,
130-
anneal_temp_max,
131-
anneal_cooling_factor,
132-
anneal_temp_min,
133-
anneal_iter,
134-
norm)
129+
s_, si_, p_, c_, m_ = ext_c.sim_anneal(
130+
slots,
131+
weight_budget,
132+
anneal_temp_max,
133+
anneal_cooling_factor,
134+
anneal_temp_min,
135+
anneal_iter,
136+
norm
137+
)
135138

136139
if i < anneal_samples * 2:
137140
if m_ < m:
138141
m = m_
139142
bestnorm = norm
140143
else:
141144
if p_ < p:
142-
s, si, p, m = s_, si_, p_, m_
145+
s, si, p, c, m = s_, si_, p_, c_, m_
146+
147+
# Tweak solution in case there is some budget left over
148+
149+
while True:
150+
repeat = False
151+
for i in range(len(si)):
152+
if si[i] < len(slots[i]) - 1:
153+
delta_c = slots[i][si[i] + 1][0] - slots[i][si[i]][0]
154+
if c + delta_c <= weight_budget:
155+
c += delta_c
156+
si[i] = si[i] + 1
157+
repeat = True
158+
if not repeat: break
143159

144160
solution_idx = si
145161
print(f" -- max(err): {m:.6f}")
146162
print(f" -- error_norm: {bestnorm:.6f}")
147163

148-
149164
# Save strategy
150165

151166
print(" -- Quantization strategy:")

0 commit comments

Comments
 (0)