Skip to content

Commit f69fa6b

Browse files
committed
Remove debug statements from status script
- Clean final version
- Status monitoring fully functional
- Shows progress, elapsed time, and ETA for all 8 authors

Ref: #38
1 parent babcf0a commit f69fa6b

File tree

1 file changed

+1
-13
lines changed

1 file changed

+1
-13
lines changed

check_hf_status.sh

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,11 @@ def check_author_status(author):
137137
model_dir = Path(f'models/{author}_tokenizer=gpt2_seed=0')
138138
loss_log = model_dir / 'loss_logs.csv'
139139
140-
print(f"[DEBUG] Checking {author}: {loss_log}", file=sys.stderr)
141-
142140
if not loss_log.exists():
143-
print(f"[DEBUG] Loss log not found for {author}", file=sys.stderr)
144141
return None
145142
146143
try:
147144
df = pd.read_csv(loss_log)
148-
print(f"[DEBUG] {author}: {len(df)} rows in loss log", file=sys.stderr)
149145
if len(df) == 0:
150146
return None
151147
@@ -160,26 +156,22 @@ def check_author_status(author):
160156
161157
# Check if we're in HF training (loss < PAPER_LOSS)
162158
hf_rows = df[(df['loss_dataset'] == 'train') & (df['loss_value'] < PAPER_LOSS)]
163-
print(f"[DEBUG] {author}: HF rows (loss < {PAPER_LOSS}): {len(hf_rows)}", file=sys.stderr)
164159
165160
if len(hf_rows) == 0:
166161
# Not yet started HF training
167-
print(f"[DEBUG] {author}: Returning None - no HF training yet", file=sys.stderr)
168162
return None
169163
170164
# Find when HF training started
171165
hf_start_epoch = int(hf_rows.iloc[0]['epochs_completed'])
172166
epochs_since_start = int(max_epoch - hf_start_epoch)
173-
print(f"[DEBUG] {author}: HF start epoch {hf_start_epoch}, epochs since: {epochs_since_start}", file=sys.stderr)
174167
175168
# Estimate elapsed time (rough: 10 sec/epoch with eval skipped)
176169
elapsed = timedelta(seconds=int(epochs_since_start * 10))
177-
print(f"[DEBUG] {author}: About to return status dict", file=sys.stderr)
178170
179171
# Check if complete
180172
is_complete = current_loss <= TARGET_LOSS
181173
182-
result = {
174+
return {
183175
'current_epoch': max_epoch,
184176
'current_loss': current_loss,
185177
'target_loss': TARGET_LOSS,
@@ -189,11 +181,7 @@ def check_author_status(author):
189181
'elapsed': elapsed
190182
}
191183
192-
print(f"[DEBUG] {author}: Returning status - epoch {max_epoch}, loss {current_loss:.4f}", file=sys.stderr)
193-
return result
194-
195184
except Exception as e:
196-
print(f"[DEBUG] {author}: Exception: {e}", file=sys.stderr)
197185
return None
198186
199187
# Print report

0 commit comments

Comments
 (0)