Skip to content

Commit 2c19d15

Browse files
committed
Make log output more print-friendly
1 parent 7756382 commit 2c19d15

File tree

2 files changed

+63
-9
lines changed

2 files changed

+63
-9
lines changed

src/fasttext.cc

Lines changed: 44 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -329,13 +329,14 @@ void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
329329
int64_t eta;
330330
std::tie<double, double, int64_t>(wst, lr, eta) = progressInfo(progress);
331331

332+
// Format output with a "[FLORET] " prefix and " | " field separators for more structured logging
332333
log_stream << std::fixed;
333-
log_stream << "Progress: ";
334-
log_stream << std::setprecision(1) << std::setw(5) << (progress * 100) << "%";
335-
log_stream << " words/sec/thread: " << std::setw(7) << int64_t(wst);
336-
log_stream << " lr: " << std::setw(9) << std::setprecision(6) << lr;
337-
log_stream << " avg.loss: " << std::setw(9) << std::setprecision(6) << loss;
338-
log_stream << " ETA: " << utils::ClockPrint(eta);
334+
log_stream << "[FLORET] ";
335+
log_stream << std::setprecision(1) << (progress * 100) << "% complete | ";
336+
log_stream << int64_t(wst) << " words/sec/thread | ";
337+
log_stream << "lr: " << std::setprecision(6) << lr << " | ";
338+
log_stream << "loss: " << std::setprecision(6) << loss << " | ";
339+
log_stream << "ETA: " << utils::ClockPrint(eta);
339340
log_stream << std::flush;
340341
}
341342

@@ -786,15 +787,25 @@ void FastText::train(const Args& args, const TrainCallback& callback) {
786787
// manage expectations
787788
throw std::invalid_argument("Cannot use stdin for training!");
788789
}
790+
791+
// Log beginning of training process
792+
std::cerr << "[FLORET] Starting training in "
793+
<< (args_->mode == mode_name::floret ? "floret" : "fasttext")
794+
<< " mode" << std::endl;
795+
789796
std::ifstream ifs(args_->input);
790797
if (!ifs.is_open()) {
791798
throw std::invalid_argument(
792799
args_->input + " cannot be opened for training!");
793800
}
794801
dict_->readFromFile(ifs);
795802
ifs.close();
803+
804+
std::cerr << "[FLORET] Read " << dict_->ntokens() << " tokens, "
805+
<< dict_->nwords() << " unique words" << std::endl;
796806

797807
if (!args_->pretrainedVectors.empty()) {
808+
std::cerr << "[FLORET] Loading pretrained vectors" << std::endl;
798809
input_ = getInputMatrixFromFile(args_->pretrainedVectors);
799810
} else {
800811
input_ = createRandomMatrix();
@@ -803,6 +814,19 @@ void FastText::train(const Args& args, const TrainCallback& callback) {
803814
quant_ = false;
804815
auto loss = createLoss(output_);
805816
bool normalizeGradient = (args_->model == model_name::sup);
817+
818+
// Log training configuration
819+
std::cerr << "[FLORET] Config: dim=" << args_->dim
820+
<< ", mode=" << (args_->mode == mode_name::floret ? "floret" : "fasttext")
821+
<< ", minCount=" << args_->minCount
822+
<< ", epoch=" << args_->epoch
823+
<< ", lr=" << args_->lr;
824+
825+
if (args_->mode == mode_name::floret) {
826+
std::cerr << ", hashCount=" << args_->hashCount;
827+
}
828+
std::cerr << std::endl;
829+
806830
model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
807831
startThreads(callback);
808832
}
@@ -831,12 +855,22 @@ void FastText::startThreads(const TrainCallback& callback) {
831855
}
832856
const int64_t ntokens = dict_->ntokens();
833857
// Same condition as trainThread
858+
// Use a rate limiter for logs to avoid overwhelming output
859+
real lastLoggedProgress = 0.0;
860+
real logInterval = 0.01; // Log at most every 1% progress (adjustable)
861+
834862
while (keepTraining(ntokens)) {
835863
std::this_thread::sleep_for(std::chrono::milliseconds(100));
836864
if (loss_ >= 0 && args_->verbose > 1) {
837865
real progress = real(tokenCount_) / (args_->epoch * ntokens);
838-
std::cerr << "\r";
839-
printInfo(progress, loss_, std::cerr);
866+
867+
// Only log if we've made sufficient progress since last log
868+
if (progress - lastLoggedProgress >= logInterval) {
869+
lastLoggedProgress = progress;
870+
// Don't use carriage return in log-friendly mode
871+
printInfo(progress, loss_, std::cerr);
872+
std::cerr << std::endl;
873+
}
840874
}
841875
}
842876
for (int32_t i = 0; i < threads.size(); i++) {
@@ -848,9 +882,10 @@ void FastText::startThreads(const TrainCallback& callback) {
848882
std::rethrow_exception(exception);
849883
}
850884
if (args_->verbose > 0) {
851-
std::cerr << "\r";
885+
// Log final stats without carriage return for better log output
852886
printInfo(1.0, loss_, std::cerr);
853887
std::cerr << std::endl;
888+
std::cerr << "[FLORET] Training complete - " << dict_->nwords() << " words in vocabulary" << std::endl;
854889
}
855890
}
856891

src/main.cc

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -361,6 +361,9 @@ void train(const std::vector<std::string> args) {
361361
std::shared_ptr<FastText> fasttext = std::make_shared<FastText>();
362362
std::string outputFileName;
363363

364+
// Log initialization details
365+
std::cerr << "[FLORET-CLI] Starting training task" << std::endl;
366+
364367
if (a.hasAutotune() &&
365368
a.getAutotuneModelSize() != Args::kUnlimitedModelSize) {
366369
outputFileName = a.output + ".ftz";
@@ -373,20 +376,36 @@ void train(const std::vector<std::string> args) {
373376
outputFileName + " cannot be opened for saving.");
374377
}
375378
ofs.close();
379+
380+
// Train the model
381+
std::cerr << "[FLORET-CLI] Training model with input: " << a.input << std::endl;
382+
std::cerr << "[FLORET-CLI] Output will be saved to: " << outputFileName << std::endl;
383+
376384
if (a.hasAutotune()) {
377385
Autotune autotune(fasttext);
378386
autotune.train(a);
379387
} else {
380388
fasttext->train(a);
381389
}
390+
391+
// Save model outputs
392+
std::cerr << "[FLORET-CLI] Saving model to: " << outputFileName << std::endl;
382393
fasttext->saveModel(outputFileName);
394+
395+
std::cerr << "[FLORET-CLI] Saving word vectors to: " << a.output + ".vec" << std::endl;
383396
fasttext->saveVectors(a.output + ".vec");
397+
384398
if (a.mode == mode_name::floret) {
399+
std::cerr << "[FLORET-CLI] Saving floret vectors to: " << a.output + ".floret" << std::endl;
385400
fasttext->saveFloretVectors(a.output + ".floret");
386401
}
402+
387403
if (a.saveOutput) {
404+
std::cerr << "[FLORET-CLI] Saving output vectors to: " << a.output + ".output" << std::endl;
388405
fasttext->saveOutput(a.output + ".output");
389406
}
407+
408+
std::cerr << "[FLORET-CLI] Training complete!" << std::endl;
390409
}
391410

392411
void dump(const std::vector<std::string>& args) {

0 commit comments

Comments
 (0)