@@ -329,13 +329,14 @@ void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
329329 int64_t eta;
330330 std::tie<double , double , int64_t >(wst, lr, eta) = progressInfo (progress);
331331
332+ // Format output with a [FLORET] tag and '|' separators for more structured logging
332333 log_stream << std::fixed;
333- log_stream << " Progress: " ;
334- log_stream << std::setprecision (1 ) << std::setw ( 5 ) << ( progress * 100 ) << " %" ;
335- log_stream << " words/sec/thread: " << std::setw ( 7 ) << int64_t (wst) ;
336- log_stream << " lr: " << std::setw ( 9 ) << std::setprecision ( 6 ) << lr ;
337- log_stream << " avg. loss: " << std::setw ( 9 ) << std::setprecision ( 6 ) << loss ;
338- log_stream << " ETA: " << utils::ClockPrint (eta);
334+ log_stream << " [FLORET] " ;
335+ log_stream << std::setprecision (1 ) << ( progress * 100 ) << " % complete | " ;
336+ log_stream << int64_t (wst) << " words/sec/thread | " ;
337+ log_stream << " lr: " << std::setprecision ( 6 ) << lr << " | " ;
338+ log_stream << " loss: " << std::setprecision ( 6 ) << loss << " | " ;
339+ log_stream << " ETA: " << utils::ClockPrint (eta);
339340 log_stream << std::flush;
340341}
341342
@@ -786,15 +787,25 @@ void FastText::train(const Args& args, const TrainCallback& callback) {
786787 // manage expectations
787788 throw std::invalid_argument (" Cannot use stdin for training!" );
788789 }
790+
791+ // Log beginning of training process
792+ std::cerr << " [FLORET] Starting training in "
793+ << (args_->mode == mode_name::floret ? " floret" : " fasttext" )
794+ << " mode" << std::endl;
795+
789796 std::ifstream ifs (args_->input );
790797 if (!ifs.is_open ()) {
791798 throw std::invalid_argument (
792799 args_->input + " cannot be opened for training!" );
793800 }
794801 dict_->readFromFile (ifs);
795802 ifs.close ();
803+
804+ std::cerr << " [FLORET] Read " << dict_->ntokens () << " tokens, "
805+ << dict_->nwords () << " unique words" << std::endl;
796806
797807 if (!args_->pretrainedVectors .empty ()) {
808+ std::cerr << " [FLORET] Loading pretrained vectors" << std::endl;
798809 input_ = getInputMatrixFromFile (args_->pretrainedVectors );
799810 } else {
800811 input_ = createRandomMatrix ();
@@ -803,6 +814,19 @@ void FastText::train(const Args& args, const TrainCallback& callback) {
803814 quant_ = false ;
804815 auto loss = createLoss (output_);
805816 bool normalizeGradient = (args_->model == model_name::sup);
817+
818+ // Log training configuration
819+ std::cerr << " [FLORET] Config: dim=" << args_->dim
820+ << " , mode=" << (args_->mode == mode_name::floret ? " floret" : " fasttext" )
821+ << " , minCount=" << args_->minCount
822+ << " , epoch=" << args_->epoch
823+ << " , lr=" << args_->lr ;
824+
825+ if (args_->mode == mode_name::floret) {
826+ std::cerr << " , hashCount=" << args_->hashCount ;
827+ }
828+ std::cerr << std::endl;
829+
806830 model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
807831 startThreads (callback);
808832}
@@ -831,12 +855,22 @@ void FastText::startThreads(const TrainCallback& callback) {
831855 }
832856 const int64_t ntokens = dict_->ntokens ();
833857 // Same condition as trainThread
858+ // Use a rate limiter for logs to avoid overwhelming output
859+ real lastLoggedProgress = 0.0 ;
860+ real logInterval = 0.01 ; // Log at most every 1% progress (adjustable)
861+
834862 while (keepTraining (ntokens)) {
835863 std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
836864 if (loss_ >= 0 && args_->verbose > 1 ) {
837865 real progress = real (tokenCount_) / (args_->epoch * ntokens);
838- std::cerr << " \r " ;
839- printInfo (progress, loss_, std::cerr);
866+
867+ // Only log if we've made sufficient progress since last log
868+ if (progress - lastLoggedProgress >= logInterval) {
869+ lastLoggedProgress = progress;
870+ // No carriage return: each update is written on its own line so captured logs stay readable
871+ printInfo (progress, loss_, std::cerr);
872+ std::cerr << std::endl;
873+ }
840874 }
841875 }
842876 for (int32_t i = 0 ; i < threads.size (); i++) {
@@ -848,9 +882,10 @@ void FastText::startThreads(const TrainCallback& callback) {
848882 std::rethrow_exception (exception);
849883 }
850884 if (args_->verbose > 0 ) {
851- std::cerr << " \r " ;
885+ // Log final stats without carriage return for better log output
852886 printInfo (1.0 , loss_, std::cerr);
853887 std::cerr << std::endl;
888+ std::cerr << " [FLORET] Training complete - " << dict_->nwords () << " words in vocabulary" << std::endl;
854889 }
855890}
856891
0 commit comments