99
1010from ..commons .check_steps import check_dataset_steps
1111from ..preprocessor import c_tf_idf , extract_tfidf_topics
12- from ..utils .cbc_utils import (DocumentCoherence ,
13- get_top_tfidf_words_per_document )
12+ from ..utils .cbc_utils import DocumentCoherence , get_top_tfidf_words_per_document
1413from ..utils .dataset import TMDataset
1514from .abstract_helper_models .base import BaseModel , TrainingStatus
1615
@@ -189,12 +188,10 @@ def fit(
189188 clusters = self .cluster_documents ()
190189
191190 num_clusters = len (clusters )
192- print (
193- f"Iteration { iteration } : { num_clusters } clusters formed." )
191+ print (f"Iteration { iteration } : { num_clusters } clusters formed." )
194192
195193 # Prepare for the next iteration
196- combined_documents = self .combine_documents (
197- current_documents , clusters )
194+ combined_documents = self .combine_documents (current_documents , clusters )
198195 current_documents = combined_documents
199196 iteration += 1
200197
@@ -247,8 +244,7 @@ def fit(
247244 self .labels += 1
248245
249246 # Update the 'predictions' column in the dataframe with -1 where NaN was present
250- self .dataframe ["predictions" ] = self .dataframe ["predictions" ].fillna (
251- - 1 )
247+ self .dataframe ["predictions" ] = self .dataframe ["predictions" ].fillna (- 1 )
252248 self .dataframe ["predictions" ] += 1
253249 print ("--- replaced NaN values with 0 in topics ---" )
254250 print (
@@ -259,13 +255,11 @@ def fit(
259255 {"text" : " " .join }
260256 )
261257 logger .info ("--- Extract topics ---" )
262- tfidf , count = c_tf_idf (
263- docs_per_topic ["text" ].values , m = len (self .dataframe ))
264- self .topic_dict = extract_tfidf_topics (
265- tfidf , count , docs_per_topic , n = 10 )
258+ tfidf , count = c_tf_idf (docs_per_topic ["text" ].values , m = len (self .dataframe ))
259+ self .topic_dict = extract_tfidf_topics (tfidf , count , docs_per_topic , n = 10 )
266260
267261 one_hot_encoder = OneHotEncoder (
268- sparse = False
262+ sparse_output = False
269263 ) # Use sparse_output=False to get a dense array
270264 predictions_one_hot = one_hot_encoder .fit_transform (
271265 self .dataframe [["predictions" ]]
0 commit comments