@@ -159,10 +159,337 @@ def test_entropy(self):
159159 self .assertGreater (entropy , 0 )
160160 self .assertGreater (entropy2 , 0 )
161161
162+ def test_entropy_single_topic (self ):
163+ # Test edge case with single topic (line 76 in _metrics.py)
164+ import numpy as np
165+ single_topic_phi = np .random .rand (1 , 100 ) # Create single topic phi matrix
166+ entropy_single = tm .entropy (single_topic_phi )
167+ self .assertIsInstance (entropy_single , float )
168+
162169 def test_get_salient_terms (self ):
163170 saliency = tm .get_salient_terms (self .phi , self .theta )
164171 self .assertEqual (saliency .size , self .phi .shape [0 ])
165172
    # Error handling tests for _helpers.py
174+ def test_get_theta_gensim_no_corpus (self ):
175+ # Test error when corpus is not provided for gensim model (line 158)
176+ with self .assertRaises (ValueError ) as context :
177+ tm .get_theta (self .gensim_model )
178+ self .assertIn ("corpus" , str (context .exception ).lower ())
179+
180+ def test_get_theta_gensim_empty_corpus (self ):
181+ # Test error when corpus is empty for gensim model (line 160)
182+ with self .assertRaises (ValueError ) as context :
183+ tm .get_theta (self .gensim_model , corpus = [])
184+ self .assertIn ("corpus cannot be empty" , str (context .exception ))
185+
186+ def test_get_theta_unsupported_model (self ):
187+ # Test error for unsupported model type (line 171)
188+ class UnsupportedModel :
189+ pass
190+
191+ with self .assertRaises (ValueError ) as context :
192+ tm .get_theta (UnsupportedModel ())
193+ self .assertIn ("Unsupported model type" , str (context .exception ))
194+
195+ def test_get_top_docs_no_model_or_theta (self ):
196+ # Test error when neither model nor theta is provided (line 237)
197+ docs = tm .get_docs (self .tomotopy_model )
198+ with self .assertRaises (ValueError ) as context :
199+ tm .get_top_docs (docs )
200+ self .assertIn ("model or a theta matrix" , str (context .exception ))
201+
202+ def test_calc_topics_marg_probs_empty_theta (self ):
203+ # Test error for empty theta matrix (line 273)
204+ import numpy as np
205+ empty_theta = np .array ([])
206+ with self .assertRaises (ValueError ) as context :
207+ tm .calc_topics_marg_probs (empty_theta )
208+ self .assertIn ("theta matrix cannot be empty" , str (context .exception ))
209+
210+ def test_calc_topics_marg_probs_all_zeros (self ):
211+ # Test error for theta matrix with all zeros (line 278)
212+ import numpy as np
213+ zero_theta = np .zeros ((3 , 5 ))
214+ with self .assertRaises (ValueError ) as context :
215+ tm .calc_topics_marg_probs (zero_theta )
216+ self .assertIn ("contains all zeros" , str (context .exception ))
217+
218+ def test_calc_topics_marg_probs_invalid_topic_id (self ):
219+ # Test error for invalid topic_id (line 283)
220+ with self .assertRaises (IndexError ) as context :
221+ tm .calc_topics_marg_probs (self .theta , topic_id = 999 )
222+ self .assertIn ("out of bounds" , str (context .exception ))
223+
224+ def test_calc_terms_marg_probs_empty_phi (self ):
225+ # Test error for empty phi matrix (line 313)
226+ import numpy as np
227+ empty_phi = np .array ([])
228+ p_t = tm .calc_topics_marg_probs (self .theta )
229+ with self .assertRaises (ValueError ) as context :
230+ tm .calc_terms_marg_probs (empty_phi , p_t )
231+ self .assertIn ("phi matrix cannot be empty" , str (context .exception ))
232+
233+ def test_calc_terms_marg_probs_empty_pt (self ):
234+ # Test error for empty p_t array (line 315)
235+ import numpy as np
236+ empty_pt = np .array ([])
237+ with self .assertRaises (ValueError ) as context :
238+ tm .calc_terms_marg_probs (self .phi , empty_pt )
239+ self .assertIn ("p_t array cannot be empty" , str (context .exception ))
240+
241+ def test_calc_terms_marg_probs_dimension_mismatch (self ):
242+ # Test error for dimension mismatch (line 317)
243+ import numpy as np
244+ wrong_pt = np .array ([0.5 , 0.5 ]) # Wrong size
245+ with self .assertRaises (ValueError ) as context :
246+ tm .calc_terms_marg_probs (self .phi , wrong_pt )
247+ self .assertIn ("phi topics dimension" , str (context .exception ))
248+
249+ def test_calc_terms_marg_probs_invalid_word_id (self ):
250+ # Test error for invalid word_id (line 322)
251+ p_t = tm .calc_topics_marg_probs (self .theta )
252+ max_word_id = self .phi .shape [0 ]
253+ with self .assertRaises (IndexError ) as context :
254+ tm .calc_terms_marg_probs (self .phi , p_t , word_id = max_word_id + 10 )
255+ self .assertIn ("word_id" , str (context .exception ))
256+
257+ def test_get_salient_terms_empty_matrices (self ):
258+ # Test error for empty phi and theta matrices (line 347)
259+ import numpy as np
260+ empty_phi = np .array ([])
261+ empty_theta = np .array ([])
262+ with self .assertRaises (ValueError ) as context :
263+ tm .get_salient_terms (empty_phi , empty_theta )
264+ self .assertIn ("phi and theta matrices cannot be empty" , str (context .exception ))
265+
266+ def test_get_salient_terms_dimension_mismatch (self ):
267+ # Test error for dimension mismatch in phi and theta (line 349)
268+ import numpy as np
269+ wrong_theta = np .random .rand (10 , 5 ) # Wrong number of topics
270+ with self .assertRaises (ValueError ) as context :
271+ tm .get_salient_terms (self .phi , wrong_theta )
272+ self .assertIn ("phi topics dimension" , str (context .exception ))
273+
    # Tests for _vis.py error handling
275+ def test_plot_scatter_topics_empty_ndarray (self ):
276+ # Test error for empty ndarray input (lines 133-135)
277+ import numpy as np
278+ empty_coords = np .array ([])
279+ with self .assertRaises (ValueError ) as context :
280+ tm .plot_scatter_topics (empty_coords )
281+ self .assertIn ("topics_coords cannot be empty" , str (context .exception ))
282+
283+ def test_plot_scatter_topics_empty_dataframe (self ):
284+ # Test error for empty DataFrame input (lines 137-139)
285+ from pandas import DataFrame
286+ empty_df = DataFrame ()
287+ with self .assertRaises (ValueError ) as context :
288+ tm .plot_scatter_topics (empty_df )
289+ self .assertIn ("topics_coords DataFrame cannot be empty" , str (context .exception ))
290+
291+ def test_plot_terms_empty_dataframe (self ):
292+ # Test error for empty DataFrame input (lines 233-234)
293+ from pandas import DataFrame
294+ empty_df = DataFrame ()
295+ with self .assertRaises (ValueError ) as context :
296+ tm .plot_terms (empty_df )
297+ self .assertIn ("terms_probs DataFrame cannot be empty" , str (context .exception ))
298+
299+ def test_plot_terms_missing_columns (self ):
300+ # Test error for missing required columns (lines 236-238)
301+ from pandas import DataFrame
302+ incomplete_df = DataFrame ({"wrong_col" : [1 , 2 , 3 ]})
303+ with self .assertRaises (ValueError ) as context :
304+ tm .plot_terms (incomplete_df )
305+ self .assertIn ("Missing required columns" , str (context .exception ))
306+
307+ def test_plot_docs_with_sequence (self ):
308+ # Test plot_docs with sequence input (lines 298-299)
309+ docs_list = ["Document 1 content" , "Document 2 content" ]
310+ result = tm .plot_docs (docs_list )
311+ from IPython .display import HTML
312+ self .assertIsInstance (result , HTML )
313+
    # Tests for _report.py error handling
315+ def test_report_empty_docs (self ):
316+ # Test error for empty docs (line 110)
317+ with self .assertRaises (ValueError ) as context :
318+ tm .report (self .tomotopy_model , docs = [])
319+ self .assertIn ("docs cannot be empty" , str (context .exception ))
320+
321+ def test_report_none_docs (self ):
322+ # Test error for None docs (line 110)
323+ with self .assertRaises (ValueError ) as context :
324+ tm .report (self .tomotopy_model , docs = None )
325+ self .assertIn ("docs cannot be empty" , str (context .exception ))
326+
327+ def test_report_with_custom_parameters (self ):
328+ # Test report with custom parameters to increase coverage
329+ docs = tm .get_docs (self .tomotopy_model )
330+ report = tm .report (
331+ self .tomotopy_model ,
332+ docs = docs ,
333+ topics_labels = ["Topic A" , "Topic B" ],
334+ show_headers = False ,
335+ show_docs = False ,
336+ show_words = False ,
337+ show_topics = True ,
338+ width = 400 ,
339+ height = 600
340+ )
341+ from ipywidgets import VBox
342+ self .assertIsInstance (report , VBox )
343+
344+ def test_report_gensim_with_corpus (self ):
345+ # Test report with gensim model and corpus
346+ theta_gensim = tm .get_theta (self .gensim_model , self .gensim_corpus )
347+ num_docs = theta_gensim .shape [1 ]
348+ docs = [f"doc{ i } " for i in range (num_docs )] # Create appropriate number of docs
349+ report = tm .report (
350+ self .gensim_model ,
351+ docs = docs ,
352+ corpus = self .gensim_corpus ,
353+ width = 200
354+ )
355+ from ipywidgets import VBox
356+ self .assertIsInstance (report , VBox )
357+
    # Additional tests for better coverage of edge cases
359+ def test_get_docs_non_tomotopy_model (self ):
360+ # Test get_docs with non-tomotopy model (line 198)
361+ result = tm .get_docs (self .gensim_model )
362+ self .assertIsNone (result )
363+
364+ def test_get_phi_with_vocabulary (self ):
365+ # Test get_phi with gensim model and vocabulary (line 85)
366+ gensim_phi = tm .get_phi (self .gensim_model )
367+ vocab = ["word" + str (i ) for i in range (gensim_phi .shape [0 ])]
368+ phi_with_vocab = tm .get_phi (self .gensim_model , vocabulary = vocab )
369+ self .assertEqual (len (phi_with_vocab .index ), len (vocab ))
370+ self .assertListEqual (list (phi_with_vocab .index ), vocab )
371+
372+ def test_prepare_coords_with_kwargs (self ):
373+ # Test prepare_coords with dist_kws and scatter_kws
374+ dist_kws = {"method" : "jsd" }
375+ scatter_kws = {"method" : "mds" }
376+ coords = tm .prepare_coords (
377+ self .tomotopy_model ,
378+ dist_kws = dist_kws ,
379+ scatter_kws = scatter_kws
380+ )
381+ self .assertEqual (coords .shape [1 ], 5 ) # x, y, size, label, topic
382+
383+ def test_get_top_docs_with_theta_matrix (self ):
384+ # Test get_top_docs when providing theta matrix instead of model
385+ docs = tm .get_docs (self .tomotopy_model )
386+ theta_values = self .theta .values
387+ top_docs = tm .get_top_docs (docs , theta = theta_values )
388+ self .assertEqual (top_docs .shape [0 ], 5 ) # Default docs_num
389+
390+ def test_get_top_docs_with_specific_topics (self ):
391+ # Test get_top_docs with specific topics selection
392+ docs = tm .get_docs (self .tomotopy_model )
393+ specific_topics = [0 , 2 , 4 ]
394+ top_docs = tm .get_top_docs (
395+ docs ,
396+ self .tomotopy_model ,
397+ self .theta ,
398+ topics = specific_topics
399+ )
400+ self .assertEqual (top_docs .shape [1 ], len (specific_topics ))
401+
402+ def test_calc_terms_probs_ratio_edge_cases (self ):
403+ # Test calc_terms_probs_ratio with different parameters
404+ terms_probs = tm .calc_terms_probs_ratio (
405+ self .phi ,
406+ topic = 1 ,
407+ terms_num = 10 ,
408+ lambda_ = 0.8
409+ )
410+ self .assertEqual (len (terms_probs ), 20 ) # 10 terms * 2 types
411+
412+ def test_get_relevant_terms_different_lambda (self ):
413+ # Test get_relevant_terms with different lambda values
414+ relevant_terms_1 = tm .get_relevant_terms (self .phi , 0 , lambda_ = 0.2 )
415+ relevant_terms_2 = tm .get_relevant_terms (self .phi , 0 , lambda_ = 0.9 )
416+ self .assertEqual (relevant_terms_1 .size , self .phi .shape [0 ])
417+ self .assertEqual (relevant_terms_2 .size , self .phi .shape [0 ])
418+ # Results should be different with different lambda values
419+ self .assertFalse (relevant_terms_1 .equals (relevant_terms_2 ))
420+
421+ def test_plot_scatter_topics_with_all_options (self ):
422+ # Test plot_scatter_topics with many parameters to increase coverage
423+ topics_coords = tm .prepare_coords (self .tomotopy_model )
424+ chart = tm .plot_scatter_topics (
425+ topics_coords ,
426+ topic = 1 ,
427+ size_col = "size" ,
428+ label_col = "label" ,
429+ font_size = 15 ,
430+ x_kws = {"title" : "X Axis" },
431+ y_kws = {"title" : "Y Axis" },
432+ chart_kws = {"title" : "Test Chart" },
433+ circle_kws = {"opacity" : 0.5 },
434+ text_kws = {"fontSize" : 12 },
435+ size_kws = {"range" : [100 , 2000 ]},
436+ color_kws = {"scheme" : "viridis" }
437+ )
438+ from altair import LayerChart
439+ self .assertIsInstance (chart , LayerChart )
440+
441+ def test_plot_terms_with_custom_parameters (self ):
442+ # Test plot_terms with custom parameters
443+ terms_probs = tm .calc_terms_probs_ratio (self .phi , 0 )
444+ chart = tm .plot_terms (
445+ terms_probs ,
446+ font_size = 16 ,
447+ chart_kws = {"width" : 400 },
448+ bar_kws = {"stroke" : "black" },
449+ x_kws = {"title" : "Custom X" },
450+ y_kws = {"title" : "Custom Y" },
451+ color_kws = {"scheme" : "set1" }
452+ )
453+ from altair import Chart
454+ self .assertIsInstance (chart , Chart )
455+
456+ def test_plot_docs_with_custom_styles (self ):
457+ # Test plot_docs with custom styles and html_kws
458+ docs_list = ["Document 1" , "Document 2" ]
459+ custom_styles = "<style>table { border: 1px solid black; }</style>"
460+ html_kws = {"escape" : False , "classes" : "custom-table" }
461+ result = tm .plot_docs (docs_list , styles = custom_styles , html_kws = html_kws )
462+ from IPython .display import HTML
463+ self .assertIsInstance (result , HTML )
464+
465+ def test_btm_model_functionality (self ):
466+ # Test BTM model specific functionality to increase coverage
467+ phi_btm = tm .get_phi (self .btm_model_big )
468+ self .assertGreater (phi_btm .shape [0 ], 0 )
469+
470+ theta_btm = tm .get_theta (self .btm_model_big )
471+ self .assertGreater (theta_btm .shape [0 ], 0 )
472+
    # Model-type detection helpers (the missing-package warning paths are
    # only reachable when the optional packages are not installed)
474+ def test_package_warning_simulation (self ):
475+ # This will test the warning paths, but since packages are installed,
476+ # we test the actual functionality and ensure no warnings are raised
477+ # The warning paths would be tested if packages weren't available
478+
479+ # Test that the helper functions work correctly when packages are available
480+ self .assertTrue (tm ._helpers ._is_tomotopy (self .tomotopy_model ))
481+ self .assertTrue (tm ._helpers ._is_gensim (self .gensim_model ))
482+ self .assertTrue (tm ._helpers ._is_btmplus (self .btm_model_big ))
483+
484+ # Test with an object that's not a recognized model type
485+ class NotAModel :
486+ pass
487+
488+ not_a_model = NotAModel ()
489+ self .assertFalse (tm ._helpers ._is_tomotopy (not_a_model ))
490+ self .assertFalse (tm ._helpers ._is_gensim (not_a_model ))
491+ self .assertFalse (tm ._helpers ._is_btmplus (not_a_model ))
492+
166493
if __name__ == "__main__":
    # Run the suite when this file is executed directly as a script.
    unittest.main()
0 commit comments