Skip to content

Commit 2e07178

Browse files
committed
increased code coverage with new tests
1 parent 0226cd8 commit 2e07178

File tree

1 file changed

+327
-0
lines changed

1 file changed

+327
-0
lines changed

tests/test_tmplot.py

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,337 @@ def test_entropy(self):
159159
self.assertGreater(entropy, 0)
160160
self.assertGreater(entropy2, 0)
161161

162+
def test_entropy_single_topic(self):
    # Edge case annotated as line 76 of _metrics.py: entropy of a phi matrix
    # intended to describe a single topic must still come back as a float.
    # NOTE(review): the matrix is built with shape (1, 100), while
    # test_get_salient_terms compares the salient-terms count with
    # phi.shape[0], which suggests rows are terms. Confirm that (1, 100)
    # really means "one topic" and not "one term".
    import numpy as np

    phi_one_topic = np.random.rand(1, 100)
    result = tm.entropy(phi_one_topic)
    self.assertIsInstance(result, float)
168+
162169
def test_get_salient_terms(self):
    # The saliency result is expected to hold one value per row of phi.
    expected_size = self.phi.shape[0]
    salient = tm.get_salient_terms(self.phi, self.theta)
    self.assertEqual(salient.size, expected_size)
165172

173+
# Error handling tests for _helpers.py
174+
def test_get_theta_gensim_no_corpus(self):
    # A gensim model passed without a corpus must raise ValueError
    # (annotated as line 158 of _helpers.py). The message is matched
    # case-insensitively.
    with self.assertRaises(ValueError) as raised:
        tm.get_theta(self.gensim_model)
    message = str(raised.exception).lower()
    self.assertIn("corpus", message)
179+
180+
def test_get_theta_gensim_empty_corpus(self):
    # An empty corpus for a gensim model must raise ValueError
    # (annotated as line 160 of _helpers.py).
    no_documents = []
    with self.assertRaises(ValueError) as raised:
        tm.get_theta(self.gensim_model, corpus=no_documents)
    message = str(raised.exception)
    self.assertIn("corpus cannot be empty", message)
185+
186+
def test_get_theta_unsupported_model(self):
    # An object of an unrecognised class must be rejected with ValueError
    # (annotated as line 171 of _helpers.py).
    class UnsupportedModel:
        pass

    stranger = UnsupportedModel()
    with self.assertRaises(ValueError) as raised:
        tm.get_theta(stranger)
    message = str(raised.exception)
    self.assertIn("Unsupported model type", message)
194+
195+
def test_get_top_docs_no_model_or_theta(self):
    # Calling get_top_docs with documents only (no model, no theta) must
    # raise ValueError (annotated as line 237 of _helpers.py).
    documents = tm.get_docs(self.tomotopy_model)
    with self.assertRaises(ValueError) as raised:
        tm.get_top_docs(documents)
    message = str(raised.exception)
    self.assertIn("model or a theta matrix", message)
201+
202+
def test_calc_topics_marg_probs_empty_theta(self):
    # An empty theta array must raise ValueError
    # (annotated as line 273 of _helpers.py).
    import numpy as np

    theta_without_data = np.array([])
    with self.assertRaises(ValueError) as raised:
        tm.calc_topics_marg_probs(theta_without_data)
    message = str(raised.exception)
    self.assertIn("theta matrix cannot be empty", message)
209+
210+
def test_calc_topics_marg_probs_all_zeros(self):
    # A theta matrix filled with zeros must raise ValueError
    # (annotated as line 278 of _helpers.py).
    import numpy as np

    theta_of_zeros = np.zeros((3, 5))
    with self.assertRaises(ValueError) as raised:
        tm.calc_topics_marg_probs(theta_of_zeros)
    message = str(raised.exception)
    self.assertIn("contains all zeros", message)
217+
218+
def test_calc_topics_marg_probs_invalid_topic_id(self):
    # A topic_id far beyond the available topics must raise IndexError
    # (annotated as line 283 of _helpers.py).
    bogus_topic_id = 999
    with self.assertRaises(IndexError) as raised:
        tm.calc_topics_marg_probs(self.theta, topic_id=bogus_topic_id)
    message = str(raised.exception)
    self.assertIn("out of bounds", message)
223+
224+
def test_calc_terms_marg_probs_empty_phi(self):
    # An empty phi combined with valid topic marginals must raise
    # ValueError (annotated as line 313 of _helpers.py).
    import numpy as np

    phi_without_data = np.array([])
    topic_marginals = tm.calc_topics_marg_probs(self.theta)
    with self.assertRaises(ValueError) as raised:
        tm.calc_terms_marg_probs(phi_without_data, topic_marginals)
    message = str(raised.exception)
    self.assertIn("phi matrix cannot be empty", message)
232+
233+
def test_calc_terms_marg_probs_empty_pt(self):
    # A valid phi combined with an empty p_t array must raise ValueError
    # (annotated as line 315 of _helpers.py).
    import numpy as np

    marginals_without_data = np.array([])
    with self.assertRaises(ValueError) as raised:
        tm.calc_terms_marg_probs(self.phi, marginals_without_data)
    message = str(raised.exception)
    self.assertIn("p_t array cannot be empty", message)
240+
241+
def test_calc_terms_marg_probs_dimension_mismatch(self):
    # A two-element p_t is used as a deliberately wrong-sized marginal
    # vector; the call must raise ValueError (annotated as line 317 of
    # _helpers.py).
    import numpy as np

    mismatched_marginals = np.array([0.5, 0.5])
    with self.assertRaises(ValueError) as raised:
        tm.calc_terms_marg_probs(self.phi, mismatched_marginals)
    message = str(raised.exception)
    self.assertIn("phi topics dimension", message)
248+
249+
def test_calc_terms_marg_probs_invalid_word_id(self):
    # A word_id ten positions past the row count of phi must raise
    # IndexError (annotated as line 322 of _helpers.py).
    topic_marginals = tm.calc_topics_marg_probs(self.theta)
    rows_in_phi = self.phi.shape[0]
    with self.assertRaises(IndexError) as raised:
        tm.calc_terms_marg_probs(
            self.phi,
            topic_marginals,
            word_id=rows_in_phi + 10,
        )
    message = str(raised.exception)
    self.assertIn("word_id", message)
256+
257+
def test_get_salient_terms_empty_matrices(self):
    # Empty phi and theta together must raise ValueError
    # (annotated as line 347 of _helpers.py).
    import numpy as np

    phi_without_data = np.array([])
    theta_without_data = np.array([])
    with self.assertRaises(ValueError) as raised:
        tm.get_salient_terms(phi_without_data, theta_without_data)
    message = str(raised.exception)
    self.assertIn("phi and theta matrices cannot be empty", message)
265+
266+
def test_get_salient_terms_dimension_mismatch(self):
    # A random (10, 5) theta is used as a matrix whose topic count is
    # meant to disagree with phi; the call must raise ValueError
    # (annotated as line 349 of _helpers.py).
    # NOTE(review): this presumes the fixture model does not have a topic
    # count that happens to match this shape; verify against setUp.
    import numpy as np

    mismatched_theta = np.random.rand(10, 5)
    with self.assertRaises(ValueError) as raised:
        tm.get_salient_terms(self.phi, mismatched_theta)
    message = str(raised.exception)
    self.assertIn("phi topics dimension", message)
273+
274+
# Tests for _vis.py error handling
275+
def test_plot_scatter_topics_empty_ndarray(self):
    # An empty ndarray of coordinates must raise ValueError
    # (annotated as lines 133-135 of _vis.py).
    import numpy as np

    coords_without_data = np.array([])
    with self.assertRaises(ValueError) as raised:
        tm.plot_scatter_topics(coords_without_data)
    message = str(raised.exception)
    self.assertIn("topics_coords cannot be empty", message)
282+
283+
def test_plot_scatter_topics_empty_dataframe(self):
    # An empty DataFrame of coordinates must raise ValueError
    # (annotated as lines 137-139 of _vis.py).
    from pandas import DataFrame

    frame_without_data = DataFrame()
    with self.assertRaises(ValueError) as raised:
        tm.plot_scatter_topics(frame_without_data)
    message = str(raised.exception)
    self.assertIn("topics_coords DataFrame cannot be empty", message)
290+
291+
def test_plot_terms_empty_dataframe(self):
    # An empty DataFrame of term probabilities must raise ValueError
    # (annotated as lines 233-234 of _vis.py).
    from pandas import DataFrame

    frame_without_data = DataFrame()
    with self.assertRaises(ValueError) as raised:
        tm.plot_terms(frame_without_data)
    message = str(raised.exception)
    self.assertIn("terms_probs DataFrame cannot be empty", message)
298+
299+
def test_plot_terms_missing_columns(self):
    # A DataFrame holding only an unrelated column must raise ValueError
    # (annotated as lines 236-238 of _vis.py).
    from pandas import DataFrame

    frame_with_wrong_columns = DataFrame({"wrong_col": [1, 2, 3]})
    with self.assertRaises(ValueError) as raised:
        tm.plot_terms(frame_with_wrong_columns)
    message = str(raised.exception)
    self.assertIn("Missing required columns", message)
306+
307+
def test_plot_docs_with_sequence(self):
    # plot_docs is given a plain list of strings (annotated as lines
    # 298-299 of _vis.py) and must return an IPython HTML object.
    from IPython.display import HTML

    documents = ["Document 1 content", "Document 2 content"]
    rendered = tm.plot_docs(documents)
    self.assertIsInstance(rendered, HTML)
313+
314+
# Tests for _report.py error handling
315+
def test_report_empty_docs(self):
    # report() with an empty docs list must raise ValueError
    # (annotated as line 110 of _report.py).
    no_documents = []
    with self.assertRaises(ValueError) as raised:
        tm.report(self.tomotopy_model, docs=no_documents)
    message = str(raised.exception)
    self.assertIn("docs cannot be empty", message)
320+
321+
def test_report_none_docs(self):
    # report() with docs=None must raise the same ValueError as an empty
    # list (annotated as line 110 of _report.py).
    missing_documents = None
    with self.assertRaises(ValueError) as raised:
        tm.report(self.tomotopy_model, docs=missing_documents)
    message = str(raised.exception)
    self.assertIn("docs cannot be empty", message)
326+
327+
def test_report_with_custom_parameters(self):
    # Coverage test: build a report with non-default display switches and
    # dimensions, then check that a VBox widget is returned.
    from ipywidgets import VBox

    documents = tm.get_docs(self.tomotopy_model)
    options = {
        "docs": documents,
        "topics_labels": ["Topic A", "Topic B"],
        "show_headers": False,
        "show_docs": False,
        "show_words": False,
        "show_topics": True,
        "width": 400,
        "height": 600,
    }
    widget = tm.report(self.tomotopy_model, **options)
    self.assertIsInstance(widget, VBox)
343+
344+
def test_report_gensim_with_corpus(self):
    # Build a report for the gensim model. One placeholder document is
    # created per column of theta so the docs list matches the corpus size.
    from ipywidgets import VBox

    gensim_theta = tm.get_theta(self.gensim_model, self.gensim_corpus)
    placeholders = [f"doc{i}" for i in range(gensim_theta.shape[1])]
    widget = tm.report(
        self.gensim_model,
        docs=placeholders,
        corpus=self.gensim_corpus,
        width=200,
    )
    self.assertIsInstance(widget, VBox)
357+
358+
# Additional tests for better coverage of edge cases
359+
def test_get_docs_non_tomotopy_model(self):
    # get_docs on a gensim (non-tomotopy) model is expected to yield None
    # (annotated as line 198 in the original note).
    documents = tm.get_docs(self.gensim_model)
    self.assertIsNone(documents)
363+
364+
def test_get_phi_with_vocabulary(self):
    # get_phi with an explicit vocabulary (annotated as line 85 in the
    # original note): the returned index must be exactly that vocabulary.
    baseline_phi = tm.get_phi(self.gensim_model)
    row_count = baseline_phi.shape[0]
    vocab = ["word" + str(i) for i in range(row_count)]

    labelled_phi = tm.get_phi(self.gensim_model, vocabulary=vocab)
    index = labelled_phi.index
    self.assertEqual(len(index), len(vocab))
    self.assertListEqual(list(index), vocab)
371+
372+
def test_prepare_coords_with_kwargs(self):
    # prepare_coords is called with explicit dist_kws and scatter_kws; the
    # result must have five columns (x, y, size, label, topic).
    coords = tm.prepare_coords(
        self.tomotopy_model,
        dist_kws={"method": "jsd"},
        scatter_kws={"method": "mds"},
    )
    column_count = coords.shape[1]
    self.assertEqual(column_count, 5)
382+
383+
def test_get_top_docs_with_theta_matrix(self):
    # get_top_docs is given the raw theta values instead of a model; the
    # result must have 5 rows, the default number of documents.
    documents = tm.get_docs(self.tomotopy_model)
    raw_theta = self.theta.values
    ranked = tm.get_top_docs(documents, theta=raw_theta)
    row_count = ranked.shape[0]
    self.assertEqual(row_count, 5)
389+
390+
def test_get_top_docs_with_specific_topics(self):
    # Restrict get_top_docs to a chosen subset of topics; the result must
    # have one column per requested topic.
    wanted_topics = [0, 2, 4]
    documents = tm.get_docs(self.tomotopy_model)
    ranked = tm.get_top_docs(
        documents,
        self.tomotopy_model,
        self.theta,
        topics=wanted_topics,
    )
    column_count = ranked.shape[1]
    self.assertEqual(column_count, len(wanted_topics))
401+
402+
def test_calc_terms_probs_ratio_edge_cases(self):
    # Non-default topic, terms_num and lambda_: 10 terms are requested and
    # 20 rows are expected back (10 terms * 2 types).
    options = {"topic": 1, "terms_num": 10, "lambda_": 0.8}
    ratios = tm.calc_terms_probs_ratio(self.phi, **options)
    self.assertEqual(len(ratios), 20)
411+
412+
def test_get_relevant_terms_different_lambda(self):
    # Relevance for topic 0 is computed with two lambda values. Both
    # results must have one entry per row of phi, and they must differ
    # from each other.
    low_lambda = tm.get_relevant_terms(self.phi, 0, lambda_=0.2)
    high_lambda = tm.get_relevant_terms(self.phi, 0, lambda_=0.9)

    self.assertEqual(low_lambda.size, self.phi.shape[0])
    self.assertEqual(high_lambda.size, self.phi.shape[0])

    identical = low_lambda.equals(high_lambda)
    self.assertFalse(identical)
420+
421+
def test_plot_scatter_topics_with_all_options(self):
    # Coverage test: pass every customisation keyword to
    # plot_scatter_topics and check that an altair LayerChart is returned.
    from altair import LayerChart

    coords = tm.prepare_coords(self.tomotopy_model)
    options = {
        "topic": 1,
        "size_col": "size",
        "label_col": "label",
        "font_size": 15,
        "x_kws": {"title": "X Axis"},
        "y_kws": {"title": "Y Axis"},
        "chart_kws": {"title": "Test Chart"},
        "circle_kws": {"opacity": 0.5},
        "text_kws": {"fontSize": 12},
        "size_kws": {"range": [100, 2000]},
        "color_kws": {"scheme": "viridis"},
    }
    plotted = tm.plot_scatter_topics(coords, **options)
    self.assertIsInstance(plotted, LayerChart)
440+
441+
def test_plot_terms_with_custom_parameters(self):
    # Coverage test: pass custom styling keywords to plot_terms and check
    # that an altair Chart is returned.
    from altair import Chart

    ratios = tm.calc_terms_probs_ratio(self.phi, 0)
    options = {
        "font_size": 16,
        "chart_kws": {"width": 400},
        "bar_kws": {"stroke": "black"},
        "x_kws": {"title": "Custom X"},
        "y_kws": {"title": "Custom Y"},
        "color_kws": {"scheme": "set1"},
    }
    plotted = tm.plot_terms(ratios, **options)
    self.assertIsInstance(plotted, Chart)
455+
456+
def test_plot_docs_with_custom_styles(self):
    # plot_docs is given a custom <style> block and extra html_kws and
    # must still return an IPython HTML object.
    from IPython.display import HTML

    documents = ["Document 1", "Document 2"]
    rendered = tm.plot_docs(
        documents,
        styles="<style>table { border: 1px solid black; }</style>",
        html_kws={"escape": False, "classes": "custom-table"},
    )
    self.assertIsInstance(rendered, HTML)
464+
465+
def test_btm_model_functionality(self):
    # Coverage test for the BTM fixture: both phi and theta extracted from
    # it must have at least one row.
    btm_phi = tm.get_phi(self.btm_model_big)
    phi_rows = btm_phi.shape[0]
    self.assertGreater(phi_rows, 0)

    btm_theta = tm.get_theta(self.btm_model_big)
    theta_rows = btm_theta.shape[0]
    self.assertGreater(theta_rows, 0)
472+
473+
# Test package warning functionality (when packages aren't available)
474+
def test_package_warning_simulation(self):
    # The missing-package warning paths cannot be reached here because the
    # optional packages are installed. This test therefore only checks the
    # model-detection helpers: each one must recognise its own fixture
    # model and reject an unrelated object.
    helpers = tm._helpers
    detectors_and_models = (
        (helpers._is_tomotopy, self.tomotopy_model),
        (helpers._is_gensim, self.gensim_model),
        (helpers._is_btmplus, self.btm_model_big),
    )

    # Positive case: every detector accepts the matching model.
    for detect, model in detectors_and_models:
        self.assertTrue(detect(model))

    # Negative case: an instance of an arbitrary class is rejected by all.
    class NotAModel:
        pass

    stranger = NotAModel()
    for detect, _ in detectors_and_models:
        self.assertFalse(detect(stranger))
492+
166493

167494
# Run the whole test module through unittest when executed as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)