|
4 | 4 |
|
5 | 5 | import numpy as np |
6 | 6 | from numpy.testing import assert_array_equal |
| 7 | +from orangecontrib.text.preprocess import RegexpTokenizer |
| 8 | +from orangewidget.utils.signals import Input, summarize |
7 | 9 | from scipy.sparse import csr_matrix, issparse |
8 | 10 |
|
9 | 11 | from Orange.data import Table, DiscreteVariable, StringVariable, Domain, ContinuousVariable |
@@ -629,5 +631,40 @@ def test_pickle_corpus(self): |
629 | 631 | pickle.dumps(c) |
630 | 632 |
|
631 | 633 |
|
class TestCorpusSummaries(unittest.TestCase):
    """Tests for the summary object that ``summarize`` builds for a Corpus."""

    def _check_summary(self, corpus, closing_line):
        """Assert the summary and details produced for ``corpus``.

        The expected details consist of four fixed lines (instance and
        variable counts, features, target, metas) followed by
        ``closing_line``, which is the only part that differs between the
        tests in this class.

        :param corpus: corpus instance to summarize
        :param closing_line: expected final ``<nobr>`` line of the details
        """
        # Variable count = domain variables plus metas.
        n_vars = len(corpus.domain.variables) + len(corpus.domain.metas)
        expected = (
            f"<nobr>{len(corpus)} instances, {n_vars} variables</nobr><br/>"
            "<nobr>Features: — (no missing values)</nobr><br/>"
            "<nobr>Target: categorical</nobr><br/>"
            "<nobr>Metas: string</nobr><br/>"
        ) + closing_line

        # Call the implementation registered for Corpus directly.
        summarizer = summarize.dispatch(Corpus)
        result = summarizer(corpus)

        # 140 is the value expected for "book-excerpts"
        # (presumably its document count -- confirm against the dataset).
        self.assertEqual(140, result.summary)
        self.assertEqual(expected, result.details)

    def test_corpus_not_preprocessed(self):
        """Details of a freshly loaded corpus say it is not preprocessed."""
        self._check_summary(
            Corpus.from_file("book-excerpts"),
            "<nobr>Corpus is not preprocessed</nobr>",
        )

    def test_corpus_preprocessed(self):
        """Details of a tokenized corpus end with token and type totals."""
        tokenize = RegexpTokenizer()
        tokenized = tokenize(Corpus.from_file("book-excerpts"))
        self._check_summary(
            tokenized,
            "<nobr>Total tokens: 128020, Total types: 11712</nobr>",
        )
| 667 | + |
| 668 | + |
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
0 commit comments