@@ -8,7 +8,7 @@ import TestHelper._
88
99
1010/**
11- * Test infomartion theoretic feature selection
11+ * Test information-theoretic feature selection on datasets from Peng's webpage
1212 *
1313 * @author Sergio Ramirez
1414 */
@@ -21,20 +21,78 @@ class ITSelectorSuite extends FunSuite with BeforeAndAfterAll {
2121 sqlContext = new SQLContext (SPARK_CTX )
2222 }
2323
24- /** Do entropy based binning of cars data from UC Irvine repository . */
24+ /** Do mRMR feature selection on COLON data: reads test_colon_s3.csv, builds the selector model from the first column vs. the remaining columns (presumably label vs. features; confirm against getSelectorModel), and compares model.selectedFeatures with the expected list. */
2525 test(" Run ITFS on colon data (nPart = 10, nfeat = 10)" ) {
2626
27- val df = readColonData (sqlContext)
27+ val df = readCSVData (sqlContext, " test_colon_s3.csv " )
2828 val cols = df.columns
2929 val pad = 2
3030 val allVectorsDense = true
31- val model = getSelectorModel(sqlContext, df, cols.drop(1 ), cols.head, 10 , 10 , allVectorsDense, pad)
31+ val model = getSelectorModel(sqlContext, df, cols.drop(1 ), cols.head,
32+ 10 , 10 , allVectorsDense, pad)
3233
3334 assertResult(" 512, 764, 1324, 1380, 1411, 1422, 1581, 1670, 1671, 1971" ) {
3435 model.selectedFeatures.mkString(" , " )
3536 }
3637 }
3738
39+ /** Do mRMR feature selection on LEUKEMIA data: reads test_leukemia_s3.csv, builds the selector model from the first column vs. the remaining columns (presumably label vs. features; confirm against getSelectorModel), and compares model.selectedFeatures with the expected list. */
40+ test(" Run ITFS on leukemia data (nPart = 10, nfeat = 10)" ) {
41+
42+ val df = readCSVData(sqlContext, " test_leukemia_s3.csv" )
43+ val cols = df.columns
44+ val pad = 2
45+ val allVectorsDense = true
46+ val model = getSelectorModel(sqlContext, df, cols.drop(1 ), cols.head,
47+ 10 , 10 , allVectorsDense, pad)
48+
49+ assertResult(" 1084, 1719, 1774, 1822, 2061, 2294, 3192, 4387, 4787, 6795" ) {
50+ model.selectedFeatures.mkString(" , " )
51+ }
52+ }
3853
54+ /** Do mRMR feature selection on LUNG data: reads test_lung_s3.csv, builds the selector model from the first column vs. the remaining columns (presumably label vs. features; confirm against getSelectorModel), and compares model.selectedFeatures with the expected list. */
55+ test(" Run ITFS on lung data (nPart = 10, nfeat = 10)" ) {
56+
57+ val df = readCSVData(sqlContext, " test_lung_s3.csv" )
58+ val cols = df.columns
59+ val pad = 2
60+ val allVectorsDense = true
61+ val model = getSelectorModel(sqlContext, df, cols.drop(1 ), cols.head,
62+ 10 , 10 , allVectorsDense, pad)
63+
64+ assertResult(" 18, 22, 29, 125, 132, 150, 166, 242, 243, 269" ) {
65+ model.selectedFeatures.mkString(" , " )
66+ }
67+ }
68+
69+ /** Do mRMR feature selection on LYMPHOMA data: reads test_lymphoma_s3.csv, builds the selector model from the first column vs. the remaining columns (presumably label vs. features; confirm against getSelectorModel), and compares model.selectedFeatures with the expected list. */
70+ test(" Run ITFS on lymphoma data (nPart = 10, nfeat = 10)" ) {
3971
72+ val df = readCSVData(sqlContext, " test_lymphoma_s3.csv" )
73+ val cols = df.columns
74+ val pad = 2
75+ val allVectorsDense = true
76+ val model = getSelectorModel(sqlContext, df, cols.drop(1 ), cols.head,
77+ 10 , 10 , allVectorsDense, pad)
78+
79+ assertResult(" 236, 393, 759, 2747, 2818, 2841, 2862, 3014, 3702, 3792" ) {
80+ model.selectedFeatures.mkString(" , " )
81+ }
82+ }
83+
84+ /** Do mRMR feature selection on NCI data: reads test_nci9_s3.csv, builds the selector model from the first column vs. the remaining columns (presumably label vs. features; confirm against getSelectorModel), and compares model.selectedFeatures with the expected list. */
85+ test(" Run ITFS on nci data (nPart = 10, nfeat = 10)" ) {
86+
87+ val df = readCSVData(sqlContext, " test_nci9_s3.csv" )
88+ val cols = df.columns
89+ val pad = 2
90+ val allVectorsDense = true
91+ val model = getSelectorModel(sqlContext, df, cols.drop(1 ), cols.head,
92+ 10 , 10 , allVectorsDense, pad)
93+
94+ assertResult(" 443, 755, 1369, 1699, 3483, 5641, 6290, 7674, 9399, 9576" ) {
95+ model.selectedFeatures.mkString(" , " )
96+ }
97+ }
4098}
0 commit comments