Skip to content

Commit 3f0935a

Browse files
authored
add verbosity parameter to apriori (#519)
1 parent 691ede7 commit 3f0935a

File tree

3 files changed

+79
-37
lines changed

3 files changed

+79
-37
lines changed

docs/sources/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ The CHANGELOG for the current development version is available at
1818
##### New Features
1919

2020
- Adds multiprocessing support to `StackingCVClassifier`. ([#512](https://github.com/rasbt/mlxtend/pull/512) via [Qiang Gu](https://github.com/qiagu))
21+
- Adds a `verbose` parameter to `apriori` to show the current iteration number as well as the itemset size currently being sampled. ([#519](https://github.com/rasbt/mlxtend/pull/519))
2122
- Adds an optional `class_name` parameter to the confusion matrix function to display class names on the axis as tick marks. ([#487](https://github.com/rasbt/mlxtend/pull/487) via [sandpiturtle](https://github.com/sandpiturtle))
2223

2324
##### Changes

docs/sources/user_guide/frequent_patterns/apriori.ipynb

Lines changed: 65 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -420,27 +420,27 @@
420420
" <tr>\n",
421421
" <th>6</th>\n",
422422
" <td>0.6</td>\n",
423-
" <td>(Onion, Eggs)</td>\n",
423+
" <td>(Eggs, Onion)</td>\n",
424424
" </tr>\n",
425425
" <tr>\n",
426426
" <th>7</th>\n",
427427
" <td>0.6</td>\n",
428-
" <td>(Milk, Kidney Beans)</td>\n",
428+
" <td>(Kidney Beans, Milk)</td>\n",
429429
" </tr>\n",
430430
" <tr>\n",
431431
" <th>8</th>\n",
432432
" <td>0.6</td>\n",
433-
" <td>(Onion, Kidney Beans)</td>\n",
433+
" <td>(Kidney Beans, Onion)</td>\n",
434434
" </tr>\n",
435435
" <tr>\n",
436436
" <th>9</th>\n",
437437
" <td>0.6</td>\n",
438-
" <td>(Kidney Beans, Yogurt)</td>\n",
438+
" <td>(Yogurt, Kidney Beans)</td>\n",
439439
" </tr>\n",
440440
" <tr>\n",
441441
" <th>10</th>\n",
442442
" <td>0.6</td>\n",
443-
" <td>(Onion, Eggs, Kidney Beans)</td>\n",
443+
" <td>(Eggs, Kidney Beans, Onion)</td>\n",
444444
" </tr>\n",
445445
" </tbody>\n",
446446
"</table>\n",
@@ -454,11 +454,11 @@
454454
"3 0.6 (Onion)\n",
455455
"4 0.6 (Yogurt)\n",
456456
"5 0.8 (Eggs, Kidney Beans)\n",
457-
"6 0.6 (Onion, Eggs)\n",
458-
"7 0.6 (Milk, Kidney Beans)\n",
459-
"8 0.6 (Onion, Kidney Beans)\n",
460-
"9 0.6 (Kidney Beans, Yogurt)\n",
461-
"10 0.6 (Onion, Eggs, Kidney Beans)"
457+
"6 0.6 (Eggs, Onion)\n",
458+
"7 0.6 (Kidney Beans, Milk)\n",
459+
"8 0.6 (Kidney Beans, Onion)\n",
460+
"9 0.6 (Yogurt, Kidney Beans)\n",
461+
"10 0.6 (Eggs, Kidney Beans, Onion)"
462462
]
463463
},
464464
"execution_count": 4,
@@ -555,31 +555,31 @@
555555
" <tr>\n",
556556
" <th>6</th>\n",
557557
" <td>0.6</td>\n",
558-
" <td>(Onion, Eggs)</td>\n",
558+
" <td>(Eggs, Onion)</td>\n",
559559
" <td>2</td>\n",
560560
" </tr>\n",
561561
" <tr>\n",
562562
" <th>7</th>\n",
563563
" <td>0.6</td>\n",
564-
" <td>(Milk, Kidney Beans)</td>\n",
564+
" <td>(Kidney Beans, Milk)</td>\n",
565565
" <td>2</td>\n",
566566
" </tr>\n",
567567
" <tr>\n",
568568
" <th>8</th>\n",
569569
" <td>0.6</td>\n",
570-
" <td>(Onion, Kidney Beans)</td>\n",
570+
" <td>(Kidney Beans, Onion)</td>\n",
571571
" <td>2</td>\n",
572572
" </tr>\n",
573573
" <tr>\n",
574574
" <th>9</th>\n",
575575
" <td>0.6</td>\n",
576-
" <td>(Kidney Beans, Yogurt)</td>\n",
576+
" <td>(Yogurt, Kidney Beans)</td>\n",
577577
" <td>2</td>\n",
578578
" </tr>\n",
579579
" <tr>\n",
580580
" <th>10</th>\n",
581581
" <td>0.6</td>\n",
582-
" <td>(Onion, Eggs, Kidney Beans)</td>\n",
582+
" <td>(Eggs, Kidney Beans, Onion)</td>\n",
583583
" <td>3</td>\n",
584584
" </tr>\n",
585585
" </tbody>\n",
@@ -594,11 +594,11 @@
594594
"3 0.6 (Onion) 1\n",
595595
"4 0.6 (Yogurt) 1\n",
596596
"5 0.8 (Eggs, Kidney Beans) 2\n",
597-
"6 0.6 (Onion, Eggs) 2\n",
598-
"7 0.6 (Milk, Kidney Beans) 2\n",
599-
"8 0.6 (Onion, Kidney Beans) 2\n",
600-
"9 0.6 (Kidney Beans, Yogurt) 2\n",
601-
"10 0.6 (Onion, Eggs, Kidney Beans) 3"
597+
"6 0.6 (Eggs, Onion) 2\n",
598+
"7 0.6 (Kidney Beans, Milk) 2\n",
599+
"8 0.6 (Kidney Beans, Onion) 2\n",
600+
"9 0.6 (Yogurt, Kidney Beans) 2\n",
601+
"10 0.6 (Eggs, Kidney Beans, Onion) 3"
602602
]
603603
},
604604
"execution_count": 5,
@@ -718,7 +718,7 @@
718718
" <tr>\n",
719719
" <th>6</th>\n",
720720
" <td>0.6</td>\n",
721-
" <td>(Onion, Eggs)</td>\n",
721+
" <td>(Eggs, Onion)</td>\n",
722722
" <td>2</td>\n",
723723
" </tr>\n",
724724
" </tbody>\n",
@@ -727,7 +727,7 @@
727727
],
728728
"text/plain": [
729729
" support itemsets length\n",
730-
"6 0.6 (Onion, Eggs) 2"
730+
"6 0.6 (Eggs, Onion) 2"
731731
]
732732
},
733733
"execution_count": 7,
@@ -919,6 +919,30 @@
919919
"execution_count": 9,
920920
"metadata": {},
921921
"outputs": [
922+
{
923+
"name": "stdout",
924+
"output_type": "stream",
925+
"text": [
926+
"\r",
927+
"Iteration: 1 | Sampling itemset size 2\r",
928+
"Iteration: 2 | Sampling itemset size 2\r",
929+
"Iteration: 3 | Sampling itemset size 2\r",
930+
"Iteration: 4 | Sampling itemset size 2\r",
931+
"Iteration: 5 | Sampling itemset size 2\r",
932+
"Iteration: 6 | Sampling itemset size 2\r",
933+
"Iteration: 7 | Sampling itemset size 2\r",
934+
"Iteration: 8 | Sampling itemset size 2\r",
935+
"Iteration: 9 | Sampling itemset size 2\r",
936+
"Iteration: 10 | Sampling itemset size 2\r",
937+
"Iteration: 11 | Sampling itemset size 3\r",
938+
"Iteration: 12 | Sampling itemset size 3\r",
939+
"Iteration: 13 | Sampling itemset size 3\r",
940+
"Iteration: 14 | Sampling itemset size 3\r",
941+
"Iteration: 15 | Sampling itemset size 3\r",
942+
"Iteration: 16 | Sampling itemset size 3\r",
943+
"Iteration: 17 | Sampling itemset size 3\n"
944+
]
945+
},
922946
{
923947
"data": {
924948
"text/html": [
@@ -978,27 +1002,27 @@
9781002
" <tr>\n",
9791003
" <th>6</th>\n",
9801004
" <td>0.6</td>\n",
981-
" <td>(Onion, Eggs)</td>\n",
1005+
" <td>(Eggs, Onion)</td>\n",
9821006
" </tr>\n",
9831007
" <tr>\n",
9841008
" <th>7</th>\n",
9851009
" <td>0.6</td>\n",
986-
" <td>(Milk, Kidney Beans)</td>\n",
1010+
" <td>(Kidney Beans, Milk)</td>\n",
9871011
" </tr>\n",
9881012
" <tr>\n",
9891013
" <th>8</th>\n",
9901014
" <td>0.6</td>\n",
991-
" <td>(Onion, Kidney Beans)</td>\n",
1015+
" <td>(Kidney Beans, Onion)</td>\n",
9921016
" </tr>\n",
9931017
" <tr>\n",
9941018
" <th>9</th>\n",
9951019
" <td>0.6</td>\n",
996-
" <td>(Kidney Beans, Yogurt)</td>\n",
1020+
" <td>(Yogurt, Kidney Beans)</td>\n",
9971021
" </tr>\n",
9981022
" <tr>\n",
9991023
" <th>10</th>\n",
10001024
" <td>0.6</td>\n",
1001-
" <td>(Onion, Eggs, Kidney Beans)</td>\n",
1025+
" <td>(Eggs, Kidney Beans, Onion)</td>\n",
10021026
" </tr>\n",
10031027
" </tbody>\n",
10041028
"</table>\n",
@@ -1012,11 +1036,11 @@
10121036
"3 0.6 (Onion)\n",
10131037
"4 0.6 (Yogurt)\n",
10141038
"5 0.8 (Eggs, Kidney Beans)\n",
1015-
"6 0.6 (Onion, Eggs)\n",
1016-
"7 0.6 (Milk, Kidney Beans)\n",
1017-
"8 0.6 (Onion, Kidney Beans)\n",
1018-
"9 0.6 (Kidney Beans, Yogurt)\n",
1019-
"10 0.6 (Onion, Eggs, Kidney Beans)"
1039+
"6 0.6 (Eggs, Onion)\n",
1040+
"7 0.6 (Kidney Beans, Milk)\n",
1041+
"8 0.6 (Kidney Beans, Onion)\n",
1042+
"9 0.6 (Yogurt, Kidney Beans)\n",
1043+
"10 0.6 (Eggs, Kidney Beans, Onion)"
10201044
]
10211045
},
10221046
"execution_count": 9,
@@ -1025,7 +1049,7 @@
10251049
}
10261050
],
10271051
"source": [
1028-
"apriori(sparse_df, min_support=0.6, use_colnames=True)"
1052+
"apriori(sparse_df, min_support=0.6, use_colnames=True, verbose=1)"
10291053
]
10301054
},
10311055
{
@@ -1037,7 +1061,7 @@
10371061
},
10381062
{
10391063
"cell_type": "code",
1040-
"execution_count": 1,
1064+
"execution_count": 10,
10411065
"metadata": {},
10421066
"outputs": [
10431067
{
@@ -1046,7 +1070,7 @@
10461070
"text": [
10471071
"## apriori\n",
10481072
"\n",
1049-
"*apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1)*\n",
1073+
"*apriori(df, min_support=0.5, use_colnames=False, max_len=None, verbose=0)*\n",
10501074
"\n",
10511075
"Get frequent itemsets from a one-hot DataFrame\n",
10521076
"**Parameters**\n",
@@ -1088,6 +1112,11 @@
10881112
" Maximum length of the itemsets generated. If `None` (default) all\n",
10891113
" possible itemsets lengths (under the apriori condition) are evaluated.\n",
10901114
"\n",
1115+
"\n",
1116+
"- `verbose` : int (default: 0)\n",
1117+
"\n",
1118+
" Shows the number of iterations if 1.\n",
1119+
"\n",
10911120
"**Returns**\n",
10921121
"\n",
10931122
"pandas DataFrame with columns ['support', 'itemsets'] of all itemsets\n",
@@ -1131,7 +1160,7 @@
11311160
"name": "python",
11321161
"nbconvert_exporter": "python",
11331162
"pygments_lexer": "ipython3",
1134-
"version": "3.6.5"
1163+
"version": "3.7.1"
11351164
},
11361165
"toc": {
11371166
"nav_menu": {},

mlxtend/frequent_patterns/apriori.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def generate_new_combinations(old_combinations):
5151
yield res
5252

5353

54-
def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
54+
def apriori(df, min_support=0.5, use_colnames=False, max_len=None, verbose=0):
5555
"""Get frequent itemsets from a one-hot DataFrame
5656
Parameters
5757
-----------
@@ -85,6 +85,9 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
8585
Maximum length of the itemsets generated. If `None` (default) all
8686
possible itemsets lengths (under the apriori condition) are evaluated.
8787
88+
verbose : int (default: 0)
89+
Shows the number of iterations if 1.
90+
8891
Returns
8992
-----------
9093
pandas DataFrame with columns ['support', 'itemsets'] of all itemsets
@@ -134,6 +137,8 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
134137
if max_len is None:
135138
max_len = float('inf')
136139

140+
iter_count = 0
141+
137142
while max_itemset and max_itemset < max_len:
138143
next_max_itemset = max_itemset + 1
139144
combin = generate_new_combinations(itemset_dict[max_itemset])
@@ -143,6 +148,10 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
143148
if is_sparse:
144149
all_ones = np.ones((X.shape[0], next_max_itemset))
145150
for c in combin:
151+
if verbose:
152+
iter_count += 1
153+
print('\rIteration: %d | Sampling itemset size %d' %
154+
(iter_count, next_max_itemset), end="")
146155
if is_sparse:
147156
together = np.all(X[:, c] == all_ones, axis=1)
148157
else:
@@ -175,4 +184,7 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
175184
mapping[i] for i in x]))
176185
res_df = res_df.reset_index(drop=True)
177186

187+
if verbose:
188+
print() # adds newline if verbose counter was used
189+
178190
return res_df

0 commit comments

Comments
 (0)