add verbosity parameter to apriori (#519)

rasbt · web-flow · commit 3f0935a09f39 · 2019-03-27T17:10:39.000-05:00
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -18,6 +18,7 @@ The CHANGELOG for the current development version is available at
 ##### New Features
 
 - Adds multiprocessing support to `StackingCVClassifier`. ([#512](https://github.com/rasbt/mlxtend/pull/512) via [Qiang Gu](https://github.com/qiaguhttps://github.com/qiagu))
+- Adds a `verbose` parameter to `apriori` to show the current iteration number as well as the itemset size currently being sampled. ([#519](https://github.com/rasbt/mlxtend/pull/519))
 - Adds an optional `class_name` parameter to the confusion matrix function to display class names on the axis as tick marks. ([#487](https://github.com/rasbt/mlxtend/pull/487) via [sandpiturtle](https://github.com/qiaguhttps://github.com/sandpiturtle))
 
 ##### Changes
diff --git a/docs/sources/user_guide/frequent_patterns/apriori.ipynb b/docs/sources/user_guide/frequent_patterns/apriori.ipynb
@@ -420,27 +420,27 @@
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs)</td>\n",
+       "      <td>(Eggs, Onion)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Milk, Kidney Beans)</td>\n",
+       "      <td>(Kidney Beans, Milk)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Kidney Beans)</td>\n",
+       "      <td>(Kidney Beans, Onion)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Kidney Beans, Yogurt)</td>\n",
+       "      <td>(Yogurt, Kidney Beans)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs, Kidney Beans)</td>\n",
+       "      <td>(Eggs, Kidney Beans, Onion)</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -454,11 +454,11 @@
        "3       0.6                      (Onion)\n",
        "4       0.6                     (Yogurt)\n",
        "5       0.8         (Eggs, Kidney Beans)\n",
-       "6       0.6                (Onion, Eggs)\n",
-       "7       0.6         (Milk, Kidney Beans)\n",
-       "8       0.6        (Onion, Kidney Beans)\n",
-       "9       0.6       (Kidney Beans, Yogurt)\n",
-       "10      0.6  (Onion, Eggs, Kidney Beans)"
+       "6       0.6                (Eggs, Onion)\n",
+       "7       0.6         (Kidney Beans, Milk)\n",
+       "8       0.6        (Kidney Beans, Onion)\n",
+       "9       0.6       (Yogurt, Kidney Beans)\n",
+       "10      0.6  (Eggs, Kidney Beans, Onion)"
       ]
      },
      "execution_count": 4,
@@ -555,31 +555,31 @@
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs)</td>\n",
+       "      <td>(Eggs, Onion)</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Milk, Kidney Beans)</td>\n",
+       "      <td>(Kidney Beans, Milk)</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Kidney Beans)</td>\n",
+       "      <td>(Kidney Beans, Onion)</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Kidney Beans, Yogurt)</td>\n",
+       "      <td>(Yogurt, Kidney Beans)</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs, Kidney Beans)</td>\n",
+       "      <td>(Eggs, Kidney Beans, Onion)</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -594,11 +594,11 @@
        "3       0.6                      (Onion)       1\n",
        "4       0.6                     (Yogurt)       1\n",
        "5       0.8         (Eggs, Kidney Beans)       2\n",
-       "6       0.6                (Onion, Eggs)       2\n",
-       "7       0.6         (Milk, Kidney Beans)       2\n",
-       "8       0.6        (Onion, Kidney Beans)       2\n",
-       "9       0.6       (Kidney Beans, Yogurt)       2\n",
-       "10      0.6  (Onion, Eggs, Kidney Beans)       3"
+       "6       0.6                (Eggs, Onion)       2\n",
+       "7       0.6         (Kidney Beans, Milk)       2\n",
+       "8       0.6        (Kidney Beans, Onion)       2\n",
+       "9       0.6       (Yogurt, Kidney Beans)       2\n",
+       "10      0.6  (Eggs, Kidney Beans, Onion)       3"
       ]
      },
      "execution_count": 5,
@@ -718,7 +718,7 @@
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs)</td>\n",
+       "      <td>(Eggs, Onion)</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -727,7 +727,7 @@
       ],
       "text/plain": [
        "   support       itemsets  length\n",
-       "6      0.6  (Onion, Eggs)       2"
+       "6      0.6  (Eggs, Onion)       2"
       ]
      },
      "execution_count": 7,
@@ -919,6 +919,30 @@
    "execution_count": 9,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Iteration: 1 | Sampling itemset size 2\r",
+      "Iteration: 2 | Sampling itemset size 2\r",
+      "Iteration: 3 | Sampling itemset size 2\r",
+      "Iteration: 4 | Sampling itemset size 2\r",
+      "Iteration: 5 | Sampling itemset size 2\r",
+      "Iteration: 6 | Sampling itemset size 2\r",
+      "Iteration: 7 | Sampling itemset size 2\r",
+      "Iteration: 8 | Sampling itemset size 2\r",
+      "Iteration: 9 | Sampling itemset size 2\r",
+      "Iteration: 10 | Sampling itemset size 2\r",
+      "Iteration: 11 | Sampling itemset size 3\r",
+      "Iteration: 12 | Sampling itemset size 3\r",
+      "Iteration: 13 | Sampling itemset size 3\r",
+      "Iteration: 14 | Sampling itemset size 3\r",
+      "Iteration: 15 | Sampling itemset size 3\r",
+      "Iteration: 16 | Sampling itemset size 3\r",
+      "Iteration: 17 | Sampling itemset size 3\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -978,27 +1002,27 @@
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs)</td>\n",
+       "      <td>(Eggs, Onion)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Milk, Kidney Beans)</td>\n",
+       "      <td>(Kidney Beans, Milk)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Kidney Beans)</td>\n",
+       "      <td>(Kidney Beans, Onion)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Kidney Beans, Yogurt)</td>\n",
+       "      <td>(Yogurt, Kidney Beans)</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>0.6</td>\n",
-       "      <td>(Onion, Eggs, Kidney Beans)</td>\n",
+       "      <td>(Eggs, Kidney Beans, Onion)</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1012,11 +1036,11 @@
        "3       0.6                      (Onion)\n",
        "4       0.6                     (Yogurt)\n",
        "5       0.8         (Eggs, Kidney Beans)\n",
-       "6       0.6                (Onion, Eggs)\n",
-       "7       0.6         (Milk, Kidney Beans)\n",
-       "8       0.6        (Onion, Kidney Beans)\n",
-       "9       0.6       (Kidney Beans, Yogurt)\n",
-       "10      0.6  (Onion, Eggs, Kidney Beans)"
+       "6       0.6                (Eggs, Onion)\n",
+       "7       0.6         (Kidney Beans, Milk)\n",
+       "8       0.6        (Kidney Beans, Onion)\n",
+       "9       0.6       (Yogurt, Kidney Beans)\n",
+       "10      0.6  (Eggs, Kidney Beans, Onion)"
       ]
      },
      "execution_count": 9,
@@ -1025,7 +1049,7 @@
     }
    ],
    "source": [
-    "apriori(sparse_df, min_support=0.6, use_colnames=True)"
+    "apriori(sparse_df, min_support=0.6, use_colnames=True, verbose=1)"
    ]
   },
   {
@@ -1037,7 +1061,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -1046,7 +1070,7 @@
      "text": [
       "## apriori\n",
       "\n",
-      "*apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1)*\n",
+      "*apriori(df, min_support=0.5, use_colnames=False, max_len=None, verbose=0)*\n",
       "\n",
       "Get frequent itemsets from a one-hot DataFrame\n",
       "**Parameters**\n",
@@ -1088,6 +1112,11 @@
       "    Maximum length of the itemsets generated. If `None` (default) all\n",
       "    possible itemsets lengths (under the apriori condition) are evaluated.\n",
       "\n",
+      "\n",
+      "- `verbose` : int (default: 0)\n",
+      "\n",
+      "    Shows the number of iterations if 1.\n",
+      "\n",
       "**Returns**\n",
       "\n",
       "pandas DataFrame with columns ['support', 'itemsets'] of all itemsets\n",
@@ -1131,7 +1160,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.7.1"
   },
   "toc": {
    "nav_menu": {},
diff --git a/mlxtend/frequent_patterns/apriori.py b/mlxtend/frequent_patterns/apriori.py
@@ -51,7 +51,7 @@ def generate_new_combinations(old_combinations):
                 yield res
 
 
-def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
+def apriori(df, min_support=0.5, use_colnames=False, max_len=None, verbose=0):
     """Get frequent itemsets from a one-hot DataFrame
     Parameters
     -----------
@@ -85,6 +85,9 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
       Maximum length of the itemsets generated. If `None` (default) all
       possible itemsets lengths (under the apriori condition) are evaluated.
 
+    verbose : int (default: 0)
+      Prints the iteration count and current itemset size if nonzero.
+
     Returns
     -----------
     pandas DataFrame with columns ['support', 'itemsets'] of all itemsets
@@ -134,6 +137,8 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
     if max_len is None:
         max_len = float('inf')
 
+    iter_count = 0
+
     while max_itemset and max_itemset < max_len:
         next_max_itemset = max_itemset + 1
         combin = generate_new_combinations(itemset_dict[max_itemset])
@@ -143,6 +148,10 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
         if is_sparse:
             all_ones = np.ones((X.shape[0], next_max_itemset))
         for c in combin:
+            if verbose:
+                iter_count += 1
+                print('\rIteration: %d | Sampling itemset size %d' %
+                      (iter_count, next_max_itemset), end="")
             if is_sparse:
                 together = np.all(X[:, c] == all_ones, axis=1)
             else:
@@ -175,4 +184,7 @@ def apriori(df, min_support=0.5, use_colnames=False, max_len=None, n_jobs=1):
                                                       mapping[i] for i in x]))
     res_df = res_df.reset_index(drop=True)
 
+    if verbose:
+        print()  # adds newline if verbose counter was used
+
     return res_df