From 2ae7c93549d393837eb7bde0b369c5a27cfe623e Mon Sep 17 00:00:00 2001
From: avsthiago <thiago.alves@jarchitects.be>
Date: Sat, 24 Oct 2020 19:07:58 +0200
Subject: [PATCH 1/4] Fixing typos and pep8 issues

---
 tests/test_dataframe.py | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/tests/test_dataframe.py b/tests/test_dataframe.py
index 481c0b2..31d238e 100644
--- a/tests/test_dataframe.py
+++ b/tests/test_dataframe.py
@@ -1,6 +1,6 @@
 import numpy as np
-from numpy.testing import assert_array_equal
 import pytest
+from numpy.testing import assert_array_equal
 
 import pandas_cub as pdc
 from tests import assert_df_equals
@@ -28,20 +28,20 @@ def test_input_types(self):
             pdc.DataFrame({'a': np.array([1]), 'b': 10})
 
         with pytest.raises(ValueError):
-            pdc.DataFrame({'a': np.array([1]), 
+            pdc.DataFrame({'a': np.array([1]),
                            'b': np.array([[1]])})
 
         # correct construction. no error
-        pdc.DataFrame({'a': np.array([1]), 
+        pdc.DataFrame({'a': np.array([1]),
                        'b': np.array([1])})
 
     def test_array_length(self):
         with pytest.raises(ValueError):
-            pdc.DataFrame({'a': np.array([1, 2]), 
+            pdc.DataFrame({'a': np.array([1, 2]),
                            'b': np.array([1])})
         # correct construction. no error                           
-        pdc.DataFrame({'a': np.array([1, 2]), 
-                        'b': np.array([5, 10])})
+        pdc.DataFrame({'a': np.array([1, 2]),
+                       'b': np.array([5, 10])})
 
     def test_unicode_to_object(self):
         a_object = a.astype('O')
@@ -110,8 +110,8 @@ def test_simple_boolean(self):
         bool_arr = np.array([True, False, False])
         df_bool = pdc.DataFrame({'col': bool_arr})
         df_result = df[df_bool]
-        df_answer = pdc.DataFrame({'a': a[bool_arr], 'b': b[bool_arr], 
-                                   'c': c[bool_arr], 'd': d[bool_arr], 
+        df_answer = pdc.DataFrame({'a': a[bool_arr], 'b': b[bool_arr],
+                                   'c': c[bool_arr], 'd': d[bool_arr],
                                    'e': e[bool_arr]})
         assert_df_equals(df_result, df_answer)
 
@@ -131,7 +131,7 @@ def test_multiple_columns_tuple(self):
         df_answer = pdc.DataFrame({'a': a, 'c': c})
         assert_df_equals(df_result, df_answer)
 
-    def test_int_selcetion(self):
+    def test_int_selection(self):
         assert_df_equals(df[:, 3], pdc.DataFrame({'d': d}))
 
     def test_simultaneous_tuple(self):
@@ -221,7 +221,7 @@ def test_new_column(self):
 
         with pytest.raises(NotImplementedError):
             df[['a', 'b']] = 5
-        
+
         with pytest.raises(ValueError):
             df['a'] = np.random.rand(5, 5)
 
@@ -246,7 +246,7 @@ def test_head_tail(self):
 
         df_result = df.tail(2)
         df_answer = pdc.DataFrame({'a': a[-2:], 'b': b[-2:], 'c': c[-2:],
-                                   'd':d[-2:], 'e': e[-2:]})
+                                   'd': d[-2:], 'e': e[-2:]})
         assert_df_equals(df_result, df_answer)
 
 
@@ -263,7 +263,6 @@ def test_head_tail(self):
 
 class TestAggregation:
 
-
     def test_min(self):
         df_result = df1.min()
         df_answer = pdc.DataFrame({'a': np.array(['a'], dtype='O'),
@@ -555,7 +554,7 @@ def test_sort_values(self):
     def test_sort_values_desc(self):
         df_result = df6.sort_values('a', asc=False)
         a = np.array(['c', 'b', 'b', 'a', 'a'])
-        b = np.array([5.1, 6, 3.4, 1,2])
+        b = np.array([5.1, 6, 3.4, 1, 2])
         df_answer = pdc.DataFrame({'a': a, 'b': b})
         assert_df_equals(df_result, df_answer)
 
@@ -600,8 +599,9 @@ def test_sample(self):
 class TestGrouping:
 
     def test_value_counts(self):
-        df_temp = pdc.DataFrame({'state': np.array(['texas', 'texas', 'texas', 'florida', 'florida', 'florida', 'florida', 'ohio']),
-                                 'fruit': np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'a'])})
+        df_temp = pdc.DataFrame(
+            {'state': np.array(['texas', 'texas', 'texas', 'florida', 'florida', 'florida', 'florida', 'ohio']),
+             'fruit': np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'a'])})
         df_results = df_temp.value_counts()
         df_answer = pdc.DataFrame({'state': np.array(['florida', 'texas', 'ohio'], dtype=object),
                                    'count': np.array([4, 3, 1])})
@@ -612,8 +612,9 @@ def test_value_counts(self):
         assert_df_equals(df_results[1], df_answer)
 
     def test_value_counts_normalize(self):
-        df_temp = pdc.DataFrame({'state': np.array(['texas', 'texas', 'texas', 'florida', 'florida', 'florida', 'florida', 'ohio']),
-                                 'fruit': np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'a'])})
+        df_temp = pdc.DataFrame(
+            {'state': np.array(['texas', 'texas', 'texas', 'florida', 'florida', 'florida', 'florida', 'ohio']),
+             'fruit': np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'a'])})
         df_results = df_temp.value_counts(normalize=True)
         df_answer = pdc.DataFrame({'state': np.array(['florida', 'texas', 'ohio'], dtype=object),
                                    'count': np.array([.5, .375, .125])})
@@ -841,4 +842,4 @@ def test_head(self):
                 'salary': np.array([45279, 63166, 66614, 71680, 42390])}
         result = df_emp.head()
         answer = pdc.DataFrame(data)
-        assert_df_equals(result, answer)
\ No newline at end of file
+        assert_df_equals(result, answer)

From 6db227c7480350b04622e57a5f372fccbb0fc28b Mon Sep 17 00:00:00 2001
From: avsthiago <thiago.alves@jarchitects.be>
Date: Sat, 24 Oct 2020 19:31:50 +0200
Subject: [PATCH 2/4] Adding nb_conda_kernels as a dependency for loading
 up the environment in Jupyter automatically

---
 environment.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index ada51a9..28ddd9d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,4 +3,5 @@ dependencies:
 - python=3.6
 - pandas
 - jupyter
-- pytest
\ No newline at end of file
+- pytest
+- nb_conda_kernels
\ No newline at end of file

From 64db054af99503b9405e5f77cd21815ecd6f6388 Mon Sep 17 00:00:00 2001
From: avsthiago <thiago.alves@jarchitects.be>
Date: Sun, 1 Nov 2020 23:12:35 +0100
Subject: [PATCH 3/4] Creating new methods and validations for init

---
 pandas_cub/__init__.py | 57 ++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/pandas_cub/__init__.py b/pandas_cub/__init__.py
index 029fc52..b743010 100644
--- a/pandas_cub/__init__.py
+++ b/pandas_cub/__init__.py
@@ -30,13 +30,31 @@ def __init__(self, data):
         self._add_docs()
 
     def _check_input_types(self, data):
-        pass
+        if not isinstance(data, dict):
+            raise TypeError('`data` must be a dictionary')
+        for key, value in data.items():
+            if not isinstance(key, str):
+                raise TypeError('The keys of `data` must be strings.')
+            if not isinstance(value, np.ndarray):
+                raise TypeError('values of `data` must be Numpy arrays.')
+            if value.ndim != 1:
+                raise ValueError('Value of `data` must be 1d ndarray')
 
     def _check_array_lengths(self, data):
-        pass
+        for i, value in enumerate(data.values()):
+            if i == 0:
+                length = len(value)  # first column sets the reference length
+            elif length != len(value):
+                raise ValueError('All values of `data` must be the same length')
 
     def _convert_unicode_to_object(self, data):
         new_data = {}
+        for key, value in data.items():
+            if value.dtype.kind == 'U':
+                new_data[key] = value.astype('object')
+            else:
+                new_data[key] = value
+
         return new_data
 
     def __len__(self):
@@ -47,7 +65,8 @@ def __len__(self):
         -------
         int: the number of rows in the dataframe
         """
-        pass
+        for value in self._data.values():
+            return len(value)
 
     @property
     def columns(self):
@@ -60,7 +79,7 @@ def columns(self):
         -------
         list of column names
         """
-        pass
+        return list(self._data)
 
     @columns.setter
     def columns(self, columns):
@@ -76,7 +95,16 @@ def columns(self, columns):
         -------
         None
         """
-        pass
+        if not isinstance(columns, list):
+            raise TypeError('`columns` must be a list')
+        if len(columns) != len(self._data):
+            raise ValueError('New `columns` must be same length as current DataFrame')
+        if any(not isinstance(i, str) for i in columns):  # test the type, not the item's truthiness
+            raise TypeError('All column names must be strings')
+        if len(columns) != len(set(columns)):
+            raise ValueError('Your columns have duplicates')
+        self._data = dict(zip(columns, self._data.values()))
+
 
     @property
     def shape(self):
@@ -437,8 +465,10 @@ def diff(self, n=1):
         -------
         A DataFrame
         """
+
         def func():
             pass
+
         return self._non_agg(func)
 
     def pct_change(self, n=1):
@@ -454,8 +484,10 @@ def pct_change(self, n=1):
         -------
         A DataFrame
         """
+
         def func():
             pass
+
         return self._non_agg(func)
 
     #### Arithmetic and Comparison Operators ####
@@ -588,14 +620,13 @@ def pivot_table(self, rows=None, columns=None, values=None, aggfunc=None):
     def _add_docs(self):
         agg_names = ['min', 'max', 'mean', 'median', 'sum', 'var',
                      'std', 'any', 'all', 'argmax', 'argmin']
-        agg_doc = \
-        """
-        Find the {} of each column
-        
-        Returns
-        -------
-        DataFrame
-        """
+        agg_doc = ("""
+            Find the {} of each column
+            
+            Returns
+            -------
+            DataFrame
+            """)
         for name in agg_names:
             getattr(DataFrame, name).__doc__ = agg_doc.format(name)
 

From a9c8a8a2541928f80f1967df9eee50a24270e5ea Mon Sep 17 00:00:00 2001
From: avsthiago <thiago.alves@jarchitects.be>
Date: Sun, 8 Nov 2020 12:57:53 +0100
Subject: [PATCH 4/4] Adding new functions for __init__

---
 Test Notebook.ipynb    | 136 ++++++++++++++++---
 pandas_cub/__init__.py | 301 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 393 insertions(+), 44 deletions(-)

diff --git a/Test Notebook.ipynb b/Test Notebook.ipynb
index 955ce40..b77e80e 100644
--- a/Test Notebook.ipynb	
+++ b/Test Notebook.ipynb	
@@ -11,9 +11,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'/Users/thiagoalves/miniconda3/envs/pandas_cub/bin/python'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import sys\n",
     "sys.executable"
@@ -21,9 +32,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2"
@@ -31,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -43,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,27 +83,111 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "df"
+    "\n",
+    "df = pdc.DataFrame({'a': name,'b': name,'thiahitiathia': name,'dshuadsdhai': name,'dsodkasokdpa': name,'daysurdusduasda': name,'dsijdtaosjdaosijdosad': name,'hdusahdisaiasihdia': name,'djssaijdsaojdiasjodaoa': name, 'dsijdaosjdaosijddosad': name,'dsijdaosjdaosijdoslad': name,'dsijdaosjdposijdosad': name,'dsijdaosjdaosijdoosad': name,'dsijdaosjdyaosijdosad': name})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table><thead><tr><th></th><th>a         </th><th>b         </th><th>thiahitiathia</th><th>dshuadsdhai</th><th>dsodkasokdpa</th><th>daysurdusduasda</th><th>dsijdtaosjdaosijdosad</th><th>hdusahdisaiasihdia</th><th>djssaijdsaojdiasjodaoa</th><th>dsijdaosjdaosijddosad</th><th>dsijdaosjdaosijdoslad</th><th>dsijdaosjdposijdosad</th><th>dsijdaosjdaosijdoosad</th><th>dsijdaosjdyaosijdosad</th></tr></thead><tbody><tr><td><strong>0</strong></td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td><td>Penelope  </td></tr><tr><td><strong>1</strong></td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td><td>Niko      </td></tr><tr><td><strong>2</strong></td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td><td>Eleni     </td></tr></tbody></table>"
+      ],
+      "text/plain": [
+       "<pandas_cub.DataFrame at 0x7fb9440f18d0>"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "df_final"
+    "df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>state</th>\n",
+       "      <th>height</th>\n",
+       "      <th>school</th>\n",
+       "      <th>weight</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Penelope</td>\n",
+       "      <td>Texas</td>\n",
+       "      <td>3.6</td>\n",
+       "      <td>True</td>\n",
+       "      <td>45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Niko</td>\n",
+       "      <td>California</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>False</td>\n",
+       "      <td>40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Eleni</td>\n",
+       "      <td>Texas</td>\n",
+       "      <td>5.2</td>\n",
+       "      <td>True</td>\n",
+       "      <td>130</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       name       state  height  school  weight\n",
+       "0  Penelope       Texas     3.6    True      45\n",
+       "1      Niko  California     3.5   False      40\n",
+       "2     Eleni       Texas     5.2    True     130"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "df_pandas"
    ]
@@ -98,9 +202,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python [conda env:pandas_cub] *",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-pandas_cub-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -112,7 +216,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.6.12"
   }
  },
  "nbformat": 4,
diff --git a/pandas_cub/__init__.py b/pandas_cub/__init__.py
index b743010..b6b98c0 100644
--- a/pandas_cub/__init__.py
+++ b/pandas_cub/__init__.py
@@ -105,7 +105,6 @@ def columns(self, columns):
             raise ValueError('Your columns have duplicates')
         self._data = dict(zip(columns, self._data.values()))
 
-
     @property
     def shape(self):
         """
@@ -113,7 +112,7 @@ def shape(self):
         -------
         two-item tuple of number of rows and columns
         """
-        pass
+        return len(self), len(self._data)
 
     def _repr_html_(self):
         """
@@ -147,7 +146,61 @@ def _repr_html_(self):
             </tbody>
         </table>
         """
-        pass
+        html = '<table><thead><tr><th></th>'
+        for col in self.columns:
+            html += f"<th>{col:10}</th>"
+
+        html += '</tr></thead>'
+        html += "<tbody>"
+
+        only_head = False
+        num_head = 10
+        num_tail = 10
+        if len(self) <= 20:
+            only_head = True
+            num_head = len(self)
+
+        for i in range(num_head):
+            html += f'<tr><td><strong>{i}</strong></td>'
+            for col, values in self._data.items():
+                kind = values.dtype.kind
+                if kind == 'f':
+                    html += f'<td>{values[i]:10.3f}</td>'
+                elif kind == 'b':
+                    html += f'<td>{values[i]}</td>'
+                elif kind == 'O':
+                    v = values[i]
+                    if v is None:
+                        v = 'None'
+                    html += f'<td>{v:10}</td>'
+                else:
+                    html += f'<td>{values[i]:10}</td>'
+            html += '</tr>'
+
+        if not only_head:
+            html += '<tr><strong><td>...</td></strong>'
+            for i in range(len(self.columns)):
+                html += '<td>...</td>'
+            html += '</tr>'
+            for i in range(-num_tail, 0):
+                html += f'<tr><td><strong>{len(self) + i}</strong></td>'
+                for col, values in self._data.items():
+                    kind = values.dtype.kind
+                    if kind == 'f':
+                        html += f'<td>{values[i]:10.3f}</td>'
+                    elif kind == 'b':
+                        html += f'<td>{values[i]}</td>'
+                    elif kind == 'O':
+                        v = values[i]
+                        if v is None:
+                            v = 'None'
+                        html += f'<td>{v:10}</td>'
+                    else:
+                        html += f'<td>{values[i]:10}</td>'
+                html += '</tr>'
+
+        html += '</tbody></table>'
+        return html
 
     @property
     def values(self):
@@ -156,7 +209,7 @@ def values(self):
         -------
         A single 2D NumPy array of the underlying data
         """
-        pass
+        return np.column_stack(list(self._data.values()))
 
     @property
     def dtypes(self):
@@ -167,7 +220,10 @@ def dtypes(self):
         their data type in the other
         """
         DTYPE_NAME = {'O': 'string', 'i': 'int', 'f': 'float', 'b': 'bool'}
-        pass
+        col_names = np.array(list((self._data.keys())))
+        dtypes = np.array([DTYPE_NAME[i.dtype.kind] for i in self._data.values()])
+        new_data = {'Column Name': col_names, 'Data Type': dtypes}
+        return DataFrame(new_data)
 
     def __getitem__(self, item):
         """
@@ -183,19 +239,100 @@ def __getitem__(self, item):
         -------
         A subset of the original DataFrame
         """
-        pass
+        if isinstance(item, str):
+            return DataFrame({item: self._data[item]})
+        if isinstance(item, list):
+            return DataFrame({col: self._data[col] for col in item})
+        if isinstance(item, DataFrame):
+            if item.shape[1] != 1:
+                raise ValueError('item must be one-column DataFrame')
+            arr = next(iter(item._data.values()))
+            if arr.dtype.kind != 'b':
+                raise ValueError('item must be a one-column boolean DataFrame')
+            return DataFrame({col: value[arr] for col, value in self._data.items()})
+        if isinstance(item, tuple):
+            return self._getitem_tuple(item)
+
+        raise TypeError('You must pass either a string, list, DataFrame, or tuple to the selection operator')
 
     def _getitem_tuple(self, item):
         # simultaneous selection of rows and cols -> df[rs, cs]
-        pass
+        if len(item) != 2:
+            raise ValueError('item must have length 2')
+        row_selection, col_selection = item
+
+        if isinstance(row_selection, int):
+            row_selection = [row_selection]
+        elif isinstance(row_selection, DataFrame):
+            if row_selection.shape[1] != 1:
+                raise ValueError('row selection DataFrame must be one column')
+            row_selection = next(iter(row_selection._data.values()))
+            if row_selection.dtype.kind != 'b':
+                raise TypeError('row selection DataFrame must be a boolean')
+        elif not isinstance(row_selection, (list, slice)):
+            raise TypeError('row selection must be an int, list, slice or DataFrame')
+
+        if isinstance(col_selection, int):
+            col_selection = [self.columns[col_selection]]
+        elif isinstance(col_selection, str):
+            col_selection = [col_selection]
+        elif isinstance(col_selection, list):
+            new_col_selection = []
+            for col in col_selection:
+                if isinstance(col, int):
+                    new_col_selection.append(self.columns[col])
+                else:
+                    new_col_selection.append(col)
+            col_selection = new_col_selection
+        elif isinstance(col_selection, slice):
+            start = col_selection.start
+            stop = col_selection.stop
+            step = col_selection.step
+
+            if isinstance(start, str):
+                start = self.columns.index(start)
+
+            if isinstance(stop, str):
+                stop = self.columns.index(stop) + 1
+
+            col_selection = self.columns[start:stop:step]
+        else:
+            raise TypeError('column selection must be int, str, list or DataFrame')
+
+        new_data = {}
+        for col in col_selection:
+            new_data[col] = self._data[col][row_selection]
+
+        return DataFrame(new_data)
 
     def _ipython_key_completions_(self):
         # allows for tab completion when doing df['c
-        pass
+        return self.columns
 
     def __setitem__(self, key, value):
         # adds a new column or a overwrites an old column
-        pass
+        if not isinstance(key, str):
+            raise NotImplementedError('Setting columns is only done with a numpy array')
+        if isinstance(value, np.ndarray):
+            if value.ndim != 1:
+                raise ValueError('The numpy array must be 1D')
+            if len(value) != len(self):
+                raise ValueError('Length of setting array must be length of the DataFrame')
+        elif isinstance(value, DataFrame):
+            if value.shape[1] != 1:
+                raise ValueError('Setting DataFrame must be one column')
+            if len(value) != len(self):
+                raise ValueError('Setting DataFrame must be same length')
+            value = next(iter(value._data.values()))
+        elif isinstance(value, (int, bool, str, float)):
+            value = np.repeat(value, len(self))
+        else:
+            raise TypeError('Setting DataFrame must be a int, bool, str, float or DataFrame')
+
+        if value.dtype.kind == 'U':
+            value = value.astype('object')
+
+        self._data[key] = value
 
     def head(self, n=5):
         """
@@ -209,7 +346,7 @@ def head(self, n=5):
         -------
         DataFrame
         """
-        pass
+        return self[:n, :]
 
     def tail(self, n=5):
         """
@@ -223,7 +360,7 @@ def tail(self, n=5):
         -------
         DataFrame
         """
-        pass
+        return self[-n:, :]
 
     #### Aggregation Methods ####
 
@@ -273,7 +410,13 @@ def _agg(self, aggfunc):
         -------
         A DataFrame
         """
-        pass
+        new_data = {}
+        for col, value in self._data.items():
+            try:
+                new_data[col] = np.array([aggfunc(value)])
+            except TypeError:
+                pass
+        return DataFrame(new_data)
 
     def isna(self):
         """
@@ -283,7 +426,13 @@ def isna(self):
         -------
         A DataFrame of booleans the same size as the calling DataFrame
         """
-        pass
+        new_data = {}
+        for col, value in self._data.items():
+            if value.dtype.kind == 'O':
+                new_data[col] = value == None
+            else:
+                new_data[col] = np.isnan(value)
+        return DataFrame(new_data)
 
     def count(self):
         """
@@ -293,7 +442,12 @@ def count(self):
         -------
         A DataFrame
         """
-        pass
+        df = self.isna()
+        new_data = {}
+        length = len(df)
+        for col, value in df._data.items():
+            new_data[col] = np.array([length - value.sum()])
+        return DataFrame(new_data)
 
     def unique(self):
         """
@@ -303,7 +457,13 @@ def unique(self):
         -------
         A list of one-column DataFrames
         """
-        pass
+        dfs = []
+        for col, value in self._data.items():
+            new_data = {col: np.unique(value)}
+            dfs.append(DataFrame(new_data))
+        if len(dfs) == 1:
+            return dfs[0]
+        return dfs
 
     def nunique(self):
         """
@@ -313,7 +473,10 @@ def nunique(self):
         -------
         A DataFrame
         """
-        pass
+        new_data = {}
+        for col, value in self._data.items():
+            new_data[col] = np.array([len(np.unique(value))])
+        return DataFrame(new_data)
 
     def value_counts(self, normalize=False):
         """
@@ -328,7 +491,19 @@ def value_counts(self, normalize=False):
         -------
         A list of DataFrames or a single DataFrame if one column
         """
-        pass
+        dfs = []
+        for col, value in self._data.items():
+            uniques, counts = np.unique(value, return_counts=True)
+            order = np.argsort(-counts)
+            uniques = uniques[order]
+            counts = counts[order]
+            if normalize:
+                counts = counts / len(self)
+            new_data = {col: uniques, 'count': counts}
+            dfs.append(DataFrame(new_data))
+        if len(dfs) == 1:
+            return dfs[0]
+        return dfs
 
     def rename(self, columns):
         """
@@ -343,7 +518,14 @@ def rename(self, columns):
         -------
         A DataFrame
         """
-        pass
+        if not isinstance(columns, dict):
+            raise TypeError('`columns` must be a dict')
+
+        new_data = {}
+        for col, value in self._data.items():
+            new_col = columns.get(col, col)  # unmapped columns keep their name
+            new_data[new_col] = value
+        return DataFrame(new_data)
 
     def drop(self, columns):
         """
@@ -357,7 +539,16 @@ def drop(self, columns):
         -------
         A DataFrame
         """
-        pass
+        if isinstance(columns, str):
+            columns = [columns]
+        elif not isinstance(columns, list):
+            raise TypeError('`columns` must be either a string or a list')
+
+        new_data = {}
+        for col, value in self._data.items():
+            if col not in columns:
+                new_data[col] = value
+        return DataFrame(new_data)
 
     #### Non-Aggregation Methods ####
 
@@ -450,7 +641,13 @@ def _non_agg(self, funcname, **kwargs):
         -------
         A DataFrame
         """
-        pass
+        new_data = {}
+        for col, value in self._data.items():
+            if value.dtype.kind == 'O':
+                new_data[col] = value.copy()
+            else:
+                new_data[col] = funcname(value, **kwargs)
+        return DataFrame(new_data)
 
     def diff(self, n=1):
         """
@@ -466,8 +663,15 @@ def diff(self, n=1):
         A DataFrame
         """
 
-        def func():
-            pass
+        def func(value):
+            value = value.astype('float')
+            value_shifted = np.roll(value, n)
+            value = value - value_shifted
+            if n >= 0:
+                value[:n] = np.nan
+            else:
+                value[n:] = np.nan
+            return value
 
         return self._non_agg(func)
 
@@ -485,12 +689,19 @@ def pct_change(self, n=1):
         A DataFrame
         """
 
-        def func():
-            pass
+        def func(value):
+            value = value.astype('float')
+            value_shifted = np.roll(value, n)
+            value = (value - value_shifted) / value_shifted
+            if n >= 0:
+                value[:n] = np.nan
+            else:
+                value[n:] = np.nan
+            return value
 
         return self._non_agg(func)
 
-    #### Arithmetic and Comparison Operators ####
+    ####  Arithmetic and Comparison Operators  ####
 
     def __add__(self, other):
         return self._oper('__add__', other)
@@ -559,7 +770,18 @@ def _oper(self, op, other):
         -------
         A DataFrame
         """
-        pass
+        if isinstance(other, DataFrame):
+            if other.shape[1] != 1:
+                raise ValueError('DataFrame must be single column')
+            else:
+                other = next(iter(other._data.values()))
+
+        new_data = {}
+        for col, value in self._data.items():
+            method = getattr(value, op)
+            new_data[col] = method(other)
+
+        return DataFrame(new_data)
 
     def sort_values(self, by, asc=True):
         """
@@ -574,7 +796,18 @@ def sort_values(self, by, asc=True):
         -------
         A DataFrame
         """
-        pass
+        if isinstance(by, str):
+            order = np.argsort(self._data[by])
+        elif isinstance(by, list):
+            by = [self._data[col] for col in by[::-1]]
+            order = np.lexsort(by)
+        else:
+            raise TypeError('`by` must be either a list or a string')
+
+        if not asc:
+            order = order[::-1]
+
+        return self[order.tolist(), :]
 
     def sample(self, n=None, frac=None, replace=False, seed=None):
         """
@@ -595,7 +828,19 @@ def sample(self, n=None, frac=None, replace=False, seed=None):
         -------
         A DataFrame
         """
-        pass
+        if seed is not None:  # 0 is a valid seed and must not be skipped
+            np.random.seed(seed=seed)
+
+        if frac:
+            if frac <= 0:
+                raise ValueError('`frac` must be positive')
+            n = int(frac * len(self))
+
+        if not isinstance(n, int):
+            raise TypeError('`n` must be an integer')
+
+        rows = np.random.choice(range(len(self)), n, replace=replace)
+        return self[rows.tolist(), :]
 
     def pivot_table(self, rows=None, columns=None, values=None, aggfunc=None):
         """