Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e02c08a
Setting up GitHub Classroom Feedback
github-classroom[bot] Jul 15, 2022
11d4c7b
Rodrigo (#3)
Rodrigox30 Jul 19, 2022
a2d58e3
Update README.md
kimimedina Jul 20, 2022
c3bd8c4
Update README.md
kimimedina Jul 20, 2022
b21fba5
DesignDoc
resoj Jul 21, 2022
1fc2cdb
Delete .DS_Store
Rodrigox30 Jul 25, 2022
c2a33b0
Rodrigo (#5)
Rodrigox30 Jul 25, 2022
2566d68
Update README.md
resoj Jul 25, 2022
f1d91a6
GB and Kmeans Classifiers
LukeTaylor1 Jul 25, 2022
49865d7
Merge branch 'main' of https://github.com/Applied-Machine-Learning-20…
LukeTaylor1 Jul 25, 2022
e763e23
Update README.md
kimimedina Jul 25, 2022
2765cee
Update README.md
kimimedina Jul 25, 2022
3e10532
Obsidian_Model (#6)
resoj Jul 26, 2022
0db5ea0
Changes made to read me
Rodrigox30 Jul 27, 2022
de4bedf
Update README.md
Rodrigox30 Jul 27, 2022
5166ac3
Replaced tensorflow model with cleaner version
Rodrigox30 Jul 27, 2022
ea503d1
Comments (#7)
resoj Jul 27, 2022
0480463
KneighborsClassifier updated
Rodrigox30 Jul 27, 2022
033dd8b
Merge branch 'main' of https://github.com/Applied-Machine-Learning-20…
Rodrigox30 Jul 27, 2022
119aed8
Luke (#9)
LukeTaylor1 Jul 28, 2022
121aee3
remove repeated files
LukeTaylor1 Jul 28, 2022
1fc461e
READ ME updates
Rodrigox30 Jul 29, 2022
58de9f1
Merge branch 'main' of https://github.com/Applied-Machine-Learning-20…
Rodrigox30 Jul 29, 2022
7fa04c6
Update to KNeighborsClassifier
Rodrigox30 Jul 29, 2022
3a0963a
Chert Model (#11)
kimimedina Jul 29, 2022
bbd838b
README by Kimi (#12)
kimimedina Jul 29, 2022
0adc7ec
Update README.md
kimimedina Jul 29, 2022
52d693f
Presentation
LukeTaylor1 Jul 29, 2022
9791786
added link to power point
LukeTaylor1 Jul 29, 2022
a61e594
Luke's ReadMe
LukeTaylor1 Jul 29, 2022
bb308de
Update README.md
LukeTaylor1 Jul 29, 2022
c7afba1
Add files via upload
resoj Jul 29, 2022
eb96164
Add files via upload
resoj Jul 29, 2022
787d56a
Update Obsidian Decision Tree Model.md
resoj Jul 29, 2022
3a8b8a7
Update README.md
resoj Jul 29, 2022
2cfbaee
Updated Link
Rodrigox30 Jul 29, 2022
842af9b
Update README.md
Rodrigox30 Jul 29, 2022
14be5fa
Merged all data into one dataframe and cleaned it (#2)
kimimedina Jul 29, 2022
3df1641
Delete EXP-00001-Master.xlsx
LukeTaylor1 Jul 30, 2022
083cdf0
Delete EXP-00002-Master.xlsx
LukeTaylor1 Jul 30, 2022
32e252f
Delete EXP-00003-Master.xlsx
LukeTaylor1 Jul 30, 2022
7d5f82e
Delete EXP-00004-Master.xlsx
LukeTaylor1 Jul 30, 2022
3e6dc6a
Delete EXP-00005-Master.xlsx
LukeTaylor1 Jul 30, 2022
6d67075
Delete application.ipynb
LukeTaylor1 Aug 1, 2022
8f697a4
Update README.md
kimimedina Aug 1, 2022
ad4689c
Update README.md
kimimedina Aug 1, 2022
09c670e
Update README.md
kimimedina Aug 1, 2022
491a5fb
Update README.md
kimimedina Aug 1, 2022
d3f6ce6
Update README.md
kimimedina Aug 1, 2022
5aae47e
Update README.md
Rodrigox30 Aug 2, 2022
d2da82a
Read me updated
Rodrigox30 Aug 2, 2022
e76b19b
11/11/22 Model (#15)
resoj Mar 30, 2023
1500787
Update Obsidian Decision Tree Model.md
resoj Mar 30, 2023
73e6c9c
Update README-KNeighborsClassifier.md
LukeTaylor1 Dec 30, 2023
6115380
Update Obsidian Decision Tree Model.md
resoj Jan 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
778 changes: 778 additions & 0 deletions Chert_Model__Decision_Trees.ipynb

Large diffs are not rendered by default.

1,443 changes: 1,443 additions & 0 deletions Decision_Tree_Random_Forest_FinalProject.ipynb

Large diffs are not rendered by default.

Binary file added DesignDoc.docx
Binary file not shown.
Binary file added EXP-00004-Master.xlsx
Binary file not shown.
Binary file added EXP-00005-Master.xlsx
Binary file not shown.
Binary file added EXP-00006-Master.xlsx
Binary file not shown.
1,167 changes: 1,167 additions & 0 deletions FinalProject_DecisionTree.ipynb

Large diffs are not rendered by default.

166 changes: 166 additions & 0 deletions GradientBoostingClassifier.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import tensorflow as tf\n",
"import pickle\n",
"\n",
"exp_1 = pd.read_excel(\"EXP-00001-Master.xlsx\")\n",
"exp_2 = pd.read_excel('EXP-00002-Master.xlsx')\n",
"exp_3 = pd.read_excel('EXP-00003-Master.xlsx')\n",
"exp_4 = pd.read_excel('EXP-00004-Master.xlsx')\n",
"exp_5 = pd.read_excel('EXP-00005-Master.xlsx')\n",
"\n",
"\n",
"exp_1.drop(index=0, inplace=True)\n",
"exp_2.drop(index=0, inplace=True)\n",
"exp_3.drop(index=0, inplace=True)\n",
"exp_4.drop(index = 0, inplace = True)\n",
"exp_5.drop(index = 0, inplace= True)\n",
"\n",
"exp_1.reset_index(drop=True, inplace=True)\n",
"exp_2.reset_index(drop=True, inplace=True)\n",
"exp_3.reset_index(drop=True, inplace=True)\n",
"exp_4.reset_index(drop=True, inplace=True)\n",
"exp_5.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Columns excluded from the feature set (ids, filter flags, and hand-labeled attributes)\n",
"not_included = ['Id', 'Filter0','Filter1', 'Filter2','Filter3', 'Filter4', 'Filter5', 'Filter6', 'hash', 'Img Id', 'Curvature', 'Transparency', 'Angularity']\n",
"filtered = [x for x in exp_1.columns if x not in not_included]\n",
"\n",
"# .copy() so the label assignments below write to real frames instead of views\n",
"# (avoids SettingWithCopyWarning / silently lost writes).\n",
"exp_1_filtered = exp_1[filtered].copy()\n",
"exp_2_filtered = exp_2[filtered].copy()\n",
"exp_3_filtered = exp_3[filtered].copy()\n",
"exp_4_filtered = exp_4[filtered].copy()\n",
"exp_5_filtered = exp_5[filtered].copy()\n",
"\n",
"# Production stage label: 0-4, one per experiment file\n",
"exp_1_filtered['Production Stage'] = 0\n",
"exp_2_filtered['Production Stage'] = 1\n",
"exp_3_filtered['Production Stage'] = 2\n",
"exp_4_filtered['Production Stage'] = 3\n",
"exp_5_filtered['Production Stage'] = 4\n",
"\n",
"# Combine all stages into the single dataframe the later cells rely on.\n",
"# Without this, `data` was undefined on a fresh kernel (Restart & Run All failed).\n",
"data = pd.concat([exp_1_filtered, exp_2_filtered, exp_3_filtered, exp_4_filtered, exp_5_filtered], ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" data[filtered],\n",
" data['Production Stage'],\n",
" test_size=0.1,\n",
" stratify= data['Production Stage'],\n",
" random_state=44)\n",
"\n",
"y_train.groupby(y_train).count()\n",
"y_test.groupby(y_test).count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gradient Boosting classifier on the production-stage labels\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import precision_score\n",
"\n",
"# criterion='mse' and loss='deviance' were deprecated and then removed in\n",
"# modern scikit-learn; the valid equivalents are 'friedman_mse' and 'log_loss'.\n",
"# max_features='auto' is likewise deprecated for GradientBoostingClassifier,\n",
"# so we let it default (use all features).\n",
"GBModel = GradientBoostingClassifier(\n",
"    n_estimators=100,\n",
"    learning_rate=0.30,\n",
"    max_depth=3,\n",
"    criterion='friedman_mse',\n",
"    loss='log_loss'\n",
"    )\n",
"\n",
"# Tree ensembles are scale-invariant, so fit on the raw features. This also\n",
"# fixes the original inconsistency of fitting on scale(...) while predicting\n",
"# on unscaled features below (and `scale` was never imported).\n",
"GBModel.fit(data[filtered].values, data[\"Production Stage\"])\n",
"\n",
"# Evaluate on a shuffled 10k sample (seeded for reproducibility).\n",
"shuffled = data.sample(frac=1, random_state=44)\n",
"ten_thousand = shuffled[:10000]\n",
"\n",
"predictionsGB = GBModel.predict(ten_thousand[filtered])\n",
"\n",
"# NOTE(review): this sample overlaps the training data, so the score is\n",
"# optimistic; evaluate on the held-out split for an honest number.\n",
"print('Accuracy score:', accuracy_score(ten_thousand['Production Stage'], predictionsGB))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hyperparameter search over the gradient boosting model\n",
"from sklearn.model_selection import GridSearchCV  # was used below but never imported\n",
"\n",
"# 'deviance' and 'mse' were removed in modern scikit-learn, and 'exponential'\n",
"# loss only supports binary classification (this is a 5-class problem), so the\n",
"# grid keeps only options that are valid for this task.\n",
"params = [{'max_depth': [1, 2, 3, 4, 5],\n",
"           'learning_rate': [.01, .10, .20, .30, .40, .50],\n",
"           'n_estimators': [50, 150, 250, 350, 450],\n",
"           'loss': ['log_loss'],\n",
"           'criterion': ['friedman_mse', 'squared_error']\n",
"           }]\n",
"\n",
"# A fresh estimator: GridSearchCV clones it per candidate anyway.\n",
"search = GridSearchCV(GradientBoostingClassifier(),\n",
"                      param_grid=params,\n",
"                      scoring='accuracy',\n",
"                      cv=5)\n",
"\n",
"search.fit(X_train, y_train)\n",
"print(search.best_params_)\n",
"\n",
"# NOTE(review): scoring on the training split overstates performance;\n",
"# also report search.score(X_test, y_test).\n",
"print(\"score: {}\".format(search.score(X_train, y_train)))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.12 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "57279b1f8ab34c5e05b79187a57f554f106338f30a65896aafc1120729eab81b"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added Heatmap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
126 changes: 126 additions & 0 deletions KMeansClassifier.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import tensorflow as tf\n",
"import pickle\n",
"\n",
"exp_1 = pd.read_excel(\"EXP-00001-Master.xlsx\")\n",
"exp_2 = pd.read_excel('EXP-00002-Master.xlsx')\n",
"exp_3 = pd.read_excel('EXP-00003-Master.xlsx')\n",
"exp_4 = pd.read_excel('EXP-00004-Master.xlsx')\n",
"exp_5 = pd.read_excel('EXP-00005-Master.xlsx')\n",
"\n",
"\n",
"exp_1.drop(index=0, inplace=True)\n",
"exp_2.drop(index=0, inplace=True)\n",
"exp_3.drop(index=0, inplace=True)\n",
"exp_4.drop(index = 0, inplace = True)\n",
"exp_5.drop(index = 0, inplace= True)\n",
"\n",
"exp_1.reset_index(drop=True, inplace=True)\n",
"exp_2.reset_index(drop=True, inplace=True)\n",
"exp_3.reset_index(drop=True, inplace=True)\n",
"exp_4.reset_index(drop=True, inplace=True)\n",
"exp_5.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Columns excluded from the feature set (ids, filter flags, and hand-labeled attributes)\n",
"not_included = ['Id', 'Filter0','Filter1', 'Filter2','Filter3', 'Filter4', 'Filter5', 'Filter6', 'hash', 'Img Id', 'Curvature', 'Transparency', 'Angularity']\n",
"filtered = [x for x in exp_1.columns if x not in not_included]\n",
"\n",
"# .copy() so the label assignments below write to real frames instead of views\n",
"# (avoids SettingWithCopyWarning / silently lost writes).\n",
"exp_1_filtered = exp_1[filtered].copy()\n",
"exp_2_filtered = exp_2[filtered].copy()\n",
"exp_3_filtered = exp_3[filtered].copy()\n",
"exp_4_filtered = exp_4[filtered].copy()\n",
"exp_5_filtered = exp_5[filtered].copy()\n",
"\n",
"# Production stage label: 0-4, one per experiment file\n",
"exp_1_filtered['Production Stage'] = 0\n",
"exp_2_filtered['Production Stage'] = 1\n",
"exp_3_filtered['Production Stage'] = 2\n",
"exp_4_filtered['Production Stage'] = 3\n",
"exp_5_filtered['Production Stage'] = 4\n",
"\n",
"# Combine all stages into the single dataframe the later cells rely on.\n",
"# Without this, `data` was undefined on a fresh kernel (Restart & Run All failed).\n",
"data = pd.concat([exp_1_filtered, exp_2_filtered, exp_3_filtered, exp_4_filtered, exp_5_filtered], ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" data[filtered],\n",
" data['Production Stage'],\n",
" test_size=0.1,\n",
" stratify= data['Production Stage'],\n",
" random_state=44)\n",
"\n",
"y_train.groupby(y_train).count()\n",
"y_test.groupby(y_test).count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans\n",
"from sklearn.preprocessing import scale  # was used below but never imported\n",
"\n",
"model = KMeans(n_clusters=5, random_state=0, max_iter=45)\n",
"\n",
"# Standardize once and reuse, so fit and predict see the same representation\n",
"# (the original fit on scaled features but predicted on raw features).\n",
"scaled_features = pd.DataFrame(scale(data[filtered]), index=data.index, columns=filtered)\n",
"model.fit(scaled_features)\n",
"\n",
"cluster = model.predict([scaled_features.iloc[0]])[0]\n",
"\n",
"# `random` was never defined in this notebook on a fresh kernel; shuffle\n",
"# explicitly (seeded) here. The name two_hundred is kept because the next\n",
"# cell references it, though it actually holds 1000 rows.\n",
"random = data.sample(frac=1, random_state=0)\n",
"two_hundred = random[:1000]\n",
"\n",
"# Predict on the same standardized representation the model was fit on,\n",
"# selecting the sampled rows by index from the already-scaled frame.\n",
"predictions = model.predict(scaled_features.loc[two_hundred.index])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import accuracy_score, precision_score  # not imported anywhere earlier in this notebook\n",
"\n",
"# NOTE(review): KMeans cluster ids are arbitrary and are not aligned with the\n",
"# Production Stage labels, so raw accuracy/precision here is not meaningful\n",
"# without first mapping clusters to labels -- confirm intent.\n",
"print('Accuracy score:', accuracy_score(two_hundred['Production Stage'], predictions))\n",
"print(precision_score(two_hundred['Production Stage'], predictions, average='macro'))\n",
"\n",
"predictions"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 64-bit (windows store)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "79ff1e5d1c988803ca272e854ac0ee62f88ccf4aa17fd038c7e703dc5466d506"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading