realpython
diff --git a/.gitignore b/.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/flask-connexion-rest/version_4/people.py b/flask-connexion-rest/version_4/people.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas-gradebook-project/01-loading-the-data.py b/pandas-gradebook-project/01-loading-the-data.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas-gradebook-project/02-merging-dataframes.py b/pandas-gradebook-project/02-merging-dataframes.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas-gradebook-project/03-calculating-grades.py b/pandas-gradebook-project/03-calculating-grades.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas-gradebook-project/04-grouping-the-data.py b/pandas-gradebook-project/04-grouping-the-data.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas-gradebook-project/05-plotting-summary-statistics.py b/pandas-gradebook-project/05-plotting-summary-statistics.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas-gradebook-project/06-final-gradebook.py b/pandas-gradebook-project/06-final-gradebook.py
Lines changed: 2 additions & 2 deletions
diff --git a/practical-k-means/README.md b/practical-k-means/README.md
Lines changed: 38 additions & 0 deletions
@@ -108,3 +108,6 @@ ENV/
 
 # Mac
 *.DS_Store
+
+# VS Code workspace
+*.code-workspace
@@ -90,7 +90,7 @@ def create(person):
     else:
         abort(
             406,
-            "Peron with last name {lname} already exists".format(lname=lname),
+            "Person with last name {lname} already exists".format(lname=lname),
         )
 
 
 
@@ -27,7 +27,7 @@
 
 hw_exam_grades = pd.read_csv(
     DATA_FOLDER / "hw_exam_grades.csv",
-    converters={"SID": str.lower, "Email Address": str.lower},
+    converters={"SID": str.lower},
     usecols=lambda x: "Submission" not in x,
     index_col="SID",
 )
 
@@ -27,7 +27,7 @@
 
 hw_exam_grades = pd.read_csv(
     DATA_FOLDER / "hw_exam_grades.csv",
-    converters={"SID": str.lower, "Email Address": str.lower},
+    converters={"SID": str.lower},
     usecols=lambda x: "Submission" not in x,
     index_col="SID",
 )
 
@@ -28,7 +28,7 @@
 
 hw_exam_grades = pd.read_csv(
     DATA_FOLDER / "hw_exam_grades.csv",
-    converters={"SID": str.lower, "Email Address": str.lower},
+    converters={"SID": str.lower},
     usecols=lambda x: "Submission" not in x,
     index_col="SID",
 )
@@ -88,7 +88,7 @@
 
 sum_of_quiz_scores = quiz_scores.sum(axis=1)
 sum_of_quiz_max = quiz_max_points.sum()
-final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max
+final_data["Total Quizzes"] = sum_of_quiz_scores / sum_of_quiz_max
 
 average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
 final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]
 
@@ -28,7 +28,7 @@
 
 hw_exam_grades = pd.read_csv(
     DATA_FOLDER / "hw_exam_grades.csv",
-    converters={"SID": str.lower, "Email Address": str.lower},
+    converters={"SID": str.lower},
     usecols=lambda x: "Submission" not in x,
     index_col="SID",
 )
@@ -88,7 +88,7 @@
 
 sum_of_quiz_scores = quiz_scores.sum(axis=1)
 sum_of_quiz_max = quiz_max_points.sum()
-final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max
+final_data["Total Quizzes"] = sum_of_quiz_scores / sum_of_quiz_max
 
 average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
 final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]
 
@@ -30,7 +30,7 @@
 
 hw_exam_grades = pd.read_csv(
     DATA_FOLDER / "hw_exam_grades.csv",
-    converters={"SID": str.lower, "Email Address": str.lower},
+    converters={"SID": str.lower},
     usecols=lambda x: "Submission" not in x,
     index_col="SID",
 )
@@ -90,7 +90,7 @@
 
 sum_of_quiz_scores = quiz_scores.sum(axis=1)
 sum_of_quiz_max = quiz_max_points.sum()
-final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max
+final_data["Total Quizzes"] = sum_of_quiz_scores / sum_of_quiz_max
 
 average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
 final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]
 
@@ -26,7 +26,7 @@
 
 hw_exam_grades = pd.read_csv(
     DATA_FOLDER / "hw_exam_grades.csv",
-    converters={"SID": str.lower, "Email Address": str.lower},
+    converters={"SID": str.lower},
     usecols=lambda x: "Submission" not in x,
     index_col="SID",
 )
@@ -78,7 +78,7 @@
 
 sum_of_quiz_scores = quiz_scores.sum(axis=1)
 sum_of_quiz_max = quiz_max_points.sum()
-final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max
+final_data["Total Quizzes"] = sum_of_quiz_scores / sum_of_quiz_max
 
 average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
 final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]
 
@@ -0,0 +1,38 @@
+# Practical K-Means Clustering in Python
+
+The Jupyter notebooks in this directory follow the code examples in Real Python's [Practical K-Means Clustering in Python](https://realpython.com/k-means-clustering-python/) article. The article has two main sections that contain code: the first works with synthetic data, and the second begins where the TCGA cancer gene expression dataset is introduced.
+
+## Getting Started
+
+Follow the instructions below to get up and running with a Jupyter notebook and all the code from the article.
+
+### Install Dependencies
+
+These notebooks have dependencies. One way to install these dependencies is to use the Anaconda Python distribution.
+
+```bash
+(base) $ conda install jupyter matplotlib numpy pandas seaborn scikit-learn
+(base) $ conda install -c conda-forge kneed
+```
+
+You can also install all the requirements using `pip` and the `requirements.txt` file included in this directory.
+
+```bash
+$ python3 -m pip install -r requirements.txt
+```
+
+### Synthetic Data Notebook
+
+Open the notebook that accompanies the sections of the article that work with synthetic data:
+
+```bash
+(base) $ jupyter notebook practical-kmeans-synthetic.ipynb
+```
+
+### Cancer Gene Expression Data Notebook
+
+Open the notebook that accompanies the sections of the article that work with TCGA cancer gene expression data:
+
+```bash
+(base) $ jupyter notebook practical-kmeans-cancer-gene-expression.ipynb
+```
Original file line number	Diff line number	Diff line change
`@@ -90,7 +90,7 @@ def create(person):`
`90`	`90`	`else:`
`91`	`91`	`abort(`
`92`	`92`	`406,`
`93`		`- "Peron with last name {lname} already exists".format(lname=lname),`
	`93`	`+ "Person with last name {lname} already exists".format(lname=lname),`
`94`	`94`	`)`
`95`	`95`
`96`	`96`
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@`
`27`	`27`
`28`	`28`	`hw_exam_grades = pd.read_csv(`
`29`	`29`	`DATA_FOLDER / "hw_exam_grades.csv",`
`30`		`- converters={"SID": str.lower, "Email Address": str.lower},`
	`30`	`+ converters={"SID": str.lower},`
`31`	`31`	`usecols=lambda x: "Submission" not in x,`
`32`	`32`	`index_col="SID",`
`33`	`33`	`)`