Multiomics-Analytics-Group · angelphanth · Aug 12, 2025
diff --git a/notebooks/seq_analysis.ipynb b/notebooks/seq_analysis.ipynb
@@ -22,10 +22,7 @@
    "metadata": {},
    "source": [
     "\n",
-    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Multiomics-Analytics-Group/course_protein_language_modeling/blob/main/notebooks/model_training.ipynb)\n",
-    "\n",
-    "\n",
-    "This is a version of the notebook from [Matthew Carrigan](https://huggingface.co/Rocketknight1) --- [here](https://huggingface.co/blog/deep-learning-with-proteins)."
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Multiomics-Analytics-Group/course_protein_language_modeling/blob/main/notebooks/seq_analysis.ipynb)"
    ]
   },
   {
@@ -111,7 +108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "id": "1334524b-898b-4276-9c44-9016a10227d4",
    "metadata": {},
    "outputs": [
@@ -152,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "id": "c4e19716-4e0b-4607-90c1-4c1f7deeeab1",
    "metadata": {},
    "outputs": [
@@ -186,7 +183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "id": "a7f1c4e4-0f9e-45ac-bbe7-f72bdd116219",
    "metadata": {},
    "outputs": [
@@ -216,7 +213,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "id": "fc25307c-ea84-4ef3-aa2d-91c7ea5f8515",
    "metadata": {},
    "outputs": [
@@ -250,7 +247,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "id": "6422e42d-4ead-46dc-810c-74ec9931b735",
    "metadata": {},
    "outputs": [
@@ -260,7 +257,7 @@
        "0.46875"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -292,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "id": "af46ca6e-c4a3-4d21-be12-12e0ec252d62",
    "metadata": {},
    "outputs": [
@@ -302,7 +299,7 @@
        "'GATCGATGGGCCTATATAGGATCGAAAATCGC'"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -321,7 +318,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
    "id": "e6e11edc-cd91-4012-8a37-5fa90959f375",
    "metadata": {},
    "outputs": [
@@ -378,7 +375,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
    "id": "95e01c9b-cb39-4c58-9f06-c782d890125c",
    "metadata": {},
    "outputs": [
@@ -650,7 +647,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 16,
    "id": "a9aca611-085f-4864-9c8f-acc4e775c89b",
    "metadata": {},
    "outputs": [],
@@ -668,7 +665,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 15,
    "id": "ce1b70bf-1a3c-4c32-84c4-38caaa6905d1",
    "metadata": {},
    "outputs": [],
@@ -681,12 +678,12 @@
    "id": "8d3275b5-c7ce-463d-acd9-c332a87b0b1b",
    "metadata": {},
    "source": [
-    "(Extra) 3. Which is the most abundant protein?"
+    "(Extra) 3. Which is the most abundant amino acid?"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 14,
    "id": "934a17ce-e409-4bd1-9c7f-5cd5166eade0",
    "metadata": {},
    "outputs": [],
@@ -756,16 +753,16 @@
     "  <summary><font color=\"green\"><b>(Optional) Solution 3</b></font></summary>\n",
     "    \n",
     "  ```python\n",
-    "    from Bio.SeqUtils.ProtParam import ProteinAnalysis\n",
+    "  from Bio.SeqUtils.ProtParam import ProteinAnalysis\n",
     "\n",
-    "    # instantiate\n",
-    "    protein_sequence = ProteinAnalysis(pro)\n",
-    "    \n",
-    "    # get aa count\n",
-    "    aa_count = protein_sequence.count_amino_acids()\n",
-    "    \n",
-    "    # most abdundant amino acid\n",
-    "    print(max(aa_count)) # Y - Tyrosine\n",
+    "  # instantiate\n",
+    "  protein_sequence = ProteinAnalysis(pro)\n",
+    "\n",
+    "  # get aa count\n",
+    "  aa_count = protein_sequence.count_amino_acids()\n",
+    "\n",
+    "  # most abundant amino acid\n",
+    "  print(max(aa_count, key=aa_count.get)) # L - Leucine\n",
     "  ```\n",
     "</details>\n",
     "\n",