diff --git a/notebooks/seq_analysis.ipynb b/notebooks/seq_analysis.ipynb index dd45dc0..a7bac75 100644 --- a/notebooks/seq_analysis.ipynb +++ b/notebooks/seq_analysis.ipynb @@ -22,10 +22,7 @@ "metadata": {}, "source": [ "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Multiomics-Analytics-Group/course_protein_language_modeling/blob/main/notebooks/model_training.ipynb)\n", - "\n", - "\n", - "This is a version of the notebook from [Matthew Carrigan](https://huggingface.co/Rocketknight1) --- [here](https://huggingface.co/blog/deep-learning-with-proteins)." + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Multiomics-Analytics-Group/course_protein_language_modeling/blob/main/notebooks/seq_analysis.ipynb)" ] }, { @@ -111,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "1334524b-898b-4276-9c44-9016a10227d4", "metadata": {}, "outputs": [ @@ -152,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "c4e19716-4e0b-4607-90c1-4c1f7deeeab1", "metadata": {}, "outputs": [ @@ -186,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "a7f1c4e4-0f9e-45ac-bbe7-f72bdd116219", "metadata": {}, "outputs": [ @@ -216,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "fc25307c-ea84-4ef3-aa2d-91c7ea5f8515", "metadata": {}, "outputs": [ @@ -250,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "6422e42d-4ead-46dc-810c-74ec9931b735", "metadata": {}, "outputs": [ @@ -260,7 +257,7 @@ "0.46875" ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -292,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "af46ca6e-c4a3-4d21-be12-12e0ec252d62", "metadata": 
{}, "outputs": [ @@ -302,7 +299,7 @@ "'GATCGATGGGCCTATATAGGATCGAAAATCGC'" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "e6e11edc-cd91-4012-8a37-5fa90959f375", "metadata": {}, "outputs": [ @@ -378,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "95e01c9b-cb39-4c58-9f06-c782d890125c", "metadata": {}, "outputs": [ @@ -650,7 +647,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "id": "a9aca611-085f-4864-9c8f-acc4e775c89b", "metadata": {}, "outputs": [], @@ -668,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "id": "ce1b70bf-1a3c-4c32-84c4-38caaa6905d1", "metadata": {}, "outputs": [], @@ -681,12 +678,12 @@ "id": "8d3275b5-c7ce-463d-acd9-c332a87b0b1b", "metadata": {}, "source": [ - "(Extra) 3. Which is the most abundant protein?" + "(Extra) 3. Which is the most abundant amino acid?" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "id": "934a17ce-e409-4bd1-9c7f-5cd5166eade0", "metadata": {}, "outputs": [], @@ -756,16 +753,16 @@ " (Optional) Solution 3\n", " \n", " ```python\n", - " from Bio.SeqUtils.ProtParam import ProteinAnalysis\n", + " from Bio.SeqUtils.ProtParam import ProteinAnalysis\n", "\n", - " # instantiate\n", - " protein_sequence = ProteinAnalysis(pro)\n", - " \n", - " # get aa count\n", - " aa_count = protein_sequence.count_amino_acids()\n", - " \n", - " # most abdundant amino acid\n", - " print(max(aa_count)) # Y - Tyrosine\n", + " # instantiate\n", + " protein_sequence = ProteinAnalysis(pro)\n", + "\n", + " # get aa count\n", + " aa_count = protein_sequence.count_amino_acids()\n", + "\n", + " # most abundant amino acid\n", + " print(max(aa_count, key=aa_count.get)) # L - Leucine\n", " ```\n", "\n", "\n",