diff --git a/config.json b/config.json index 2db6e13..764ece6 100644 --- a/config.json +++ b/config.json @@ -258,6 +258,14 @@ "prerequisites": [], "difficulty": 8 }, + { + "slug": "hamming", + "name": "Hamming", + "uuid": "d1ea133e-1d3f-4894-975e-6ab91aa33a35", + "practices": [], + "prerequisites": [], + "difficulty": 8 + }, { "slug": "high-scores", "name": "High Scores", diff --git a/exercises/practice/hamming/.docs/instructions.md b/exercises/practice/hamming/.docs/instructions.md new file mode 100644 index 0000000..8f47a17 --- /dev/null +++ b/exercises/practice/hamming/.docs/instructions.md @@ -0,0 +1,16 @@ +# Instructions + +Calculate the Hamming distance between two DNA strands. + +We read DNA using the letters C, A, G and T. +Two strands might look like this: + + GAGCCTACTAACGGGAT + CATCGTAATGACGGCCT + ^ ^ ^ ^ ^ ^^ + +They have 7 differences, and therefore the Hamming distance is 7. + +## Implementation notes + +The Hamming distance is only defined for sequences of equal length, so an attempt to calculate it between sequences of different lengths should not work. diff --git a/exercises/practice/hamming/.docs/introduction.md b/exercises/practice/hamming/.docs/introduction.md new file mode 100644 index 0000000..8419bf4 --- /dev/null +++ b/exercises/practice/hamming/.docs/introduction.md @@ -0,0 +1,12 @@ +# Introduction + +Your body is made up of cells that contain DNA. +Those cells regularly wear out and need replacing, which they achieve by dividing into daughter cells. +In fact, the average human body experiences about 10 quadrillion cell divisions in a lifetime! + +When cells divide, their DNA replicates too. +Sometimes during this process mistakes happen and single pieces of DNA get encoded with the incorrect information. +If we compare two strands of DNA and count the differences between them, we can see how many mistakes occurred. +This is known as the "Hamming distance". 
+ +The Hamming distance is useful in many areas of science, not just biology, so it's a nice phrase to be familiar with :) diff --git a/exercises/practice/hamming/.meta/config.json b/exercises/practice/hamming/.meta/config.json new file mode 100644 index 0000000..e803823 --- /dev/null +++ b/exercises/practice/hamming/.meta/config.json @@ -0,0 +1,19 @@ +{ + "authors": [ + "jimmytty" + ], + "files": { + "solution": [ + "hamming.sql" + ], + "test": [ + "hamming_test.sql" + ], + "example": [ + ".meta/example.sql" + ] + }, + "blurb": "Calculate the Hamming distance between two DNA strands.", + "source": "The Calculating Point Mutations problem at Rosalind", + "source_url": "https://rosalind.info/problems/hamm/" +} diff --git a/exercises/practice/hamming/.meta/example.sql b/exercises/practice/hamming/.meta/example.sql new file mode 100644 index 0000000..b1d0e8c --- /dev/null +++ b/exercises/practice/hamming/.meta/example.sql @@ -0,0 +1,14 @@ +UPDATE hamming + SET result = ( + WITH RECURSIVE rcte(string1, string2, char1, char2) AS ( + VALUES(strand1, strand2, '', '') + UNION ALL + SELECT SUBSTRING(string1, 2), SUBSTRING(string2, 2), + SUBSTRING(string1, 1, 1), SUBSTRING(string2, 1, 1) + FROM rcte + WHERE string1 <> '' + ) + SELECT COUNT(*) + FROM rcte + WHERE char1 != char2 +); diff --git a/exercises/practice/hamming/.meta/tests.toml b/exercises/practice/hamming/.meta/tests.toml new file mode 100644 index 0000000..ed5e4f9 --- /dev/null +++ b/exercises/practice/hamming/.meta/tests.toml @@ -0,0 +1,71 @@ +# This is an auto-generated file. 
+# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[f6dcb64f-03b0-4b60-81b1-3c9dbf47e887] +description = "empty strands" + +[54681314-eee2-439a-9db0-b0636c656156] +description = "single letter identical strands" + +[294479a3-a4c8-478f-8d63-6209815a827b] +description = "single letter different strands" + +[9aed5f34-5693-4344-9b31-40c692fb5592] +description = "long identical strands" + +[cd2273a5-c576-46c8-a52b-dee251c3e6e5] +description = "long different strands" + +[919f8ef0-b767-4d1b-8516-6379d07fcb28] +description = "disallow first strand longer" +include = false + +[b9228bb1-465f-4141-b40f-1f99812de5a8] +description = "disallow first strand longer" +reimplements = "919f8ef0-b767-4d1b-8516-6379d07fcb28" +include = false + +[8a2d4ed0-ead5-4fdd-924d-27c4cf56e60e] +description = "disallow second strand longer" +include = false + +[dab38838-26bb-4fff-acbe-3b0a9bfeba2d] +description = "disallow second strand longer" +reimplements = "8a2d4ed0-ead5-4fdd-924d-27c4cf56e60e" +include = false + +[5dce058b-28d4-4ca7-aa64-adfe4e17784c] +description = "disallow left empty strand" +include = false + +[db92e77e-7c72-499d-8fe6-9354d2bfd504] +description = "disallow left empty strand" +include = false +reimplements = "5dce058b-28d4-4ca7-aa64-adfe4e17784c" + +[b764d47c-83ff-4de2-ab10-6cfe4b15c0f3] +description = "disallow empty first strand" +reimplements = "db92e77e-7c72-499d-8fe6-9354d2bfd504" +include = false + +[38826d4b-16fb-4639-ac3e-ba027dec8b5f] +description = "disallow right empty strand" +include = false + +[920cd6e3-18f4-4143-b6b8-74270bb8f8a3] 
+description = "disallow right empty strand" +include = false +reimplements = "38826d4b-16fb-4639-ac3e-ba027dec8b5f" + +[9ab9262f-3521-4191-81f5-0ed184a5aa89] +description = "disallow empty second strand" +reimplements = "920cd6e3-18f4-4143-b6b8-74270bb8f8a3" +include = false \ No newline at end of file diff --git a/exercises/practice/hamming/create_fixture.sql b/exercises/practice/hamming/create_fixture.sql new file mode 100644 index 0000000..c21daba --- /dev/null +++ b/exercises/practice/hamming/create_fixture.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS hamming; +CREATE TABLE hamming ( + strand1 TEXT NOT NULL, + strand2 TEXT NOT NULL, + result INTEGER +); + +.mode csv +.import ./data.csv hamming + +UPDATE hamming SET result = NULL; diff --git a/exercises/practice/hamming/create_test_table.sql b/exercises/practice/hamming/create_test_table.sql new file mode 100644 index 0000000..1ee31dd --- /dev/null +++ b/exercises/practice/hamming/create_test_table.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS tests; +CREATE TABLE IF NOT EXISTS tests ( + -- uuid and description are taken from the .meta/tests.toml file + uuid TEXT PRIMARY KEY, + description TEXT NOT NULL, + -- The following section is needed by the online test-runner + status TEXT DEFAULT 'fail', + message TEXT, + output TEXT, + test_code TEXT, + task_id INTEGER DEFAULT NULL, + -- Here are columns for the actual tests + strand1 TEXT NOT NULL, + strand2 TEXT NOT NULL, + expected INTEGER NOT NULL +); + +INSERT INTO tests (uuid, description, strand1, strand2, expected) + VALUES + ('f6dcb64f-03b0-4b60-81b1-3c9dbf47e887','empty strands','','',0), + ('54681314-eee2-439a-9db0-b0636c656156','single letter identical strands','A','A',0), + ('294479a3-a4c8-478f-8d63-6209815a827b','single letter different strands','G','T',1), + ('9aed5f34-5693-4344-9b31-40c692fb5592','long identical strands','GGACTGAAATCTG','GGACTGAAATCTG',0), + ('cd2273a5-c576-46c8-a52b-dee251c3e6e5','long different strands','GGACGGATTCTG','AGGACGGATTCT',9); diff 
--git a/exercises/practice/hamming/data.csv b/exercises/practice/hamming/data.csv new file mode 100644 index 0000000..acf6c56 --- /dev/null +++ b/exercises/practice/hamming/data.csv @@ -0,0 +1,5 @@ +"","", +"A","A", +"G","T", +"GGACTGAAATCTG","GGACTGAAATCTG", +"GGACGGATTCTG","AGGACGGATTCT", diff --git a/exercises/practice/hamming/hamming.sql b/exercises/practice/hamming/hamming.sql new file mode 100644 index 0000000..404d9b1 --- /dev/null +++ b/exercises/practice/hamming/hamming.sql @@ -0,0 +1,7 @@ +-- Schema: +-- CREATE TABLE hamming ( +-- strand1 TEXT NOT NULL, +-- strand2 TEXT NOT NULL, +-- result INTEGER +-- ); +-- Task: update the hamming table and set result to the Hamming distance between strand1 and strand2 diff --git a/exercises/practice/hamming/hamming_test.sql b/exercises/practice/hamming/hamming_test.sql new file mode 100644 index 0000000..5e5c5e5 --- /dev/null +++ b/exercises/practice/hamming/hamming_test.sql @@ -0,0 +1,40 @@ +-- Create database: +.read ./create_fixture.sql + +-- Read the student's solution and save any output as markdown in user_output.md: +.mode markdown +.output user_output.md +.read ./hamming.sql +.output + +-- Create a clean testing environment: +.read ./create_test_table.sql + +-- Comparison of user input and the tests updates the status for each test: +UPDATE tests +SET status = 'pass' +FROM (SELECT strand1, strand2, result FROM hamming) AS actual +WHERE (actual.strand1, actual.strand2, actual.result) = (tests.strand1, tests.strand2, tests.expected); + +-- Update message for failed tests to give helpful information: +UPDATE tests +SET message = ( + 'Result for "' + || PRINTF('strand1=''%s'' and strand2=''%s''', actual.strand1, actual.strand2) + || '"' + || ' is <' || COALESCE(actual.result, 'NULL') + || '> but should be <' || tests.expected || '>' +) +FROM (SELECT strand1, strand2, result FROM hamming) AS actual +WHERE (actual.strand1, actual.strand2) = (tests.strand1, tests.strand2) AND tests.status = 'fail'; + +-- Save 
results to ./output.json (needed by the online test-runner) +.mode json +.once './output.json' +SELECT description, status, message, output, test_code, task_id +FROM tests; + +-- Display test results in readable form for the student: +.mode table +SELECT description, status, message +FROM tests;