Skip to content

Commit 7be3423

Browse files
authored
Add new practice exercise: hamming (#113)
1 parent facd179 commit 7be3423

File tree

11 files changed

+242
-0
lines changed

11 files changed

+242
-0
lines changed

config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,14 @@
298298
"prerequisites": [],
299299
"difficulty": 8
300300
},
301+
{
302+
"slug": "hamming",
303+
"name": "Hamming",
304+
"uuid": "d1ea133e-1d3f-4894-975e-6ab91aa33a35",
305+
"practices": [],
306+
"prerequisites": [],
307+
"difficulty": 8
308+
},
301309
{
302310
"slug": "high-scores",
303311
"name": "High Scores",
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Instructions
2+
3+
Calculate the Hamming distance between two DNA strands.
4+
5+
We read DNA using the letters C, A, G and T.
6+
Two strands might look like this:
7+
8+
GAGCCTACTAACGGGAT
9+
CATCGTAATGACGGCCT
10+
^ ^ ^  ^ ^    ^^
11+
12+
They have 7 differences, and therefore the Hamming distance is 7.
13+
14+
## Implementation notes
15+
16+
The Hamming distance is only defined for sequences of equal length, so an attempt to calculate it between sequences of different lengths should not work.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Introduction
2+
3+
Your body is made up of cells that contain DNA.
4+
Those cells regularly wear out and need replacing, which they achieve by dividing into daughter cells.
5+
In fact, the average human body experiences about 10 quadrillion cell divisions in a lifetime!
6+
7+
When cells divide, their DNA replicates too.
8+
Sometimes during this process mistakes happen and single pieces of DNA get encoded with the incorrect information.
9+
If we compare two strands of DNA and count the differences between them, we can see how many mistakes occurred.
10+
This is known as the "Hamming distance".
11+
12+
The Hamming distance is useful in many areas of science, not just biology, so it's a nice phrase to be familiar with :)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"authors": [
3+
"jimmytty"
4+
],
5+
"files": {
6+
"solution": [
7+
"hamming.sql"
8+
],
9+
"test": [
10+
"hamming_test.sql"
11+
],
12+
"example": [
13+
".meta/example.sql"
14+
]
15+
},
16+
"blurb": "Calculate the Hamming distance between two DNA strands.",
17+
"source": "The Calculating Point Mutations problem at Rosalind",
18+
"source_url": "https://rosalind.info/problems/hamm/"
19+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
-- Example solution: fill in `result` or `error` for every row of `hamming`.
--
-- Pass 1 rejects strand pairs of unequal length; pass 2 scores the rest.
-- Each pass also clears the column it does not own, so re-running the script
-- after the strands change never leaves a stale value behind.

-- Pass 1: the Hamming distance is undefined for strands of different length.
UPDATE hamming
   SET error  = 'strands must be of equal length',
       result = NULL
 WHERE LENGTH(strand1) <> LENGTH(strand2)
;

-- Pass 2: count the positions at which two equal-length strands disagree.
UPDATE hamming
   SET error  = NULL,
       result = (
         -- Peel one character off the front of both strands per step.
         -- The seed row carries the untouched strands plus an empty pair of
         -- "current" characters; that pair compares equal, so it never counts.
         -- SUBSTR is used instead of its alias SUBSTRING, which only exists
         -- in SQLite 3.34 and later.
         WITH RECURSIVE rcte(string1, string2, char1, char2) AS (
           VALUES(strand1, strand2, '', '')
           UNION ALL
           SELECT SUBSTR(string1, 2),    SUBSTR(string2, 2),
                  SUBSTR(string1, 1, 1), SUBSTR(string2, 1, 1)
             FROM rcte
            -- The outer WHERE guarantees equal lengths, so checking one
            -- strand for exhaustion is enough to stop the recursion.
            WHERE string1 <> ''
         )
         SELECT COUNT(*)
           FROM rcte
          WHERE char1 <> char2
       )
 WHERE LENGTH(strand1) = LENGTH(strand2)
;
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# This is an auto-generated file.
2+
#
3+
# Regenerating this file via `configlet sync` will:
4+
# - Recreate every `description` key/value pair
5+
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
6+
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
7+
# - Preserve any other key/value pair
8+
#
9+
# As user-added comments (using the # character) will be removed when this file
10+
# is regenerated, comments can be added via a `comment` key.
11+
12+
[f6dcb64f-03b0-4b60-81b1-3c9dbf47e887]
13+
description = "empty strands"
14+
15+
[54681314-eee2-439a-9db0-b0636c656156]
16+
description = "single letter identical strands"
17+
18+
[294479a3-a4c8-478f-8d63-6209815a827b]
19+
description = "single letter different strands"
20+
21+
[9aed5f34-5693-4344-9b31-40c692fb5592]
22+
description = "long identical strands"
23+
24+
[cd2273a5-c576-46c8-a52b-dee251c3e6e5]
25+
description = "long different strands"
26+
27+
[919f8ef0-b767-4d1b-8516-6379d07fcb28]
28+
description = "disallow first strand longer"
29+
include = false
30+
31+
[b9228bb1-465f-4141-b40f-1f99812de5a8]
32+
description = "disallow first strand longer"
33+
reimplements = "919f8ef0-b767-4d1b-8516-6379d07fcb28"
34+
35+
[8a2d4ed0-ead5-4fdd-924d-27c4cf56e60e]
36+
description = "disallow second strand longer"
37+
include = false
38+
39+
[dab38838-26bb-4fff-acbe-3b0a9bfeba2d]
40+
description = "disallow second strand longer"
41+
reimplements = "8a2d4ed0-ead5-4fdd-924d-27c4cf56e60e"
42+
43+
[5dce058b-28d4-4ca7-aa64-adfe4e17784c]
44+
description = "disallow left empty strand"
45+
include = false
46+
47+
[db92e77e-7c72-499d-8fe6-9354d2bfd504]
48+
description = "disallow left empty strand"
49+
include = false
50+
reimplements = "5dce058b-28d4-4ca7-aa64-adfe4e17784c"
51+
52+
[b764d47c-83ff-4de2-ab10-6cfe4b15c0f3]
53+
description = "disallow empty first strand"
54+
reimplements = "db92e77e-7c72-499d-8fe6-9354d2bfd504"
55+
56+
[38826d4b-16fb-4639-ac3e-ba027dec8b5f]
57+
description = "disallow right empty strand"
58+
include = false
59+
60+
[920cd6e3-18f4-4143-b6b8-74270bb8f8a3]
61+
description = "disallow right empty strand"
62+
include = false
63+
reimplements = "38826d4b-16fb-4639-ac3e-ba027dec8b5f"
64+
65+
[9ab9262f-3521-4191-81f5-0ed184a5aa89]
66+
description = "disallow empty second strand"
67+
reimplements = "920cd6e3-18f4-4143-b6b8-74270bb8f8a3"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
-- Fixture for the hamming exercise: rebuild the table from scratch, then load
-- its rows from data.csv.
DROP TABLE IF EXISTS hamming;

CREATE TABLE hamming (
    strand1 TEXT NOT NULL,
    strand2 TEXT NOT NULL,
    result  INTEGER,
    error   TEXT
);

-- sqlite3 shell dot-commands: switch to CSV mode and import the data file.
.mode csv
.import ./data.csv hamming

-- Deliberately unfiltered: every row is reset. data.csv supplies "" for both
-- output fields, and this replaces whatever the import stored with NULL.
UPDATE hamming
SET
    result = NULL,
    error = NULL;
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
-- Test-case table for the hamming exercise; dropped and rebuilt on every run.
DROP TABLE IF EXISTS tests;

CREATE TABLE IF NOT EXISTS tests (
    -- Identity of the case: both values are taken from the test.toml file.
    uuid            TEXT PRIMARY KEY,
    description     TEXT NOT NULL,
    -- Bookkeeping columns needed by the online test-runner.
    status          TEXT DEFAULT 'fail',
    message         TEXT,
    output          TEXT,
    test_code       TEXT,
    task_id         INTEGER DEFAULT NULL,
    -- Inputs and expected outcome of the actual test.
    strand1         TEXT NOT NULL,
    strand2         TEXT NOT NULL,
    expected_result INTEGER,
    expected_error  TEXT
);

-- One row per test case: a case expects either a distance or an error message.
INSERT INTO tests
    (uuid, description, strand1, strand2, expected_result, expected_error)
VALUES
    ('f6dcb64f-03b0-4b60-81b1-3c9dbf47e887', 'empty strands', '', '', 0, NULL),
    ('54681314-eee2-439a-9db0-b0636c656156', 'single letter identical strands', 'A', 'A', 0, NULL),
    ('294479a3-a4c8-478f-8d63-6209815a827b', 'single letter different strands', 'G', 'T', 1, NULL),
    ('9aed5f34-5693-4344-9b31-40c692fb5592', 'long identical strands', 'GGACTGAAATCTG', 'GGACTGAAATCTG', 0, NULL),
    ('cd2273a5-c576-46c8-a52b-dee251c3e6e5', 'long different strands', 'GGACGGATTCTG', 'AGGACGGATTCT', 9, NULL),
    ('b9228bb1-465f-4141-b40f-1f99812de5a8', 'disallow first strand longer', 'AATG', 'AAA', NULL, 'strands must be of equal length'),
    ('dab38838-26bb-4fff-acbe-3b0a9bfeba2d', 'disallow second strand longer', 'ATA', 'AGTG', NULL, 'strands must be of equal length'),
    ('b764d47c-83ff-4de2-ab10-6cfe4b15c0f3', 'disallow empty first strand', '', 'G', NULL, 'strands must be of equal length'),
    ('9ab9262f-3521-4191-81f5-0ed184a5aa89', 'disallow empty second strand', 'G', '', NULL, 'strands must be of equal length');

exercises/practice/hamming/data.csv

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
"","","",""
2+
"A","A","",""
3+
"G","T","",""
4+
"GGACTGAAATCTG","GGACTGAAATCTG","",""
5+
"GGACGGATTCTG","AGGACGGATTCT","",""
6+
"AATG","AAA","",""
7+
"ATA","AGTG","",""
8+
"","G","",""
9+
"G","","",""
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- Schema:
2+
-- CREATE TABLE hamming (
3+
-- strand1 TEXT NOT NULL,
4+
-- strand2 TEXT NOT NULL,
5+
-- result INTEGER,
6+
-- error TEXT
7+
-- );
8+
--
9+
-- Task: update the hamming table. For each row, set result to the Hamming distance between strand1 and strand2 when they have the same length; otherwise set error to 'strands must be of equal length'.

0 commit comments

Comments
 (0)