-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnum9Kmers.cpp
More file actions
84 lines (69 loc) · 2.48 KB
/
num9Kmers.cpp
File metadata and controls
84 lines (69 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
using namespace std;
// Builds the frequency table of all k-mers (length-k substrings) in `window`.
//
// Parameters:
//   window - the text to scan.
//   k      - the k-mer length.
// Returns:
//   A map from each distinct k-mer to the number of (possibly overlapping)
//   positions at which it starts in `window`. The map is empty when k is not
//   positive or when `window` is shorter than k.
std::unordered_map<std::string, int> FrequencyTable(const std::string &window, int k) {
    std::unordered_map<std::string, int> freqMap;

    // A non-positive k has no meaningful k-mers; without this guard a negative
    // k would be converted to a huge unsigned length and walk past the end of
    // the string (std::out_of_range from substr).
    if (k <= 0) {
        return freqMap;
    }

    const std::size_t kmerLen = static_cast<std::size_t>(k);
    if (kmerLen > window.size()) {
        return freqMap;
    }

    // Last valid start index is size - k, so the subtraction cannot underflow here.
    const std::size_t lastStart = window.size() - kmerLen;
    for (std::size_t start = 0; start <= lastStart; ++start) {
        ++freqMap[window.substr(start, kmerLen)];
    }
    return freqMap;
}
// Counts the distinct k-mers that form an (L, t)-clump in `text` and prints
// the total to standard output.
//
// A k-mer forms an (L, t)-clump when some length-L window of `text` contains
// it at least t times (overlapping occurrences count).
//
// Parameters:
//   text - the sequence to scan.
//   k    - the k-mer length.
//   L    - the window length.
//   t    - the minimum number of occurrences required within one window.
//
// Degenerate inputs (k <= 0, L < k, or text shorter than L) contain no
// complete window of k-mers, so the reported total is 0.
void findClumps(const std::string &text, int k, int L, int t) {
    std::unordered_set<std::string> clumpKmers; // Unique k-mers forming clumps
    const std::size_t n = text.length();

    if (k > 0 && L >= k && n >= static_cast<std::size_t>(L)) {
        const std::size_t kmerLen = static_cast<std::size_t>(k);
        // Number of k-mer start positions inside one length-L window.
        const std::size_t kmersPerWindow = static_cast<std::size_t>(L) - kmerLen + 1;

        // Counts of the k-mers whose start lies in the last `kmersPerWindow`
        // positions. Sliding one position only removes one k-mer and adds one,
        // so the table is updated incrementally instead of being rebuilt for
        // every window.
        std::unordered_map<std::string, int> counts;

        for (std::size_t pos = 0; pos + kmerLen <= n; ++pos) {
            if (pos >= kmersPerWindow) {
                // The k-mer that started kmersPerWindow positions ago has
                // just left the window.
                auto leaving = counts.find(text.substr(pos - kmersPerWindow, kmerLen));
                if (leaving != counts.end() && --(leaving->second) == 0) {
                    counts.erase(leaving);
                }
            }

            // Only the k-mer that just entered can newly reach the threshold.
            // While pos < kmersPerWindow - 1 the counts describe a prefix of
            // the first window, which can only under-count, so a hit here is
            // still a hit in a complete window.
            const std::string entering = text.substr(pos, kmerLen);
            if (++counts[entering] >= t) {
                clumpKmers.insert(entering);
            }
        }
    }

    // Output the number of unique k-mers forming (L,t)-clumps
    std::cout << "Total number of unique " << k << "-mers forming (" << L << "," << t
              << ")-clumps: " << clumpKmers.size() << std::endl;
}
// Entry point: prompts for a file name, loads the DNA sequence stored in that
// file, and reports how many distinct 9-mers form (500,3)-clumps in it.
//
// The file is read line by line; whitespace is discarded and letters are
// upper-cased. Any remaining character other than A, C, G or T aborts the run.
//
// Returns 0 on success, 1 if the file name cannot be read, the file cannot be
// opened, or the sequence contains an invalid character.
int main() {
    const int k = 9;   // Length of the k-mer
    const int L = 500; // Window length
    const int t = 3;   // Minimum number of occurrences within the window

    std::string fileName;
    std::cout << "Enter the filename containing the DNA sequence: ";
    if (!(std::cin >> fileName)) {
        std::cerr << "No filename provided." << std::endl;
        return 1;
    }

    // Open the file
    std::ifstream inputFile(fileName);
    if (!inputFile) {
        std::cerr << "Failed to open the file: " << fileName << std::endl;
        return 1;
    }

    // Read, normalise and validate the DNA sequence in a single pass
    std::string text;
    std::string line;
    while (std::getline(inputFile, line)) {
        for (const char raw : line) {
            // <cctype> functions require a value representable as unsigned
            // char; passing a negative plain char is undefined behaviour.
            const unsigned char byte = static_cast<unsigned char>(raw);
            if (std::isspace(byte)) {
                continue;
            }
            const char nucleotide = static_cast<char>(std::toupper(byte));
            if (nucleotide != 'A' && nucleotide != 'C' && nucleotide != 'G' && nucleotide != 'T') {
                std::cerr << "Invalid nucleotide '" << nucleotide << "' in DNA sequence." << std::endl;
                return 1;
            }
            text += nucleotide;
        }
    }
    inputFile.close();

    findClumps(text, k, L, t);
    return 0;
}