@@ -51,58 +51,56 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex,
5151 std::ifstream mappingStream (seqDB + " .lookup" );
5252 std::string line;
5353 unsigned int setkey = 0 ;
54+ unsigned int maxsetkey = 0 ;
5455 while (std::getline (mappingStream, line)) {
5556 std::vector<std::string> split = Util::split (line, " \t " );
5657 unsigned int key = strtoul (split[0 ].c_str (), NULL , 10 );
5758 setkey = strtoul (split[2 ].c_str (), NULL , 10 );
5859 keyToSet[key] = setkey;
60+ if (maxsetkey < setkey) {
61+ maxsetkey = setkey;
62+ }
5963 }
6064 for (size_t id = 0 ; id < originalseqDbr->getSize (); id++) {
6165 setToLength[keyToSet[seqIndex[id].id ]] += seqIndex[id].length ;
6266 keysInSeq[seqIndex[id].id ] = 1 ;
6367 }
64- unsigned int sourceLen = setkey + 1 ;
68+ unsigned int sourceLen = maxsetkey + 1 ;
6569 seqnum = setToLength.size ();
6670 sourceList = new (std::nothrow) unsigned int [lastKey];
67- sourceOffsets = new (std::nothrow) size_t [sourceLen + 1 ];
71+ sourceOffsets = new (std::nothrow) size_t [sourceLen + 1 ]() ;
6872 sourceLookupTable = new (std::nothrow) unsigned int *[sourceLen];
73+ size_t * sourceOffsetsDecrease = new (std::nothrow) size_t [sourceLen + 1 ]();
6974
7075 mappingStream.close ();
7176 mappingStream.open (seqDB + " .lookup" );
77+
78+ line = " " ;
79+ while (std::getline (mappingStream, line)) {
80+ std::vector<std::string> split = Util::split (line, " \t " );
81+ setkey = strtoul (split[2 ].c_str (), NULL , 10 );
82+ sourceOffsets[setkey]++;
83+ sourceOffsetsDecrease[setkey]++;
84+ }
85+ AlignmentSymmetry::computeOffsetFromCounts (sourceOffsets, sourceLen);
86+ AlignmentSymmetry::setupPointers<unsigned int >(sourceList, sourceLookupTable, sourceOffsets, sourceLen, lastKey);
87+
88+ mappingStream.close ();
89+ mappingStream.open (seqDB + " .lookup" );
90+
7291 line = " " ;
73- unsigned int prevsetkey = UINT_MAX;
74- size_t n = 0 ;
75- size_t lookupOrder = 0 ;
76- setkey = UINT_MAX;
7792 while (std::getline (mappingStream, line)) {
7893 std::vector<std::string> split = Util::split (line, " \t " );
7994 unsigned int key = strtoul (split[0 ].c_str (), NULL , 10 );
8095 setkey = strtoul (split[2 ].c_str (), NULL , 10 );
81- if (setkey != prevsetkey) {
82- if (prevsetkey != UINT_MAX){
83- sourceOffsets[prevsetkey] = n;
84- for (size_t k = prevsetkey+1 ; k<setkey; k++) {
85- sourceOffsets[k] = 0 ;
86- }
87- }
88- prevsetkey = setkey;
89- if (keysInSeq[key] == 1 ) {
90- sourceKeyVec.emplace_back (setkey);
91- }
92- n = 0 ;
93- }
96+ size_t order = sourceOffsets[setkey + 1 ] - sourceOffsetsDecrease[setkey];
9497 if (keysInSeq[key] == 1 ) {
95- sourceList[lookupOrder ] = key;
98+ sourceList[order ] = key;
9699 } else {
97- sourceList[lookupOrder ] = UINT_MAX;
100+ sourceList[order ] = UINT_MAX;
98101 }
99- n++;
100- lookupOrder++;
102+ sourceOffsetsDecrease[setkey]--;
101103 }
102- sourceOffsets[prevsetkey] = n;
103- AlignmentSymmetry::computeOffsetFromCounts (sourceOffsets, sourceLen);
104- AlignmentSymmetry::setupPointers<unsigned int >(sourceList, sourceLookupTable, sourceOffsets, sourceLen, lastKey);
105-
106104 char * data = (char *)malloc (
107105 sizeof (size_t ) +
108106 sizeof (size_t ) +
@@ -114,7 +112,7 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex,
114112
115113 std::vector<DBReader<unsigned int >::Index*> indexStorage (seqnum);
116114
117- n = 0 ;
115+ size_t n = 0 ;
118116 for (const auto & pairs : setToLength) {
119117 indexStorage[n] = new DBReader<unsigned int >::Index;
120118 indexStorage[n]->id = pairs.first ;
0 commit comments