Skip to content

Commit e0384a4

Browse files
committed
fixed bug in qcFoundRepeats
Found that one of the tests was unreachable in some cases, apparently because of a copy-and-paste error. Refactored the function to make it easier to read.
1 parent fd0ecc1 commit e0384a4

File tree

2 files changed

+116
-101
lines changed

2 files changed

+116
-101
lines changed

src/crass/libcrispr.cpp

Lines changed: 106 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -763,8 +763,101 @@ unsigned int extendPreRepeat(ReadHolder& tmp_holder, int searchWindowLength, in
763763
return static_cast<unsigned int>(tmp_holder.getRepeatLength());
764764

765765
}
766+
/*
 * Test 4: MAX AND MIN SPACER LENGTHS
 *
 * Checks that the observed spacer length extremes lie inside the allowed
 * range. Both bounds are inclusive: a value equal to the allowed minimum or
 * maximum passes.
 *
 * minSpacerLength        - shortest spacer length that was observed
 * maxSpacerLength        - longest spacer length that was observed
 * minAllowedSpacerLength - smallest acceptable spacer length
 * maxAllowedSpacerLength - largest acceptable spacer length
 *
 * Returns true when both extremes are within range, false as soon as one of
 * them is not. When DEBUG is defined the outcome of each sub-test (4a, 4b) is
 * reported through logInfo.
 */
bool testSpacerLength(int minSpacerLength, int maxSpacerLength, int minAllowedSpacerLength, int maxAllowedSpacerLength)
{
    // 4a: the shortest spacer must not fall below the allowed minimum
    const bool shortest_out_of_range = (minSpacerLength < minAllowedSpacerLength);
    if (!shortest_out_of_range)
    {
#ifdef DEBUG
        logInfo("\tPassed test 4a. Min spacer length within range: "<<minSpacerLength<<" > "<<minAllowedSpacerLength, 8);
#endif
    }
    else
    {
#ifdef DEBUG
        logInfo("\tFailed test 4a. Min spacer length out of range: "<<minSpacerLength<<" < "<<minAllowedSpacerLength, 8);
#endif
        return false;
    }

    // 4b: the longest spacer must not exceed the allowed maximum
    const bool longest_out_of_range = (maxSpacerLength > maxAllowedSpacerLength);
    if (!longest_out_of_range)
    {
#ifdef DEBUG
        logInfo("\tPassed test 4b. Max spacer length within range: "<<maxSpacerLength<<" < "<<maxAllowedSpacerLength, 8);
#endif
    }
    else
    {
#ifdef DEBUG
        logInfo("\tFailed test 4b. Max spacer length out of range: "<<maxSpacerLength<<" > "<<maxAllowedSpacerLength, 8);
#endif
        return false;
    }

    return true;
}
767794

795+
bool testSpacerRepeatSimilarity(float similarity)
796+
{
797+
if (similarity > CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY)
798+
{
799+
#ifdef DEBUG
800+
logInfo("\tFailed test 5b. Spacers are too similar to the repeat: "<<similarity<<" > "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
801+
#endif
802+
return false;
803+
}
804+
#ifdef DEBUG
805+
logInfo("\tPassed test 5b. Spacers are not too similar to the repeat: "<<similarity<<" < "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
806+
#endif
807+
return true;
808+
}
809+
810+
bool testSpacerSpacerSimilarity(float similarity)
811+
{
812+
/*
813+
* REPEAT AND SPACER CONTENT SIMILARITIES
814+
*/
815+
if (similarity > CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY)
816+
{
817+
#ifdef DEBUG
818+
logInfo("\tFailed test 5a. Spacers are too similar: "<<similarity<<" > "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
819+
#endif
820+
return false;
821+
}
822+
#ifdef DEBUG
823+
logInfo("\tPassed test 5a. Spacers are not too similar: "<<similarity<<" < "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
824+
#endif
825+
826+
return true;
827+
}
828+
829+
bool testSpacerSpacerLengthDiff(int difference)
830+
{
831+
/*
832+
* REPEAT AND SPACER LENGTH SIMILARITIES
833+
*/
834+
if (difference > CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF)
835+
{
836+
#ifdef DEBUG
837+
logInfo("\tFailed test 6a. Spacer lengths differ too much: "<<difference<<" > "<<CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF, 8);
838+
#endif
839+
return false;
840+
}
841+
#ifdef DEBUG
842+
logInfo("\tPassed test 6a. Spacer lengths do not differ too much: "<<difference<<" < "<<CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF, 8);
843+
#endif
844+
return true;
845+
}
846+
bool testRepeatSpacerLengthDiff(int difference)
847+
{
848+
849+
if (difference > CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF)
850+
{
851+
#ifdef DEBUG
852+
logInfo("\tFailed test 6b. Repeat to spacer lengths differ too much: "<<difference<<" > "<<CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, 8);
853+
#endif
854+
return false;
855+
}
856+
#ifdef DEBUG
857+
logInfo("\tPassed test 6b. Repeat to spacer lengths do not differ too much: "<<difference<<" < "<<CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, 8);
858+
#endif
859+
return true;
860+
}
768861
//need at least two elements
769862
bool qcFoundRepeats(ReadHolder& tmp_holder, int minSpacerLength, int maxSpacerLength)
770863
{
@@ -881,136 +974,48 @@ bool qcFoundRepeats(ReadHolder& tmp_holder, int minSpacerLength, int maxSpacerLe
881974
ave_spacer_to_spacer_len_difference = abs(ave_spacer_to_spacer_len_difference /= static_cast<float>(num_compared));
882975
ave_repeat_to_spacer_len_difference = abs(ave_repeat_to_spacer_len_difference /= static_cast<float>(num_compared));
883976

884-
/*
885-
* MAX AND MIN SPACER LENGTHS
886-
*/
887-
if (min_spacer_length < minSpacerLength)
888-
{
889-
#ifdef DEBUG
890-
logInfo("\tFailed test 4a. Min spacer length out of range: "<<min_spacer_length<<" < "<<minSpacerLength, 8);
891-
#endif
977+
978+
if(! testSpacerLength(min_spacer_length, max_spacer_length, minSpacerLength, maxSpacerLength)) {
892979
return false;
893980
}
894-
#ifdef DEBUG
895-
logInfo("\tPassed test 4a. Min spacer length within range: "<<min_spacer_length<<" > "<<minSpacerLength, 8);
896-
#endif
897-
if (max_spacer_length > maxSpacerLength)
981+
if(! testSpacerSpacerSimilarity(ave_spacer_to_spacer_difference))
898982
{
899-
#ifdef DEBUG
900-
logInfo("\tFailed test 4b. Max spacer length out of range: "<<max_spacer_length<<" > "<<maxSpacerLength, 8);
901-
#endif
902983
return false;
903984
}
904-
#ifdef DEBUG
905-
logInfo("\tPassed test 4b. Max spacer length within range: "<<max_spacer_length<<" < "<<maxSpacerLength, 8);
906-
#endif
907-
908-
/*
909-
* REPEAT AND SPACER CONTENT SIMILARITIES
910-
*/
911-
if (ave_spacer_to_spacer_difference > CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY)
985+
if(! testSpacerRepeatSimilarity(ave_repeat_to_spacer_difference))
912986
{
913-
#ifdef DEBUG
914-
logInfo("\tFailed test 5a. Spacers are too similar: "<<ave_spacer_to_spacer_difference<<" > "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
915-
#endif
916987
return false;
917988
}
918-
#ifdef DEBUG
919-
logInfo("\tPassed test 5a. Spacers are not too similar: "<<ave_spacer_to_spacer_difference<<" < "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
920-
#endif
921-
if (ave_repeat_to_spacer_difference > CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY)
922-
{
923-
#ifdef DEBUG
924-
logInfo("\tFailed test 5b. Spacers are too similar to the repeat: "<<ave_repeat_to_spacer_difference<<" > "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
925-
#endif
926-
return false;
927-
}
928-
#ifdef DEBUG
929-
logInfo("\tPassed test 5b. Spacers are not too similar to the repeat: "<<ave_repeat_to_spacer_difference<<" < "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
930-
#endif
931-
932-
/*
933-
* REPEAT AND SPACER LENGTH SIMILARITIES
934-
*/
935-
if (ave_spacer_to_spacer_len_difference > CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF)
989+
if(! testSpacerSpacerLengthDiff(ave_spacer_to_spacer_len_difference))
936990
{
937-
#ifdef DEBUG
938-
logInfo("\tFailed test 6a. Spacer lengths differ too much: "<<ave_spacer_to_spacer_len_difference<<" > "<<CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF, 8);
939-
#endif
940991
return false;
941992
}
942-
#ifdef DEBUG
943-
logInfo("\tPassed test 6a. Spacer lengths do not differ too much: "<<ave_spacer_to_spacer_len_difference<<" < "<<CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF, 8);
944-
#endif
945-
if (ave_repeat_to_spacer_len_difference > CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF)
993+
if(! testRepeatSpacerLengthDiff(ave_repeat_to_spacer_len_difference))
946994
{
947-
#ifdef DEBUG
948-
logInfo("\tFailed test 6b. Repeat to spacer lengths differ too much: "<<ave_repeat_to_spacer_len_difference<<" > "<<CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, 8);
949-
#endif
950995
return false;
951996
}
952-
#ifdef DEBUG
953-
logInfo("\tPassed test 6b. Repeat to spacer lengths do not differ too much: "<<ave_repeat_to_spacer_len_difference<<" < "<<CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, 8);
954-
#endif
955-
956997
}
957998
}
958999

9591000
// Are we testing a short read or only one spacer?
9601001
if(is_short)
9611002
{
9621003
std::string spacer = tmp_holder.spacerStringAt(single_compare_index);
1004+
if(! testSpacerLength(spacer.length(), spacer.length(), minSpacerLength, maxSpacerLength))
1005+
{
1006+
return false;
1007+
}
9631008
float similarity = PatternMatcher::getStringSimilarity(repeat, spacer);
964-
if (similarity > CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY)
1009+
if(! testSpacerRepeatSimilarity(similarity))
9651010
{
966-
/*
967-
* MAX AND MIN SPACER LENGTHS
968-
*/
969-
if (static_cast<int>(spacer.length()) < minSpacerLength)
970-
{
971-
#ifdef DEBUG
972-
logInfo("\tFailed test 4a. Min spacer length out of range: "<<spacer.length()<<" < "<<minSpacerLength, 8);
973-
#endif
974-
return false;
975-
}
976-
#ifdef DEBUG
977-
logInfo("\tPassed test 4a. Min spacer length within range: "<<spacer.length()<<" > "<<minSpacerLength, 8);
978-
#endif
979-
if (static_cast<int>(spacer.length()) > maxSpacerLength)
980-
{
981-
#ifdef DEBUG
982-
logInfo("\tFailed test 4b. Max spacer length out of range: "<<spacer.length()<<" > "<<maxSpacerLength, 8);
983-
#endif
984-
return false;
985-
}
986-
#ifdef DEBUG
987-
logInfo("\tPassed test 4b. Max spacer length within range: "<<spacer.length()<<" < "<<maxSpacerLength, 8);
988-
#endif
989-
/*
990-
* REPEAT AND SPACER CONTENT SIMILARITIES
991-
*/
992-
#ifdef DEBUG
993-
logInfo("\tFailed test 5. Spacer is too similar to the repeat: "<<similarity<<" > "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
994-
#endif
9951011
return false;
9961012
}
997-
#ifdef DEBUG
998-
logInfo("\tPassed test 5. Spacer is not too similar to the repeat: "<<similarity<<" < "<<CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, 8);
999-
#endif
10001013

1001-
/*
1002-
* REPEAT AND SPACER LENGTH SIMILARITIES
1003-
*/
1004-
if (abs(static_cast<int>(spacer.length()) - static_cast<int>(repeat.length())) > CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF)
1014+
int repeat_spacer_len_diff = abs(static_cast<int>(spacer.length()) - static_cast<int>(repeat.length()));
1015+
if(! testRepeatSpacerLengthDiff(repeat_spacer_len_diff))
10051016
{
1006-
#ifdef DEBUG
1007-
logInfo("\tFailed test 6. Repeat to spacer length differ too much: "<<abs((int)spacer.length() - (int)repeat.length())<<" > "<<CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, 8);
1008-
#endif
10091017
return false;
10101018
}
1011-
#ifdef DEBUG
1012-
logInfo("\tPassed test 6. Repeat to spacer length do not differ too much: "<<abs((int)spacer.length() - (int)repeat.length())<<" < "<<CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, 8);
1013-
#endif
10141019
}
10151020

10161021
return true;

src/crass/libcrispr.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,16 @@ unsigned int extendPreRepeat(ReadHolder& tmp_holder,
100100
int searchWindowLength,
101101
int minSpacerLength);
102102

103+
/*
 * Test 4: true when minSpacerLength >= minAllowedSpacerLength and
 * maxSpacerLength <= maxAllowedSpacerLength, false otherwise.
 */
bool testSpacerLength(int minSpacerLength, int maxSpacerLength, int minAllowedSpacerLength, int maxAllowedSpacerLength);

/*
 * Test 5b: false when the spacer-to-repeat similarity is greater than
 * CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, true otherwise.
 */
bool testSpacerRepeatSimilarity(float similarity);

/*
 * Test 5a: false when the spacer-to-spacer similarity is greater than
 * CRASS_DEF_SPACER_OR_REPEAT_MAX_SIMILARITY, true otherwise.
 */
bool testSpacerSpacerSimilarity(float similarity);

/*
 * Test 6a: false when the spacer-to-spacer length difference is greater than
 * CRASS_DEF_SPACER_TO_SPACER_LENGTH_DIFF, true otherwise.
 */
bool testSpacerSpacerLengthDiff(int difference);

/*
 * Test 6b: false when the repeat-to-spacer length difference is greater than
 * CRASS_DEF_SPACER_TO_REPEAT_LENGTH_DIFF, true otherwise.
 */
bool testRepeatSpacerLengthDiff(int difference);
112+
103113
bool qcFoundRepeats(ReadHolder& tmp_holder,
104114
int minSpacerLength,
105115
int maxSpacerLength);

0 commit comments

Comments
 (0)