Skip to content

Commit 19f73f5

Browse files
committed
add in additional test in extendPreRepeat
Make sure that the final repeat cannot be greater than the length of the read. Change documentation to cut a new version
1 parent e0384a4 commit 19f73f5

File tree

7 files changed

+64
-26
lines changed

7 files changed

+64
-26
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
crass -- CRisprASSembler -- version 1 subversion 0 revision 0 (1.0.0)
1+
crass -- CRisprASSembler -- version 1 subversion 0 revision 1 (1.0.1)
22
=======================================================================
33

44
CITATION

configure.ac

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Process this file with autoconf to produce a configure script.
33

44
AC_PREREQ(2.61)
5-
AC_INIT(crass, 1.0.0, c.skennerton@gmail.com)
5+
AC_INIT(crass, 1.0.1, c.skennerton@gmail.com)
66

77
AC_CONFIG_AUX_DIR(build)
88
AC_CONFIG_MACRO_DIR([m4])
@@ -16,7 +16,7 @@ LT_LANG([C++])
1616
AC_DEFINE([PACKAGE_FULL_NAME],["CRisprASSembler"],[Define the full name of the package])
1717
AC_DEFINE([PACKAGE_MAJOR_VERSION],[1],[Define the major version of the package])
1818
AC_DEFINE([PACKAGE_MINOR_VERSION],[0],[Define the minor version of the package])
19-
AC_DEFINE([PACKAGE_REVISION],[0],[Define the revision of the package])
19+
AC_DEFINE([PACKAGE_REVISION],[1],[Define the revision of the package])
2020

2121

2222
# extra configure options

doc/manual.tex

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,9 @@
185185

186186
%%% The "real" document content comes below...
187187

188-
\title{Crass: The CRISPR assembler (v0.3.12)}
188+
\title{Crass: The CRISPR assembler (v1.0.1)}
189189
\author{Connor Skennerton and Michael Imelfort}
190-
\date{10th February 2015} % Activate to display a given date or no date (if empty),
190+
\date{5th September 2016} % Activate to display a given date or no date (if empty),
191191
% otherwise the current date is printed
192192

193193
\begin{document}
@@ -301,7 +301,6 @@ \subsubsection{User Flags}
301301
\combinedoptionflag{g}{logToScreen} & Does not produce a log file but instead prints the contents to screen.\\ \\
302302
\combinedoptionflag{G}{showSingletons} & Set this flag if you would like to see unconnected singleton spacers in the final graph.\\ \\
303303
\combinedoptionflag{h}{help} & Print the basic usage and version information. \\ \\
304-
\combinedoptionflag{H}{removeHomopolymers} & This is an experimental feature of Crass where the search algorithms attempt to correct for homopolymer errors in reads.\\ \\
305304
\combinedoptionflagarg{k}{kmerCount}{INT} & Sets the number of kmers that need to be shared between putative direct repeats for them to be clustered together after the find stage. Clustered direct repeats are eventually concatenated to form a 'true' direct repeat for a CRISPR; putative repeats that cannot be clustered are removed from consideration. Change this variable if you feel that the clustering is too stringent and is breaking apart one CRISPR into multiple types. The default number of kmers is 6, however the value should not be set below 6 as this would not be stringent enough; a higher value would split closely related direct repeats apart\\ \\
306305
\combinedoptionflagarg{K}{graphNodeLen}{INT} & Crass makes a graph by cutting kmers on either side of the direct repeat and then joining these together. The length of the kmer will dictate how connected the graph will be. A smaller number will increase the chances of new connections being formed, however it also increases the chances of false positives. The default value is 9.\\ \\
307306
\combinedoptionflagarg{l}{logLevel}{INT} & Sets the verbosity of the log file. Under most circumstances the log level cannot go higher than 4, unless the enable-debug option is set during configuration, which will increase the maximum value to 10. Note that above a level of 4 a lot of the information will not be understandable to the user as most of these messages are specifically for us, the developers, to track down bugs. \\ \\
@@ -313,9 +312,6 @@ \subsubsection{User Flags}
313312
\combinedoptionflagarg{S}{maxSpacer}{INT} & The upper bound considered acceptable for the size of a spacer sequence. Default is 50bp.\\ \\
314313
\combinedoptionflag{V}{version} & Prints out program version information. \\ \\
315314
\combinedoptionflagarg{w}{windowLength}{INT} & When using the long read search algorithm, changes the window length for finding seed sequences; can be set between 6 - 9bp. The default value is 8bp.\\ \\
316-
\combinedoptionflagarg{x}{spacerScalling}{DECIMAL} & Overide the default scalling of the spacer bounds (\optionflag{sS}) set by \longoptionflag{removeHomopolymers}. The default is 0.7, i.e. the size of the spacer bounds is reduced by 30\% when removing homopolymers in sequences. The value must be a decimal. \\ \\
317-
\combinedoptionflagarg{y}{repeatScalling}{DECIMAL} & Overide the default scalling of the direct repeat bounds (\optionflag{dD}) set by \longoptionflag{removeHomopolymers}. The default is 0.7, i.e. the size of the direct repeat bounds is reduced by 30\% when removing homopolymers in sequences. The value must be a decimal.\\
318-
\combinedoptionflag{z}{noScalling} & This turns off the effects of (\optionflag{x} or \optionflag{y}) so that the bounds of the direct repeat and spacer (\optionflag{dDsS}) given on the command line are interpreted literally when the \longoptionflag{removeHomopolymers} option is set. \\ \\
319315
\hline
320316
\end{longtable}
321317
\subsubsection{Output From Crass}

man/crass.1

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33
.\"man mdoc.samples for a complete listing of options
44
.\"man mdoc for the short list of editing options
55
.\"/usr/share/misc/mdoc.template
6-
.Dd 17/04/13
6+
.Dd 5/09/16
77
.Dt crass 1
88
.Os Darwin
99
.Sh NAME
1010
.Nm crass
1111
.Nd the CRISPR Assembler.
1212
.Sh SYNOPSIS
1313
.Nm
14-
.Op Fl abcdDefgGhHkKlLnorsSVwxyz
14+
.Op Fl abcdDefgGhkKlLnorsSVw
1515
.Ar
1616

1717
.Sh DESCRIPTION
@@ -35,7 +35,7 @@ then reads containing direct repeats are then outputed for further analysis.
3535
.Bl -tag -width -indent
3636
.It
3737
.Nm
38-
.Op Fl eghrzGHL
38+
.Op Fl eghrGL
3939
.Op Fl a Ar LAYOUT_TYPE
4040
.Op Fl b Ar INT
4141
.Op Fl c Ar COLOUR_TYPE
@@ -48,8 +48,6 @@ then reads containing direct repeats are then outputed for further analysis.
4848
.Op Fl o Ar DIR
4949
.Op Fl s Ar INT
5050
.Op Fl w Ar INT
51-
.Op Fl x Ar REAL
52-
.Op Fl y Ar REAL
5351
.Op Fl D Ar INT
5452
.Op Fl K Ar INT
5553
.Op Fl S Ar INT
@@ -78,15 +76,13 @@ The Maximum length of the direct repeat to search for [Default: 47]
7876
.It Fl e Ar "" Fl "\^\-noDebugGraph"
7977
Option available only when DEBUG preprocessor symbol is set. Will turn off generating debugging graphs
8078
.It Fl f Ar INT Fl "\^\-covCutoff" Ar INT
81-
Defines the minimim number of reads that a putative CRISPR must contain to be considered real. [Default: 10]
79+
Defines the minimum number of spacers that a putative CRISPR must contain to be considered real. [Default: 3]
8280
.It Fl g Ar "" Fl "\^\-logToScreen"
8381
Print the logging info to stdout rather than to a file
8482
.It Fl G Ar "" Fl "\^\-showSingletons" Ar ""
8583
Set to show unattached spacers in the graph output
8684
.It Fl h Ar "" Fl "\^\-help" Ar ""
8785
Output basic usage information to screen
88-
.It Fl H Ar "" Fl "\^\-removeHomopolymers"
89-
Correct for homopolymer errors [default: no correction]
9086
.It Fl l Ar INT Fl "\^\-logLevel" Ar INT
9187
The level of verbosity to output in the
9288
.Nm
@@ -96,7 +92,7 @@ The number of kmers at two direct repeats must share to be considered part of th
9692
.It Fl K Ar INT Fl "\^\-graphNodeLen" Ar INT
9793
The length of the kmer used to define a node in the graph. The lower the number the more connected the graph will be but also increases the chance of false positive edges [Default: 7]
9894
.It Fl n Ar INT Fl "\^\-minNumRepeats" Ar INT
99-
The minimim number of repeats that a candidate CRISPR locus must contain to be considered 'real' [Default: 3]
95+
The minimum number of repeats that a candidate CRISPR locus must contain to be considered 'real' [Default: 2]
10096
.It Fl o Ar LOCATION Fl "\^\-outDir" Ar LOCATION
10197
The name of the output directory for the output files [Default: ./]
10298
.It Fl r Ar "" Fl "\^\-noRendering" Ar ""
@@ -109,12 +105,6 @@ The maximim length of the spacer to search for [Default: 50]
109105
Print version and copyright information
110106
.It Fl w Ar INT Fl "\^\-windowLength" Ar INT
111107
The length of the window size for searching a genome. Must be between 6 - 9 [Default: 8]
112-
.It Fl x Ar REAL Fl "\^\-spacerScalling" Ar REAL
113-
A decimal number that represents the reduction in size of the spacer when the --removeHomopolymers option is set [Default: 0.7]
114-
.It Fl y Ar REAL Fl "\^\-repeatScalling" Ar REAL
115-
A decimal number that represents the reduction in size of the direct repeat when the --removeHomopolymers option is set [Default: 0.7]
116-
.It Fl z Ar "" Fl "\^\-noScalling" Ar ""
117-
Use the given spacer and direct repeat ranges when --removeHomopolymers is set. The default is to use the scale these values based on the values of -x and -y.
118108
.El
119109

120110
.\" .Sh ENVIRONMENT \" May not be needed

src/crass/libcrispr.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -747,12 +747,19 @@ unsigned int extendPreRepeat(ReadHolder& tmp_holder, int searchWindowLength, in
747747
if(*repeat_iter < static_cast<unsigned int>(left_extension_length))
748748
{
749749
*repeat_iter = 0;
750-
*(repeat_iter + 1) += right_extension_length;
751750
}
752751
else
753752
{
754753
*repeat_iter -= left_extension_length;
755-
*(repeat_iter+1) += right_extension_length;
754+
}
755+
756+
if(*(repeat_iter+1) + right_extension_length >= tmp_holder.getSeqLength())
757+
{
758+
*(repeat_iter + 1) = tmp_holder.getSeqLength() - 1;
759+
}
760+
else
761+
{
762+
*(repeat_iter + 1) += right_extension_length;
756763
}
757764
#ifdef DEBUG
758765
logInfo("\t"<<*repeat_iter<<","<<*(repeat_iter+1), 9);

src/test/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ check_PROGRAMS = crass-test
33
AM_CXXFLAGS = -I$(top_builddir)/src/crass/
44
AM_LDFLAGS = @zlib_flags@
55
crass_test_SOURCES = \
6+
test_readholder.cpp\
67
test_libcrispr.cpp\
78
test_main.cpp
89

src/test/test_libcrispr.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,50 @@
44
#include "libcrispr.h"
55
#include "ReadHolder.h"
66

7+
// 0 1
8+
// 0 1 2 3 4 5 6 7 8 9 0 1 2
9+
// 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
10+
// CACCATGGAAGACCTTCCTAACACCATGGTAGACATTCCTTACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTTCTAA
11+
// rrrrrrrr rrrrrrrr rrrrrrrr
12+
13+
TEST_CASE("searching for additional repeated kmer in a 126bp read", "[libcrispr]") {
// Checks that scanRight() records one further occurrence of the seed kmer
// to the right of the repeats that are already stored on the read.
14+
ReadHolder read("CACCATGGAAGACCTTCCTAACACCATGGTAGACATTCCTTACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTTCTAA","HWI-D00456:77:C70WLANXX:1:1101:10963:2182");
15+
SECTION("where there should be one additional match with a minimum spacer length of 26"){
16+
// Seed two known occurrences of the 8bp pattern as inclusive start/stop pairs.
read.startStopsAdd(0, 7);
17+
read.startStopsAdd(63,70);
18+
std::string pattern = "CACCATGG";
19+
// 26 matches the minimum spacer length named in the SECTION title; the
// meaning of the last argument (24) is not visible here -- TODO confirm
// against the scanRight declaration.
scanRight(read, pattern, 26, 24);
20+
StartStopList reppos = read.getStartStopList();
21+
// The list is flat (start, stop, start, stop, ...): the two seeded pairs must
// be unchanged and exactly one new pair, (105, 112), must follow them.
REQUIRE(reppos.size() == 6);
22+
REQUIRE(reppos[0] == 0);
23+
REQUIRE(reppos[1] == 7);
24+
REQUIRE(reppos[2] == 63);
25+
REQUIRE(reppos[3] == 70);
26+
REQUIRE(reppos[4] == 105);
27+
REQUIRE(reppos[5] == 112);
28+
}
29+
}
30+
31+
TEST_CASE("check extending repeat with 126bp read", "[libcrispr]") {
// Checks extendPreRepeat() on a read whose last seed repeat lies close to the
// end of the read: after extension the final stop position must be the last
// valid index of the 126bp read (125) and not beyond it.
32+
ReadHolder read("CACCATGGAAGACCTTCCTAACACCATGGTAGACATTCCTTACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTTCTAA","HWI-D00456:77:C70WLANXX:1:1101:10963:2182");
33+
34+
SECTION("The search window length is 8 and the min spacer length is 26") {
35+
// Seed three 8bp repeats as inclusive start/stop pairs.
read.startStopsAdd(0, 7);
36+
read.startStopsAdd(63,70);
37+
read.startStopsAdd(105,112);
38+
// Arguments follow the SECTION title: search window length 8, minimum spacer length 26.
int repeat_length = extendPreRepeat(read, 8, 26);
39+
REQUIRE(repeat_length == 23);
40+
StartStopList reppos = read.getStartStopList();
41+
// Still three pairs after extension. The first and last pairs span 22 bases
// rather than the returned 23, presumably because they are clamped to the
// read boundaries (index 0 and index 125) -- confirm against extendPreRepeat.
REQUIRE(reppos.size() == 6);
42+
REQUIRE(reppos[0] == 0);
43+
REQUIRE(reppos[1] == 21);
44+
REQUIRE(reppos[2] == 62);
45+
REQUIRE(reppos[3] == 84);
46+
REQUIRE(reppos[4] == 104);
47+
REQUIRE(reppos[5] == 125);
48+
}
49+
}
50+
751
TEST_CASE("searching for additional repeated kmers in 100bp read", "[libcrispr]"){
852
// read
953
// 0 1

0 commit comments

Comments
 (0)