Skip to content

Commit f793394

Browse files
committed
Merge branch 'develop'
2 parents c34a02e + ed8bb2c commit f793394

File tree

13 files changed

+1017
-32
lines changed

13 files changed

+1017
-32
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
<groupId>com.milaboratory</groupId>
66
<artifactId>migec</artifactId>
7-
<version>1.2.2</version>
7+
<version>1.2.3-SNAPSHOT</version>
88
<packaging>jar</packaging>
99

1010
<name>migec</name>

src/main/groovy/com/milaboratory/migec/CdrBlast.groovy

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ import org.codehaus.groovy.runtime.ResourceGroovyMethods
4040
import java.util.concurrent.ConcurrentHashMap
4141
import java.util.concurrent.atomic.AtomicInteger
4242

43-
import static com.milaboratory.migec.Util.BLANK_PATH
43+
import static com.milaboratory.migec.Util.*
4444

4545
//////////////
4646
// CLI //
@@ -66,6 +66,8 @@ cli.N(args: 1,
6666
"Number of reads to take. For downsampling purposes")
6767
cli._(longOpt: "no-sort",
6868
"Do not sort output by clonotype count, can save some time")
69+
cli._(longOpt: "allow-chimeric",
70+
"Allow V-J combinations coming from different genes")
6971
cli._(longOpt: "blast-path", args: 1, argName: "directory",
7072
"Path to blast executable.")
7173
cli._(longOpt: "same-sample", "Assembled data only (-a). Input files come from the same sample so " +
@@ -110,10 +112,11 @@ if (opt.h) {
110112
}
111113

112114
// SEGMENTS STUFF
113-
boolean includeNonFuncitonal = opt.'all-segments', includeAlleles = opt.'all-alleles'
115+
boolean includeNonFuncitonal = opt.'all-segments', includeAlleles = opt.'all-alleles',
116+
allowChimeric = opt.'allow-chimeric'
114117
if (opt.'print-library') {
115118
println "CDR3 extraction is possible for the following data (segments include non-functional = $includeNonFuncitonal):"
116-
Util.listAvailableSegments(includeNonFuncitonal, includeAlleles)
119+
listAvailableSegments(includeNonFuncitonal, includeAlleles)
117120
System.exit(0)
118121
}
119122

@@ -132,14 +135,15 @@ if (!chainOpt) {
132135

133136
def chains = chainOpt.split(",")
134137

135-
chains.each { chain ->
136-
if (!Util.isAvailable(species, chain, includeNonFuncitonal, includeAlleles)) {
138+
def hasD = chains.collect { chain ->
139+
if (!isAvailable(species, chain, includeNonFuncitonal, includeAlleles)) {
137140
println "[ERROR] Sorry, no analysis could be performed for $species gene $chain " +
138141
"(include non-functional = $includeNonFuncitonal). " +
139142
"Possible variants are:\n"
140-
Util.listAvailableSegments(includeNonFuncitonal, includeAlleles)
143+
listAvailableSegments(includeNonFuncitonal, includeAlleles)
141144
System.exit(2)
142145
}
146+
hasD(species, chain, includeNonFuncitonal, includeAlleles)
143147
}
144148

145149
// SYSTEM
@@ -225,10 +229,10 @@ def vAlleles = new ArrayList<Allele>(), jAlleles = new ArrayList<Allele>(),
225229
dAlleles = new ArrayList<Allele>()
226230
def collapseAlleleMap = new HashMap<String, Allele>()
227231

228-
chains.each { chain ->
232+
chains.eachWithIndex { String chain, int ind ->
229233
println "${timestamp()} Loading ${chain} Variable and Joining segment data"
230234

231-
def resFile = Util.getSegmentsFile(includeNonFuncitonal, includeAlleles)
235+
def resFile = getSegmentsFile(includeNonFuncitonal, includeAlleles)
232236

233237
resFile.splitEachLine("\t") {
234238
if (species.toUpperCase() == it[0].toUpperCase() &&
@@ -243,7 +247,7 @@ chains.each { chain ->
243247
// note that D segment could have different strand
244248
dAlleles.add(new Allele(alleleId: it[3],
245249
segmentId: it[3].split("[*]")[0],
246-
seq: Util.revCompl(seq)))
250+
seq: revCompl(seq)))
247251
} else {
248252
// More complex processing for V/J alleles
249253
// Mask all except seed region
@@ -272,7 +276,9 @@ chains.each { chain ->
272276
allele = new Allele(alleleId: it[3], segmentId: it[3].split("[*]")[0],
273277
refPoint: refPoint,
274278
seq: seq,
275-
type: type)
279+
type: type,
280+
chain: chain,
281+
hasD: hasD[ind])
276282
collapseAlleleMap.put(seq, allele)
277283
} else
278284
allele.alleleId += "," + it[3]
@@ -331,7 +337,7 @@ def seqMap = new HashMap<String, Integer>(),
331337
int counter = 0, uniqueCounter = 0
332338
inputFileNames.each { inputFileName ->
333339
println "${timestamp()} Reading $inputFileName and generating list of unique sequences to map V and J genes"
334-
def reader = Util.getReader(inputFileName)
340+
def reader = getReader(inputFileName)
335341
String header
336342
while (((header = reader.readLine()) != null) && (nReads < 0 || counter < nReads)) {
337343
if (assembledInput) {
@@ -360,7 +366,7 @@ inputFileNames.each { inputFileName ->
360366
}
361367
}
362368

363-
def queryFilePrefix = TMP_FOLDER + "/" + Util.getFastqPrefix(inputFileNames[0])
369+
def queryFilePrefix = TMP_FOLDER + "/" + getFastqPrefix(inputFileNames[0])
364370
println "[${new Date()} $SCRIPT_NAME] Making a temporary FASTA file for BLAST queries"
365371
int chunk_sz = seqList.size() / THREADS
366372
for (int p = 0; p < THREADS; p++) { // split fasta for blast parallelizaiton
@@ -454,7 +460,8 @@ GParsPool.withPool THREADS, {
454460
String seq = entry.key
455461
int id = entry.value
456462
def vMapping = vMappingData[id], jMapping = jMappingData[id]
457-
if (vMapping != null && jMapping != null) {
463+
if (vMapping != null && jMapping != null &&
464+
(allowChimeric || vMapping.allele.chain == jMapping.allele.chain)) {
458465
Allele vAllele = vMapping.allele,
459466
jAllele = jMapping.allele
460467
boolean vRC = vMapping.aFrom > vMapping.aTo, jRC = jMapping.aFrom > jMapping.aTo
@@ -485,7 +492,7 @@ GParsPool.withPool THREADS, {
485492
if (vRefInAlign && jRefInAlign && vRC == jRC) {
486493
if (vRC) { // after that step RC is same to non-RC
487494
reverseFound.get()
488-
seq = Util.revCompl(seq)
495+
seq = revCompl(seq)
489496

490497
int temp = vMapping.qTo
491498
vMapping.qTo = seq.length() - vMapping.qFrom - 1
@@ -553,16 +560,16 @@ GParsPool.withPool THREADS, {
553560

554561
if (DEBUG) {
555562
if (bad && badExamples.get() < DEBUG_MAX_MESSAGES) {
556-
def message = ((vRC == jRC && vRC) ? Util.revCompl(seq) : seq) + "\n"
563+
def message = ((vRC == jRC && vRC) ? revCompl(seq) : seq) + "\n"
557564

558565
if (!vRefInAlign)
559566
message += vAllele.alleleId + "\t" +
560-
(vRC ? Util.revCompl(vAllele.seq.toUpperCase()) : vAllele.seq.toUpperCase()) + "\t" +
567+
(vRC ? revCompl(vAllele.seq.toUpperCase()) : vAllele.seq.toUpperCase()) + "\t" +
561568
"aFrom=$vMapping.aFrom aTo=$vMapping.aTo ref=$vRef\n"
562569

563570
if (!jRefInAlign)
564571
message += jAllele.alleleId + "\t" +
565-
(vRC ? Util.revCompl(jAllele.seq.toUpperCase()) : jAllele.seq.toUpperCase()) + "\t" +
572+
(vRC ? revCompl(jAllele.seq.toUpperCase()) : jAllele.seq.toUpperCase()) + "\t" +
566573
"aFrom=$jMapping.aFrom aTo=$jMapping.aTo ref=$jRef\n"
567574

568575
debugMessagesBad.add(message)
@@ -577,7 +584,7 @@ GParsPool.withPool THREADS, {
577584
vAllele.seq.toUpperCase() + "\t" +
578585
"qTo=$vMapping.qTo qFrom=$vMapping.qFrom aTo=$vMapping.aTo aFrom=$vMapping.aFrom ref=$vRef\n"
579586

580-
message += jAllele.alleleId + "\t" + Util.revCompl(jAllele.seq.toUpperCase()) + "\t" +
587+
message += jAllele.alleleId + "\t" + revCompl(jAllele.seq.toUpperCase()) + "\t" +
581588
"qTo=$jMapping.qTo qFrom=$jMapping.qFrom aTo=$jMapping.aTo aFrom=$jMapping.aFrom ref=$jRef\n"
582589

583590
debugMessagesGood.add(message)
@@ -624,9 +631,9 @@ if (!dAlleles.empty) {
624631
////////////////////////////////
625632
counter = 0
626633
int goodReads = 0, goodEvents = 0, mappedReads = 0, mappedEvents = 0, totalReads = 0, totalEvents = 0
627-
def writerCdr3Fastq = cdr3FastqFile ? Util.getWriter(cdr3FastqFile) : null,
628-
writerUnmapped3Fastq = unmappedFastqFile ? Util.getWriter(unmappedFastqFile) : null,
629-
writerCdr3Umi = assembledInput && cdr3UmiTable ? Util.getWriter(cdr3UmiTable) : null
634+
def writerCdr3Fastq = cdr3FastqFile ? getWriter(cdr3FastqFile) : null,
635+
writerUnmapped3Fastq = unmappedFastqFile ? getWriter(unmappedFastqFile) : null,
636+
writerCdr3Umi = assembledInput && cdr3UmiTable ? getWriter(cdr3UmiTable) : null
630637

631638
if (writerCdr3Umi)
632639
writerCdr3Umi.println("#umi\tmig_sz\tcdr3nt\tv\tj\td")
@@ -635,7 +642,7 @@ def usedUmis = new HashSet<String>()
635642

636643
inputFileNames.each { inputFileName ->
637644
println "${timestamp()} Appending read data from $inputFileName"
638-
def reader = Util.getReader(inputFileName)
645+
def reader = getReader(inputFileName)
639646
while (((header = reader.readLine()) != null) && (nReads < 0 || counter < nReads)) {
640647
def seq = reader.readLine()
641648
reader.readLine()
@@ -662,7 +669,7 @@ inputFileNames.each { inputFileName ->
662669
qual = qual.substring(clonotypeData.cdrFrom, clonotypeData.cdrTo)
663670

664671
// Increment counters if quality is good
665-
if (Util.minQual(qual) >= qualThreshold) {
672+
if (minQual(qual) >= qualThreshold) {
666673
if (assembledInput && sameSample) {
667674
def umiCdr = umi + "_" + clonotypeData.cdr3Seq
668675
duplicateUmi = usedUmis.contains(umi)
@@ -812,7 +819,7 @@ if (logFileName) {
812819
} else {
813820
logFile.absoluteFile.parentFile.mkdirs()
814821
logFile.withPrintWriter { pw ->
815-
pw.println(Util.CDRBLAST_LOG_HEADER)
822+
pw.println(CDRBLAST_LOG_HEADER)
816823
}
817824
}
818825

src/main/groovy/com/milaboratory/migec/Util.groovy

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class Util {
9595
}
9696

9797
static boolean isAvailable(String species, String gene, boolean includeNonFunctional, boolean includeAllAlleles) {
98-
boolean result = false
98+
def result = false
9999
getSegmentMetadataFile(includeNonFunctional, includeAllAlleles).splitEachLine("\t") { splitLine ->
100100
if (!splitLine[0].startsWith("#") && splitLine[-1] == "1" &&
101101
splitLine[0].toUpperCase() == species.toUpperCase() &&
@@ -105,6 +105,18 @@ class Util {
105105
result
106106
}
107107

108+
static boolean hasD(String species, String gene, boolean includeNonFunctional, boolean includeAllAlleles) {
109+
def result = false
110+
getSegmentMetadataFile(includeNonFunctional, includeAllAlleles).splitEachLine("\t") { splitLine ->
111+
if (!splitLine[0].startsWith("#") &&
112+
splitLine[0].toUpperCase() == species.toUpperCase() &&
113+
splitLine[1].toUpperCase() == gene.toUpperCase() &&
114+
splitLine[3].toUpperCase() == "1")
115+
result = true
116+
}
117+
result
118+
}
119+
108120
private static getResourceAsStream(String resourceName) {
109121
new InputStreamReader(Migec.class.classLoader.getResourceAsStream(resourceName))
110122
}

src/main/groovy/com/milaboratory/migec/clonotype/ClonotypeData.groovy

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,13 @@ class ClonotypeData {
8080
}
8181

8282
void appendDInfo(SegmentSearcher dSegmentSearcher) {
83-
def result = dSegmentSearcher.scan(cdr3Seq, vEndRel, jStartRel)
84-
if (result) {
85-
dStartRel = result.segmentStart
86-
dEndRel = result.segmentEnd
87-
dAlleles = result.alleles as Allele[]
83+
if (vAllele.hasD) {
84+
def result = dSegmentSearcher.scan(cdr3Seq, vEndRel, jStartRel)
85+
if (result) {
86+
dStartRel = result.segmentStart
87+
dEndRel = result.segmentEnd
88+
dAlleles = result.alleles as Allele[]
89+
}
8890
}
8991
}
9092

src/main/groovy/com/milaboratory/migec/segment/Allele.groovy

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
package com.milaboratory.migec.segment
3131

3232
class Allele {
33-
String alleleId, segmentId, seq, type
33+
String alleleId, segmentId, seq, type, chain
34+
boolean hasD
3435
int refPoint
3536
}

src/main/resources/segments.all.metadata

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ CercocebusTorquatusAtys IGH 0 0 0 0
5757
MusCookii IGK 0 0 0 0
5858
BosTaurus TRD 1 1 1 1
5959
BosTaurus TRA 0 0 1 0
60+
BosTaurus TRB 1 1 1 1
6061
BosTaurus TRG 1 0 1 1
6162
RattusNorvegicus IGH 1 1 1 1
6263
RattusNorvegicus IGL 1 0 1 1

src/main/resources/segments.all.minor.metadata

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ MusMusculus_129/Sv IGH 1 0 0 0
5555
RattusRattus IGH 0 0 0 0
5656
BosTaurus TRA 0 0 1 0
5757
BosTaurus TRD 1 1 1 1
58+
BosTaurus TRB 1 1 1 1
5859
BosTaurus TRG 1 0 1 1
5960
RattusNorvegicus_BN/SsNHsdMCW IGK 1 0 0 0
6061
OryctolagusCuniculus IGH 1 1 1 1

0 commit comments

Comments
 (0)