Skip to content

Commit b2dca9e

Browse files
committed
Corrected bugs from the previous commit. Better output header. Corrected jackknife error calculation.
1 parent ae4e12f commit b2dca9e

File tree

2 files changed

+29
-28
lines changed

2 files changed

+29
-28
lines changed

RASCalculator.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ def getTotalDerivedAC(afDict):
9797
# Bin maxAF + 1: Outgroup F3 stats
9898
RAS = [[[[0 for i in range(NumBins)] for j in range(maxAF+2)] for k in range(len(TestPops))] for x in range(len(LeftPops))]
9999
# The normalization only records a total for all allele frequencies.
100-
mj = [0 for i in range(NumBins)] ## Bin sizes are now stable across all RAS calculations.
101-
#[[ [0 for i in range(NumBins)] for k in range(len(TestPops))] for x in range(len(LeftPops))]
100+
blockSizes = [0 for i in range(NumBins)] ## Bin sizes are now stable across all RAS calculations.
101+
mj = [[ [0 for i in range(NumBins)] for k in range(len(TestPops))] for x in range(len(LeftPops))]
102102

103103

104104
totalRightPopSize = sum(freqSumParser.sizes[p] for p in RightPops)
@@ -114,7 +114,7 @@ def getTotalDerivedAC(afDict):
114114
if args.NoTransitions and isTransition(Ref, Alt):
115115
continue
116116

117-
mj[Chrom] += 1
117+
blockSizes[Chrom] += 1
118118

119119
missingness = getMissingness(afDict)
120120
if missingness > args.MissingnessCutoff:
@@ -137,7 +137,7 @@ def getTotalDerivedAC(afDict):
137137
rightSize = freqSumParser.sizes[testPop]
138138

139139
if afDict[leftPop] >= 0 and afDict[testPop] >= 0:
140-
# mj[Lftidx][Tstidx][Chrom] += 1
140+
mj[Lftidx][Tstidx][Chrom] += 1
141141
xLeft = afDict[leftPop] / leftSize
142142
xRight = afDict[testPop] / rightSize
143143
xOutgroup = afDict[args.outgroup] / freqSumParser.sizes[args.outgroup]
@@ -160,26 +160,26 @@ def getTotalDerivedAC(afDict):
160160
for x in range(len(LeftPops)):
161161
for j in range(len(TestPops)):
162162
for i in range(minAF-1,maxAF+2):
163-
thetaJ,sigma2=ras.getJackknife(RAS[x][j][i],mj)
164-
ThetaJ[x][j][i]=thetaJ
165-
Sigma2[x][j][i]=sigma2
163+
thetaJ,sigma2 = ras.getJackknife(RAS[x][j][i], mj[x][j], blockSizes)
164+
ThetaJ[x][j][i] = thetaJ
165+
Sigma2[x][j][i] = sigma2
166166

167167
# print ("#FREQSUM POPULATIONS & SIZES:",*Pops, file=args.Output, sep=" ", end="\n")
168168
print ("#Left Populations: ", *LeftPops, sep=" ", file=args.Output, end="\n")
169169
print ("#Tested Populations: ", *TestPops, sep=" ", file=args.Output, end="\n")
170170
print ("#Populations considered for allele frequency calculation (Rights):", *RightPops, file=args.Output, sep="\t", end="\n")
171171
print ("#Outgroup: ", args.outgroup, file=args.Output, sep="\t", end="\n")
172172
# RAS, number of sites, RAS /Site, stderr of (RAS/site), Allele Freq
173-
print("TestPop","LeftPop","RAS","Number of sites","RAS/site JK Estimate", "Jackknife Error", "Allele Frequency", sep="\t", file=args.Output)
173+
print("TestPop","LeftPop","RAS","NumSites","RAS/site", "Error", "AlleleCount", sep="\t", file=args.Output)
174174
for leftidx, leftPop in enumerate(LeftPops):
175175
for tstidx, testPop in enumerate(TestPops):
176176
if args.details:
177177
for m in range(minAF,maxAF+1):
178-
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj)), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),m, sep="\t", file=args.Output)
178+
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj[leftidx][tstidx])), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),m, sep="\t", file=args.Output)
179179
m=minAF-1
180-
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj)), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Total [{},{}]".format(minAF,maxAF), sep="\t", file=args.Output)
180+
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj[leftidx][tstidx])), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Total [{},{}]".format(minAF,maxAF), sep="\t", file=args.Output)
181181
m=maxAF+1
182-
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj)), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Outgroup F3", sep="\t", file=args.Output)
182+
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj[leftidx][tstidx])), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Outgroup F3", sep="\t", file=args.Output)
183183
#print ("", file=args.Output)
184184

185185
print ("Program finished running at:", strftime("%D %H:%M:%S"), file=sys.stderr)

RASUtils.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,30 +40,31 @@ def __readHeader(self):
4040
self.sizes[popName] = popSize
4141
print("#Available populations in Input File and their respective sizes: ", self.sizes, file=self.output)
4242

43-
def getJackknife(blockValues, blockSizes):
43+
def getJackknife(blockValues, totalObservations, blockSizes):
4444
thetaminus=[0 for x in range(len(blockSizes))]
4545
sum1=0
4646
sum2=0
4747
jackknifeStdErr=0
48-
if sum(blockSizes)==0:
48+
if sum(totalObservations)==0:
4949
thetahat=0
5050
else:
51-
thetahat=sum(blockValues)/sum(blockSizes)
52-
for c in range(len(blockValues)):
53-
if blockSizes[c]==sum(blockSizes):
54-
thetaminus[c]=0
51+
thetahat=sum(blockValues)/sum(totalObservations)
52+
53+
## Normalise blockValues.
54+
normalisedValues = [0.0 for c in range(len(blockSizes))]
55+
for c in range(len(blockSizes)):
56+
if totalObservations[c] == 0:
57+
continue
5558
else:
56-
thetaminus[c]=(sum(blockValues)-blockValues[c])/(sum(blockSizes)-blockSizes[c])
59+
normalisedValues[c] = blockValues[c]/totalObservations[c]
60+
61+
for c in range(len(blockSizes)):
62+
thetaminus[c]=( (sum(blockValues)-blockValues[c]) / (sum(totalObservations)-totalObservations[c]) )
5763
sum1+=thetahat-thetaminus[c]
58-
if sum(blockSizes)!=0:
59-
sum2+=(blockSizes[c]*thetaminus[c])/sum(blockSizes)
64+
sum2+=(blockSizes[c]*thetaminus[c])/sum(blockSizes)
6065
jackknifeEstimator=sum1+sum2
6166
for c in range(len(blockSizes)):
62-
if blockSizes[c]!=0:
63-
hj=sum(blockSizes)/blockSizes[c]
64-
if hj==1:
65-
jackknifeStdErr+=1
66-
else:
67-
pseudoval = (hj*thetahat)-((hj-1)*thetaminus[c])
68-
jackknifeStdErr+=(1/len(blockSizes))*(((pseudoval-jackknifeEstimator)**2)/(hj-1))
69-
return (jackknifeEstimator,jackknifeStdErr)
67+
hj=sum(blockSizes)/blockSizes[c]
68+
pseudoval = (hj*thetahat)-((hj-1)*thetaminus[c])
69+
jackknifeStdErr+=(1/len(blockSizes))*(((pseudoval-jackknifeEstimator)**2)/(hj-1))
70+
return (jackknifeEstimator,jackknifeStdErr)

0 commit comments

Comments
 (0)