Skip to content

Commit 7f376ba

Browse files
committed
p-value should not be required
1 parent a3ee9a4 commit 7f376ba

File tree

2 files changed

+15
-7
lines changed

2 files changed

+15
-7
lines changed

idr/run_idr.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
https://sites.google.com/site/anshulkundaje/projects/idr
1515
1616
'''
17-
import os
1817
from argparse import ArgumentParser
19-
from idr.utils import IdrUtilities
20-
from idr.idr_caller import IdrCaller
2118
import math
19+
import os
2220

21+
from idr.idr_caller import IdrCaller
22+
from idr.utils import IdrUtilities
2323
class IdrArgumentParser(ArgumentParser):
2424
def __init__(self):
2525
description = '''Functions for running Irreproducibility Discovery Rate
@@ -105,7 +105,8 @@ def homer2narrow(self, options, peak_files, output_dir=None):
105105
106106
Returns the set of filenames for generated narrowPeak files.
107107
'''
108-
self.check_output_dir(output_dir or options.output_dir)
108+
output_dir = output_dir or options.output_dir
109+
self.check_output_dir(output_dir)
109110

110111
idrutils = IdrUtilities()
111112
output_files = []

idr/utils.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,15 @@ def homer_to_narrow_peaks(self, data, output_file):
131131
132132
'''
133133

134+
# We don't want to require p-value, as Homer doesn't always output it.
135+
# Prep it here if it exists; otherwise fall back to a -1 placeholder for every row.
136+
pval_col = self.get_first_column(data,
137+
self.p_value_columns, required=False)
138+
if pval_col:
139+
pvals = -np.log10(pval_col)
140+
else:
141+
pvals = pvals = [-1]*data.shape[0]
142+
134143
columns = OrderedDict((
135144
('chrom', self.get_first_column(data, ['chr','chrom', 'chromosome'])),
136145
('chromStart', self.get_first_column(data, ['chromStart','start'])),
@@ -139,9 +148,7 @@ def homer_to_narrow_peaks(self, data, output_file):
139148
('score', Series([0]*data.shape[0])), # Leave zero so that signalValue column is used
140149
('strand', self.get_first_column(data, ['strand'])),
141150
('signalValue', self.get_first_column(data, self.tag_count_columns)),
142-
('pValue', (-np.log10(self.get_first_column(data,
143-
self.p_value_columns, required=False))
144-
or self.get_first_column(data, self.tag_count_columns))), # P-value if it exists, or tag count
151+
('pValue', pvals), # -log10(p-value) if it exists, otherwise a -1 placeholder
145152
('qValue', Series([-1]*data.shape[0])), # Leave -1 as no individual FDR is called for each peak
146153
('peak', Series([-1]*data.shape[0])), # Leave -1 as no point-source is called for each peak
147154
))

0 commit comments

Comments
 (0)