Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions examples/gwas.gwas
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
CHR BP SNP P BETA SE MAF
X 3001200 rs100001 2.5e-09 0.62 0.09 0.21
X 3003450 rs100002 7.8e-08 0.55 0.10 0.23
X 3007890 rs100003 1.1e-06 0.41 0.08 0.27
X 3009990 rs100004 4.3e-05 0.33 0.08 0.30
X 3032100 rs100005 9.2e-10 0.71 0.11 0.18
X 3045600 rs100006 3.4e-08 0.59 0.10 0.20
X 3060200 rs100007 6.7e-07 0.48 0.09 0.25
X 3078500 rs100008 1.9e-05 0.36 0.08 0.29
X 3100400 rs100009 8.8e-06 -0.40 0.09 0.26
X 3150000 rs100010 2.2e-07 -0.52 0.10 0.22
X 3195000 rs100011 1.5e-09 -0.68 0.11 0.19
X 3199800 rs100012 5.0e-08 -0.60 0.10 0.21
80 changes: 80 additions & 0 deletions pygenometracks/readGwas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
import sys
import collections
from .utilities import to_string, InputError


class ReadGwas(object):
"""
Reads a GWAS file. The expected fields are:
chromosome, position, name, and pvalue.

Example:
gwas = ReadGwas(open("file.gwas", 'r'))
for record in gwas:
print(record.chromosome, record.position, record.pvalue)
"""

def __init__(self, file_handle, has_header=False):
"""
:param file_handle: file handle
"""
self.file_handle = file_handle
self.line_number = 0

# Define the fields for GWAS
self.fields = ['chromosome', 'position', 'name', 'pvalue']
self.GwasRecord = collections.namedtuple('GwasRecord', self.fields)

# Skip the header line if present
if has_header:
header = next(self.file_handle)
self.line_number += 1

def __iter__(self):
return self

def get_no_comment_line(self):
"""
Skips comment lines starting with '#' or empty lines.
:return: a valid line
"""
line = next(self.file_handle)
line = to_string(line)
if line.startswith("#") or line.strip() == '':
line = self.get_no_comment_line()

self.line_number += 1
return line

def __next__(self):
"""
:return: GwasRecord object
"""
line = self.get_no_comment_line()
return self.get_gwas_record(line)

def get_gwas_record(self, gwas_line):
"""
Processes each line from a GWAS file and returns a namedtuple object.

:param gwas_line: a single line from the GWAS file
:return: GwasRecord object
"""
line_data = gwas_line.strip()
line_data = to_string(line_data)
line_data = line_data.split("\t")

if len(line_data) < 4:
raise InputError(f"Line {self.line_number} does not have 4 fields: {gwas_line}."
f"We expect at least 4 field, corresponding to: chromosome, position, name, pvalue.")

try:
chromosome = line_data[0]
position = int(line_data[1])
name = line_data[2]
pvalue = float(line_data[3])
except ValueError as e:
raise InputError(f"Error parsing line {self.line_number}: {gwas_line}\n{e}")

return self.GwasRecord(chromosome, position, name, pvalue)
16 changes: 16 additions & 0 deletions pygenometracks/tests/test_data/gwas.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

[gwas]
file = gwas_1.gwas
height = 4
title = test_1

[spacer]

[gwas_2]
file = gwas_2.gwas
file_has_header = True
height = 2
title = test_2
color = #50E3C2

[x-axis]
12 changes: 12 additions & 0 deletions pygenometracks/tests/test_data/gwas_1.gwas
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
X 3002145 rs7823451 4.7e-08
X 3005892 rs6610293 1.3e-06
X 3008734 rs9034128 9.2e-05
X 3031022 rs4420981 6.1e-10
X 3042567 rs2093847 2.8e-07
X 3056789 rs7734012 5.5e-09
X 3072341 rs1182736 3.9e-06
X 3079880 rs5502918 7.4e-05
X 3112450 rs3847291 1.8e-08
X 3145670 rs6029184 9.9e-07
X 3180230 rs2938471 3.2e-10
X 3197650 rs8840123 6.3e-08
13 changes: 13 additions & 0 deletions pygenometracks/tests/test_data/gwas_2.gwas
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
CHR BP SNP P BETA SE MAF
X 3001200 rs100001 2.5e-09 0.62 0.09 0.21
X 3003450 rs100002 7.8e-08 0.55 0.10 0.23
X 3007890 rs100003 1.1e-06 0.41 0.08 0.27
X 3009990 rs100004 4.3e-05 0.33 0.08 0.30
X 3032100 rs100005 9.2e-10 0.71 0.11 0.18
X 3045600 rs100006 3.4e-08 0.59 0.10 0.20
X 3060200 rs100007 6.7e-07 0.48 0.09 0.25
X 3078500 rs100008 1.9e-05 0.36 0.08 0.29
X 3100400 rs100009 8.8e-06 -0.40 0.09 0.26
X 3150000 rs100010 2.2e-07 -0.52 0.10 0.22
X 3195000 rs100011 1.5e-09 -0.68 0.11 0.19
X 3199800 rs100012 5.0e-08 -0.60 0.10 0.21
Binary file added pygenometracks/tests/test_data/master_gwas.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
50 changes: 50 additions & 0 deletions pygenometracks/tests/test_gwasTrack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import matplotlib as mpl
from matplotlib.testing.compare import compare_images
from tempfile import NamedTemporaryFile
import os.path
import pygenometracks.plotTracks

mpl.use('agg')

ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"test_data")

tracks = """
[gwas]
file = gwas_1.gwas
height = 4
title = test_1

[spacer]

[gwas_2]
file = gwas_2.gwas
file_has_header = True
height = 2
title = test_2
color = #50E3C2

[x-axis]
"""

with open(os.path.join(ROOT, "gwas.ini"), 'w') as fh:
fh.write(tracks)

tolerance = 13 # default matplotlib pixed difference tolerance


def test_gwas_track():
outfile = NamedTemporaryFile(suffix='.png', prefix='gwas_test_',
delete=False)
ini_file = os.path.join(ROOT, "gwas.ini")
region = "X:3000000-3200000"
expected_file = os.path.join(ROOT, 'master_gwas.png')
args = f"--tracks {ini_file} --region {region} " \
"--trackLabelFraction 0.2 --dpi 130 " \
f"--outFileName {outfile.name}".split()
pygenometracks.plotTracks.main(args)
res = compare_images(expected_file,
outfile.name, tolerance)
assert res is None, res

os.remove(outfile.name)
76 changes: 76 additions & 0 deletions pygenometracks/tracks/GwasTrack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from .GenomeTrack import GenomeTrack
import numpy as np
from .. readGwas import ReadGwas

DEFAULT_GWAS_COLOR = '#ff7f00'


class GwasTrack(GenomeTrack):
SUPPORTED_ENDINGS = ['.gwas', '.linear', '.logistic', '.assoc', '.qassoc'] # this is used by make_tracks_file to guess the type of track based on file name
TRACK_TYPE = 'gwas'
OPTIONS_TXT = GenomeTrack.OPTIONS_TXT + f"""
# Title of the track. Usually displayed to the right as a label
title =
# Height of the track
height =
# File containing the data. We expect an IGV .gwas format file with the columns: CHR, BP, SNP and P. Optionally, extra
# annotation columns can be added.
file =
# Y label text
ylabel =
# Fontsize of the labels
fontsize =
# Color
color =
# Optional. If not given is guessed from the file ending.
file_type = {TRACK_TYPE}
"""

DEFAULTS_PROPERTIES = {'fontsize': 12,
'orientation': None,
'color': DEFAULT_GWAS_COLOR,
'border_color': 'black',
'labels': True,
'line_width': 0.5,
'max_labels': 60,
'max_value': 1,
'min_value': 0,
'fontstyle': 'normal',
'y_axis_max_val': None,
'id_fontsize': 12,
'marker_size': 45,
'file_has_header': False}

NECESSARY_PROPERTIES = ['file']
SYNONYMOUS_PROPERTIES = {}
POSSIBLE_PROPERTIES = {}
BOOLEAN_PROPERTIES = ['file_has_header']
STRING_PROPERTIES = ['title', 'file_type', 'file', 'color']
FLOAT_PROPERTIES = {'height': [0, np.inf], 'fontsize': [0, np.inf], 'id_fontsize': [0, np.inf],
'marker_size': [0, np.inf], 'y_axis_max_val': [0, np.inf]}
INTEGER_PROPERTIES = {}

def plot(self, ax, chrom, region_start, region_end):
"""
Plot a scatter plot for the GWAS data.
The p-values are transformed as -log10(pvalue), so the y-axis will show the exponents of the p-values.

:param ax: matplotlib axis
:param chrom: chromosome name
:param region_start: start position of the region
:param region_end: end position of the region
:return: None
"""
gwas_reader = ReadGwas(open(self.properties['file'], 'r'), has_header=self.properties['file_has_header'])

# Fill in the position and pvalues lists with data from the GWAS file
position = []
pvalues = []
for record in gwas_reader:
if record.chromosome == chrom and region_start <= record.position <= region_end:
position.append(record.position)
pvalues.append(-np.log10(record.pvalue) if record.pvalue > 0 else 0) # Notice the -log10 transformation

# Plot the scatterplot
ax.scatter(position, pvalues, s=self.properties['marker_size'], color=self.properties['color'], marker='o',
edgecolors='black', linewidths=.66)
Loading