Skip to content

Commit 2034c01

Browse files
authored
Merge pull request #266 from TeresasaZ/dev
modified mfinder, added github environment skip pytest
2 parents 3d27852 + 5cb744d commit 2034c01

File tree

3 files changed

+281
-1
lines changed

3 files changed

+281
-1
lines changed

api/resources/interactions.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
"""
66

77
from flask_restx import Namespace, Resource, fields
8-
from flask import request
8+
from flask import request, jsonify
99
from markupsafe import escape
1010
from api.utils.bar_utils import BARUtils
11+
from api.utils.mfinder_utils import MfinderUtils
1112
from marshmallow import Schema, ValidationError, fields as marshmallow_fields
1213
from api import db
1314
from api.models.rice_interactions import Interactions as RiceInteractions
@@ -156,3 +157,26 @@ def post(self):
156157
return BARUtils.success_exit(res)
157158
else:
158159
return BARUtils.error_exit("No data for the given species/genes"), 400
160+
161+
162+
@itrns.route("/mfinder")
class MFinder(Resource):
    @itrns.expect(post_int_data)
    def post(self):
        """This endpoint was originally written by Vincent Lau to return mFinder
        results to AGENT in his express node.JS app. However Tianhui Zhao refactored
        to the BAR_API

        Expects a JSON body with a "data" list of gene-ID pairs (edges) and an
        optional "options" object of mFinder settings. Responds with 400 and an
        error payload when the body, the edges or the settings are invalid.
        """
        data = request.get_json()
        # Validate json
        try:
            data = MFinderDataSchema().load(data)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        # input_validation returns an error string on failure, otherwise the
        # filtered list of edges.
        filtered_valid_arr = MfinderUtils.input_validation(data["data"])
        if isinstance(filtered_valid_arr, str):
            return BARUtils.error_exit(filtered_valid_arr), 400

        # settings_validation returns a (message, status) tuple on failure and
        # the settings dict on success. Without this check the tuple would be
        # passed on to mFinder and surface as a server error.
        settings = MfinderUtils.settings_validation(data.get("options", {}))
        if isinstance(settings, tuple):
            message, status = settings
            return BARUtils.error_exit(message), status

        ret_json = MfinderUtils.create_files_and_mfinder(filtered_valid_arr, settings)
        return jsonify(MfinderUtils.beautify_results(ret_json))

api/utils/mfinder_utils.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
from api.utils.bar_utils import BARUtils
2+
3+
import tempfile
4+
import os
5+
import subprocess
6+
from collections import defaultdict
7+
8+
9+
class MfinderUtils:
    """Static helpers to validate a request, run mFinder on it and parse the output."""

    # Location of the mFinder executable invoked by create_files_and_mfinder.
    MFINDER_PATH = "/bartmp/mfinder"

    @staticmethod
    def uniq_with(arr, comp_func):
        """Return the items of ``arr`` in order, dropping later duplicates.

        Two items are duplicates when ``comp_func(item, kept_item)`` is truthy.
        """
        unique_arr = []
        for item in arr:
            if not any(comp_func(item, unique_item) for unique_item in unique_arr):
                unique_arr.append(item)
        return unique_arr

    @staticmethod
    def is_equal(a, b):
        """Return whether ``a == b``; the comparator used with ``uniq_with``."""
        return a == b

    @staticmethod
    def find_key(d, value):
        """Return the first key of ``d`` whose value equals ``value``.

        Raises StopIteration when no value matches.
        """
        return next(key for key, val in d.items() if val == value)

    @staticmethod
    def input_validation(input):
        """Validate the JSON "data" member and return the filtered edges.

        The expected shape is a list of two-string lists, e.g.
        [["AT1G01010", "AT5G01010"], ["AT3G10000", "AT2G03240"]].

        Returns an error message (str) when the input is invalid; otherwise a
        list of edges with self-edges and duplicate edges removed, because
        mFinder does not accept them.
        """
        if not isinstance(input, list):
            return "invalid JSON, not an arr"

        if not input:
            return "arr length 0!"

        # The type check has to come before the length check: len() on a
        # non-sized member (e.g. an int) would raise TypeError.
        if not all(isinstance(i, list) for i in input):
            return "invalid JSON, check arr members are arrs!"

        if any(len(i) != 2 for i in input):
            return "inner arr length is not of length 2!"

        if not all(isinstance(j, str) for i in input for j in i):
            return "invalid JSON, check if inside arr members are strings!"

        if not all(BARUtils.is_arabidopsis_gene_valid(j) for i in input for j in i):
            return "Invalid gene ID contained!"

        # filter self-edges and duplicate edges (mFinder does not accept)
        return MfinderUtils.uniq_with([i for i in input if i[0] != i[1]], MfinderUtils.is_equal)

    @staticmethod
    def _is_plain_int(value):
        """Return True for ints that are not booleans (bool is a subclass of int)."""
        return isinstance(value, int) and not isinstance(value, bool)

    @staticmethod
    def settings_validation(opts):
        """Validate the mFinder settings and fill in defaults for missing ones.

        Recognised keys: nd (non-directed network), r (number of random
        networks to generate), s (motif size), u (unique min) and z (z-score
        min). Defaults applied here are nd=False, r=50, s=3, u=4 and z=2.
        Limits are enforced to keep the server load reasonable.

        opts: the JSON settings object; may be empty or None, in which case
        only defaults are returned.

        Returns the completed settings dict, or an ``(error message, 400)``
        tuple when a setting is invalid.
        """
        opts = opts or {}

        # Propagate the injection check result instead of discarding it.
        injection_error = MfinderUtils.injection_check(opts)
        if injection_error is not None:
            return injection_error

        is_int = MfinderUtils._is_plain_int
        settings_obj = opts.copy()

        if "nd" not in opts:
            settings_obj["nd"] = False
        elif not isinstance(opts["nd"], bool):
            return "incorrect nd setting - is it boolean?", 400

        if "r" not in opts:
            settings_obj["r"] = 50
        elif not is_int(opts["r"]) or opts["r"] > 150:
            return "incorrect r setting - is it a number under 151?", 400

        if "s" not in opts:
            settings_obj["s"] = 3
        elif not is_int(opts["s"]) or opts["s"] < 2 or opts["s"] > 4:
            return "incorrect s setting - is it a number between 2 and 4?", 400

        if "u" not in opts:
            settings_obj["u"] = 4
        elif not is_int(opts["u"]) or opts["u"] > 999:
            return "incorrect u setting - is it a number or below 1000?", 400

        if "z" not in opts:
            settings_obj["z"] = 2
        elif not is_int(opts["z"]) or opts["z"] > 99:
            return "incorrect z setting - is it a number or below 100?", 400

        return settings_obj

    @staticmethod
    def injection_check(obj):
        """Check for injection by rejecting suspiciously long setting values.

        obj: the settings object to validate.

        Returns an ``(error message, 400)`` tuple for the first value whose
        string form is longer than 10 characters, otherwise None.
        """
        for key, value in obj.items():
            if len(str(value)) > 10:
                return f"{key} settings param is too long", 400
        return None

    @staticmethod
    def create_files_and_mfinder(input, opts_obj):
        """Run mFinder synchronously on the given edges and return its raw output.

        input: filtered list of gene-ID pairs (edges).
        opts_obj: validated settings dict with keys s, r, u, z and optionally nd.

        Returns a dict with "hashOfIds" (number -> gene ID), "mFinderStats"
        (contents of the *_OUT.txt file) and "mFinderMembers" (contents of the
        *_MEMBERS.txt file).

        Raises subprocess.CalledProcessError when mFinder exits with a
        non-zero status. All temporary files are removed in every case.
        """
        # get a hash of IDs -> numbers for later lookup and writable string
        hash_of_ids, return_str = MfinderUtils.get_gene_id_hash_map(input)

        tmpfile = tempfile.NamedTemporaryFile(mode="w+", suffix=".txt", delete=False)
        # The output files are read from the input path with ".txt" replaced
        # by "_OUT.txt" / "_MEMBERS.txt".
        base_path = tmpfile.name[:-4]
        stats_path = base_path + "_OUT.txt"
        members_path = base_path + "_MEMBERS.txt"

        try:
            with tmpfile:
                # give read/write permissions to user but nada to anybody else
                os.chmod(tmpfile.name, 0o600)
                # write to temp file which mFinder will run/read on
                tmpfile.write(return_str)

            # An argument list (no shell) keeps the settings from ever being
            # interpreted by a shell.
            command = [
                MfinderUtils.MFINDER_PATH,
                tmpfile.name,
                "-s",
                str(opts_obj["s"]),
                "-r",
                str(opts_obj["r"]),
                "-u",
                str(opts_obj["u"]),
                "-z",
                str(opts_obj["z"]),
            ]
            if opts_obj.get("nd"):
                command.append("-nd")
            command.append("-omem")
            subprocess.run(command, check=True)

            with open(stats_path, "r") as stats_file:
                mfinder_stats = stats_file.read()

            with open(members_path, "r") as members_file:
                mfinder_members = members_file.read()
        finally:
            # Clean up the input and both output files, even when mFinder failed.
            for path in (tmpfile.name, stats_path, members_path):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass

        return {"hashOfIds": hash_of_ids, "mFinderStats": mfinder_stats, "mFinderMembers": mfinder_members}

    @staticmethod
    def get_gene_id_hash_map(input):
        """Number the gene IDs of the edges and build the mFinder input text.

        Each distinct gene ID gets the next integer (starting at 1) in order of
        first appearance. Every edge becomes a line "<source> <target> 1\\n",
        i.e. [["PHE", "PAT"], ["PAT", "PAN"]] becomes "1 2 1\\n2 3 1\\n".

        Returns a tuple ``(hash_of_ids, return_str)`` where hash_of_ids maps
        number -> gene ID (missing numbers look up as None) and return_str is
        the text to write to the mFinder input file.
        """
        hash_of_ids = defaultdict(lambda: None)
        # Reverse lookup (gene ID -> number) so each edge is handled in O(1).
        number_of_id = {}
        lines = []
        for item in input:
            for gene_id in (item[0], item[1]):
                if gene_id not in number_of_id:
                    number = len(number_of_id) + 1
                    number_of_id[gene_id] = number
                    hash_of_ids[number] = gene_id
            lines.append(f"{number_of_id[item[0]]} {number_of_id[item[1]]} 1\n")

        return hash_of_ids, "".join(lines)

    @staticmethod
    def beautify_results(mfinder_res_obj):
        """Parse the mFinder stats and members text into a structured result.

        mfinder_res_obj: dict with "mFinderStats", "mFinderMembers" and
        "hashOfIds", as returned by create_files_and_mfinder.

        Returns ``BARUtils.success_exit`` applied to a dict with "sigMotifs"
        (motif id -> statistics) and "motifList" (subgraph id -> list of
        tab-separated gene IDs, one entry per motif member row).

        Raises ValueError when the stats text lacks the expected delimiter.
        """
        stats = mfinder_res_obj["mFinderStats"]
        mems = mfinder_res_obj["mFinderMembers"]
        id_map = mfinder_res_obj["hashOfIds"]
        ret_obj = {"sigMotifs": {}, "motifList": {}}

        try:
            sig_motifs_str = stats.split("[MILI]\t\n\n")[1].split("Full")[0].split("\n\n")
        # In case stats has less than 2 parts after split('[MILI]\t\n\n')[1]
        except IndexError as err:
            raise ValueError("Expected delimiter '[MILI]\t\n\n' or 'Full' not found in the stats string.") from err

        # Keep every second chunk and drop the trailing two chunks.
        # NOTE(review): presumably these are blank/footer chunks of the
        # mFinder report - confirm against a real output file.
        sig_motifs_str = sig_motifs_str[: len(sig_motifs_str) - 2 : 2]
        for item in sig_motifs_str:
            split_stats_for_motif_id = item.split("\t")
            ret_obj["sigMotifs"][split_stats_for_motif_id[0]] = {
                "numAppearances": split_stats_for_motif_id[1],
                "numAppearancesRand": split_stats_for_motif_id[2],
                "appearancesZScore": split_stats_for_motif_id[3],
                "pValue": split_stats_for_motif_id[4],
                "uniq": split_stats_for_motif_id[5],
                "conc": split_stats_for_motif_id[6],
            }

        subgraphs_list_str = mems.split("subgraph id = ")[1:]
        for subgraph_str in subgraphs_list_str:
            member_list_split = subgraph_str.split("\n")
            # Rows 5..-2 of each section are read as the member rows.
            motif_mem_list = [i.rstrip("\t") for i in member_list_split[5:-2]]
            motif_mem_results = []
            for row in motif_mem_list:
                # Translate every column, so motif sizes other than 3 (the
                # "s" setting allows 2 to 4) are handled as well.
                gene_ids = (str(id_map[int(number)]) for number in row.split("\t"))
                motif_mem_results.append("\t".join(gene_ids))  # i.e. PAT\tPAN\tEGFR
            ret_obj["motifList"][member_list_split[0]] = motif_mem_results

        return BARUtils.success_exit(ret_obj)

tests/resources/test_interactions.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from api import app
22
from unittest import TestCase
33
import json
4+
from json import load
5+
import os
46

57

68
class TestIntegrations(TestCase):
@@ -131,3 +133,64 @@ def test_post_itrns(self):
131133
"error": "No data for the given species/genes",
132134
}
133135
self.assertEqual(data, expected)
136+
137+
def test_mfinder(self):
    """
    This function test mfinder via POST.
    """
    # Valid request
    # Skipped in the GitHub Actions environment (presumably because the
    # mFinder binary is not available there - confirm). The previous CI
    # branch only compared the expected fixture with itself, which verified
    # nothing.
    if os.getenv("GITHUB_ACTIONS") != "true":
        with open("tests/data/mfinder_input.json") as json_file_1:
            input_data = load(json_file_1)
        response = self.app_client.post(
            "/interactions/mfinder",
            json=input_data,
        )
        data = json.loads(response.get_data(as_text=True))
        with open("tests/data/mfinder_output.json") as json_file_2:
            expected = load(json_file_2)
        self.assertEqual(data, expected)

    # Invalid data structure
    response = self.app_client.post("/interactions/mfinder", json={"data": {}})
    data = json.loads(response.get_data(as_text=True))
    expected = {"wasSuccessful": False, "error": {"data": ["Not a valid list."]}}
    self.assertEqual(data, expected)

    response = self.app_client.post("/interactions/mfinder", json={"data": []})
    data = json.loads(response.get_data(as_text=True))
    expected = {"wasSuccessful": False, "error": "arr length 0!"}
    self.assertEqual(data, expected)

    response = self.app_client.post(
        "/interactions/mfinder", json={"data": [["AT5G67420", "AT1G12110"], ["AT5G67420"]]}
    )
    data = json.loads(response.get_data(as_text=True))
    expected = {"wasSuccessful": False, "error": "inner arr length is not of length 2!"}
    self.assertEqual(data, expected)

    response = self.app_client.post("/interactions/mfinder", json={"data": [["AT5G67420", "AT1G12110"], 1]})
    data = json.loads(response.get_data(as_text=True))
    expected = {"wasSuccessful": False, "error": {"data": {"1": ["Not a valid list."]}}}
    self.assertEqual(data, expected)

    response = self.app_client.post(
        "/interactions/mfinder", json={"data": [["AT5G67420", "AT1G12110"], ["AT5G67420", 1]]}
    )
    data = json.loads(response.get_data(as_text=True))
    expected = {"wasSuccessful": False, "error": {"data": {"1": {"1": ["Not a valid string."]}}}}
    self.assertEqual(data, expected)

    # Invalid gene ID
    response = self.app_client.post(
        "/interactions/mfinder", json={"data": [["AT1G01010", "AT5G01010"], ["001G01030", "AT2G03240"]]}
    )
    data = json.loads(response.get_data(as_text=True))
    expected = {"wasSuccessful": False, "error": "Invalid gene ID contained!"}
    self.assertEqual(data, expected)

0 commit comments

Comments
 (0)