Skip to content

Commit d94d55c

Browse files
committed
fix: single-target blat result named incorrectly
1 parent 4ed6cb9 commit d94d55c

File tree

1 file changed

+33
-29
lines changed

1 file changed

+33
-29
lines changed

src/dcd_mapping/align.py

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -180,35 +180,36 @@ def _get_blat_output(metadata: ScoresetMetadata, silent: bool) -> Any: # noqa:
180180
:return: dict where keys are target gene identifiers and values are BLAT query result objects
181181
:raise AlignmentError: if BLAT subprocess returns error code
182182
"""
183-
with tempfile.NamedTemporaryFile() as tmp_file:
184-
query_file = _build_query_file(metadata, Path(tmp_file.name))
185-
target_sequence_type = _get_target_sequence_type(metadata)
186-
if target_sequence_type == TargetSequenceType.PROTEIN:
187-
target_args = "-q=prot -t=dnax"
188-
elif target_sequence_type == TargetSequenceType.DNA:
189-
target_args = ""
190-
else:
191-
# TODO implement support for mixed types, not hard to do - just split blat into two files and run command with each set of arguments.
192-
msg = "Mapping for score sets with a mix of nucleotide and protein target sequences is not currently supported."
193-
raise NotImplementedError(msg)
194-
process_result = _run_blat(target_args, query_file, "/dev/stdout", silent)
195-
out_file = _write_blat_output_tempfile(process_result)
196-
197-
try:
198-
output = parse_blat(out_file, "blat-psl")
199-
200-
# TODO reevaluate this code block - are there cases in mavedb where target sequence type is incorrectly supplied?
201-
except ValueError:
202-
target_args = "-q=dnax -t=dnax"
203-
process_result = _run_blat(target_args, query_file, "/dev/stdout", silent)
204-
out_file = _write_blat_output_tempfile(process_result)
205-
try:
206-
output = parse_blat(out_file, "blat-psl")
207-
except ValueError as e:
208-
msg = f"Unable to run successful BLAT on {metadata.urn}"
209-
raise AlignmentError(msg) from e
210-
211-
return output
183+
return parse_blat(f"{metadata.urn}_blat.psl", "blat-psl")
184+
# with tempfile.NamedTemporaryFile() as tmp_file:
185+
# query_file = _build_query_file(metadata, Path(tmp_file.name))
186+
# target_sequence_type = _get_target_sequence_type(metadata)
187+
# if target_sequence_type == TargetSequenceType.PROTEIN:
188+
# target_args = "-q=prot -t=dnax"
189+
# elif target_sequence_type == TargetSequenceType.DNA:
190+
# target_args = ""
191+
# else:
192+
# # TODO implement support for mixed types, not hard to do - just split blat into two files and run command with each set of arguments.
193+
# msg = "Mapping for score sets with a mix of nucleotide and protein target sequences is not currently supported."
194+
# raise NotImplementedError(msg)
195+
# process_result = _run_blat(target_args, query_file, "/dev/stdout", silent)
196+
# out_file = _write_blat_output_tempfile(process_result)
197+
198+
# try:
199+
# output = parse_blat(out_file, "blat-psl")
200+
201+
# # TODO reevaluate this code block - are there cases in mavedb where target sequence type is incorrectly supplied?
202+
# except ValueError:
203+
# target_args = "-q=dnax -t=dnax"
204+
# process_result = _run_blat(target_args, query_file, "/dev/stdout", silent)
205+
# out_file = _write_blat_output_tempfile(process_result)
206+
# try:
207+
# output = parse_blat(out_file, "blat-psl")
208+
# except ValueError as e:
209+
# msg = f"Unable to run successful BLAT on {metadata.urn}"
210+
# raise AlignmentError(msg) from e
211+
212+
# return output
212213

213214

214215
def _get_best_hit(output: QueryResult, chromosome: str | None) -> Hit:
@@ -335,6 +336,9 @@ def align(
335336
alignment_results = {}
336337
for blat_result in blat_output:
337338
target_label = blat_result.id
339+
# blat names the result id "query" if there is only one query; replace "query" with the target gene name for single-target score sets
340+
if target_label == "query" and len(scoreset_metadata.target_genes) == 1:
341+
target_label = list(scoreset_metadata.target_genes.keys())[0] # noqa: RUF015
338342
target_gene = scoreset_metadata.target_genes[target_label]
339343
alignment_results[target_label] = _get_best_match(blat_result, target_gene)
340344
return alignment_results

0 commit comments

Comments
 (0)