Skip to content

Commit bf690d4

Browse files
committed
fix: improved query profile creation to fix avx512 bug without using keep
Signed-off-by: Seth Stadick <[email protected]>
1 parent 5be7699 commit bf690d4

File tree

5 files changed

+59
-41
lines changed

5 files changed

+59
-41
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11

22
# Next
33

4+
# v1.2.2 (06-25-2025)
5+
6+
- Improved fix for the [AVX512 bug](https://github.com/BioRadOpenSource/ish/issues/50): query profile creation was reworked so that it no longer relies on `keep`
7+
48
# v1.2.1 (06-25-2025)
59

610
- Fixed bug in logging module related to the update from Mojo 25.3 to 25.4, which replaced `write_args`.

ishlib/matcher/alignment/local_aln/striped.mojo

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -118,24 +118,29 @@ struct Profile[SIMD_U8_WIDTH: Int, SIMD_U16_WIDTH: Int]:
118118
var length = Int(score_matrix.size * segment_length)
119119
var profile = AlignedMemory[T, size, size](length)
120120

121-
# Generate query profile and rearrange query sequence and calculate the weight of match/mismatch
122121
var p = profile.as_span()
123-
var t_idx = 0
122+
123+
var bias_typed = bias.cast[T]()
124+
124125
for nt in range(0, score_matrix.size):
126+
var nt_base_idx = nt * segment_length
127+
125128
for i in range(0, segment_length):
126-
var j = i
127-
for segment_idx in range(0, size):
128-
keep(t_idx)
129-
keep(segment_idx)
130-
p[t_idx][segment_idx] = (
131-
bias if j
132-
>= len(query) else (
133-
score_matrix.get(nt, Int(query[j]))
134-
+ bias.cast[DType.int8]()
135-
).cast[DType.uint8]()
136-
).cast[T]()
137-
j += segment_length
138-
t_idx += 1
129+
var simd_vector = SIMD[T, size]()
130+
131+
@parameter
132+
for lane in range(size):
133+
var query_pos = i + lane * segment_length
134+
if query_pos < len(query):
135+
var score = score_matrix.get(nt, Int(query[query_pos]))
136+
simd_vector[lane] = (
137+
score + bias.cast[DType.int8]()
138+
).cast[T]()
139+
else:
140+
simd_vector[lane] = bias_typed
141+
142+
p[nt_base_idx + i] = simd_vector
143+
139144
return profile
140145

141146

@@ -294,7 +299,9 @@ fn sw[
294299
p_vecs.init_columns(len(reference))
295300
var max_score = UInt8(0).cast[dt]()
296301
var end_query: Int32 = query_len - 1
297-
var end_reference: Int32 = -1 # 0 based best alignment ending point; initialized as isn't aligned -1
302+
var end_reference: Int32 = (
303+
-1
304+
) # 0 based best alignment ending point; initialized as isn't aligned -1
298305
var segment_length = p_vecs.segment_length
299306

300307
# Note:

ishlib/matcher/alignment/semi_global_aln/striped.mojo

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -107,24 +107,29 @@ struct Profile[
107107
var length = Int(score_matrix.size * segment_length)
108108
var profile = AlignedMemory[T, size, size](length)
109109

110-
# Generate query profile and rearrange query sequence and calculate the weight of match/mismatch
111110
var p = profile.as_span()
112-
var t_idx = 0
111+
112+
var bias_typed = bias.cast[T]()
113+
113114
for nt in range(0, score_matrix.size):
115+
var nt_base_idx = nt * segment_length
116+
114117
for i in range(0, segment_length):
115-
var j = i
116-
for segment_idx in range(0, size):
117-
keep(t_idx)
118-
keep(segment_idx)
119-
p[t_idx][segment_idx] = (
120-
bias if j
121-
>= len(query) else (
122-
score_matrix.get(nt, Int(query[j]))
123-
+ bias.cast[DType.int8]()
124-
).cast[DType.uint8]()
125-
).cast[T]()
126-
j += segment_length
127-
t_idx += 1
118+
var simd_vector = SIMD[T, size]()
119+
120+
@parameter
121+
for lane in range(size):
122+
var query_pos = i + lane * segment_length
123+
if query_pos < len(query):
124+
var score = score_matrix.get(nt, Int(query[query_pos]))
125+
simd_vector[lane] = (
126+
score + bias.cast[DType.int8]()
127+
).cast[T]()
128+
else:
129+
simd_vector[lane] = bias_typed
130+
131+
p[nt_base_idx + i] = simd_vector
132+
128133
return profile
129134

130135

@@ -287,7 +292,9 @@ fn semi_global_aln[
287292
return AlignmentResult(AlignmentEnd(0, 0, 0))
288293

289294
var end_query: Int32 = query_len - 1
290-
var end_reference: Int32 = -1 # 0 based best alignment ending point; initialized as isn't aligned -1
295+
var end_reference: Int32 = (
296+
-1
297+
) # 0 based best alignment ending point; initialized as isn't aligned -1
291298
var segment_length = (query_len + width - 1) // width
292299
var offset = (query_len - 1) % segment_length
293300
var position = (width - 1) - (query_len - 1) // segment_length
@@ -413,13 +420,13 @@ fn semi_global_aln[
413420
# Possible speedup - check if v_f has any updates to start with
414421
var break_out = False
415422
for _k in range(0, width):
416-
var tmp = (ZERO - gap_open_penalty).cast[
417-
DType.int32
418-
]() if free_target_start_gaps else (
419-
boundary[i + 1] - gap_open_penalty
420-
).cast[
421-
DType.int32
422-
]()
423+
var tmp = (
424+
(ZERO - gap_open_penalty)
425+
.cast[DType.int32]() if free_target_start_gaps else (
426+
boundary[i + 1] - gap_open_penalty
427+
)
428+
.cast[DType.int32]()
429+
)
423430
var tmp2 = MIN if tmp < Int32(MIN) else tmp.cast[dt]()
424431

425432
v_f = v_f.shift_right[1]()

pixi.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ preview = ["pixi-build"]
1212

1313
[package]
1414
name = "ish"
15-
version = "1.2.1"
15+
version = "1.2.2"
1616
license = "Apache-2.0"
1717

1818
[package.build]

recipe.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# yaml-language-server: $schema=https://raw.githubusercontent.com/prefix-dev/recipe-format/main/schema.json
22

33
context:
4-
version: "1.2.1"
4+
version: "1.2.2"
55
modular_version: "=25.4"
66
extramojo_version: "=0.15"
77

0 commit comments

Comments
 (0)