diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e7010a..2bf2562 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # Next +# v1.2.2 (06-25-2025) + +- Improved fix for [AVX512](https://github.com/BioRadOpenSource/ish/issues/50) without using `keep` + # v1.2.1 (06-25-2025) - Fixed bug in logging module related to update from Mojo 24.3 to 24.4 replacing `write_args`. diff --git a/ishlib/matcher/alignment/local_aln/striped.mojo b/ishlib/matcher/alignment/local_aln/striped.mojo index 0fc3fbb..9cac438 100644 --- a/ishlib/matcher/alignment/local_aln/striped.mojo +++ b/ishlib/matcher/alignment/local_aln/striped.mojo @@ -118,24 +118,29 @@ struct Profile[SIMD_U8_WIDTH: Int, SIMD_U16_WIDTH: Int]: var length = Int(score_matrix.size * segment_length) var profile = AlignedMemory[T, size, size](length) - # Generate query profile and rearrange query sequence and calculate the weight of match/mismatch var p = profile.as_span() - var t_idx = 0 + + var bias_typed = bias.cast[T]() + for nt in range(0, score_matrix.size): + var nt_base_idx = nt * segment_length + for i in range(0, segment_length): - var j = i - for segment_idx in range(0, size): - keep(t_idx) - keep(segment_idx) - p[t_idx][segment_idx] = ( - bias if j - >= len(query) else ( - score_matrix.get(nt, Int(query[j])) - + bias.cast[DType.int8]() - ).cast[DType.uint8]() - ).cast[T]() - j += segment_length - t_idx += 1 + var simd_vector = SIMD[T, size]() + + @parameter + for lane in range(size): + var query_pos = i + lane * segment_length + if query_pos < len(query): + var score = score_matrix.get(nt, Int(query[query_pos])) + simd_vector[lane] = ( + score + bias.cast[DType.int8]() + ).cast[T]() + else: + simd_vector[lane] = bias_typed + + p[nt_base_idx + i] = simd_vector + return profile @@ -294,7 +299,9 @@ fn sw[ p_vecs.init_columns(len(reference)) var max_score = UInt8(0).cast[dt]() var end_query: Int32 = query_len - 1 - var end_reference: Int32 = -1 # 0 based best alignment ending point; initialized as isn't aligned -1 + var end_reference: Int32 = ( + -1 + ) # 0 based best alignment ending point; initialized as isn't aligned -1 var segment_length = p_vecs.segment_length # Note: diff --git a/ishlib/matcher/alignment/semi_global_aln/striped.mojo b/ishlib/matcher/alignment/semi_global_aln/striped.mojo index cc6d6c8..6d6d672 100644 --- a/ishlib/matcher/alignment/semi_global_aln/striped.mojo +++ b/ishlib/matcher/alignment/semi_global_aln/striped.mojo @@ -107,24 +107,29 @@ struct Profile[ var length = Int(score_matrix.size * segment_length) var profile = AlignedMemory[T, size, size](length) - # Generate query profile and rearrange query sequence and calculate the weight of match/mismatch var p = profile.as_span() - var t_idx = 0 + + var bias_typed = bias.cast[T]() + for nt in range(0, score_matrix.size): + var nt_base_idx = nt * segment_length + for i in range(0, segment_length): - var j = i - for segment_idx in range(0, size): - keep(t_idx) - keep(segment_idx) - p[t_idx][segment_idx] = ( - bias if j - >= len(query) else ( - score_matrix.get(nt, Int(query[j])) - + bias.cast[DType.int8]() - ).cast[DType.uint8]() - ).cast[T]() - j += segment_length - t_idx += 1 + var simd_vector = SIMD[T, size]() + + @parameter + for lane in range(size): + var query_pos = i + lane * segment_length + if query_pos < len(query): + var score = score_matrix.get(nt, Int(query[query_pos])) + simd_vector[lane] = ( + score + bias.cast[DType.int8]() + ).cast[T]() + else: + simd_vector[lane] = bias_typed + + p[nt_base_idx + i] = simd_vector + return profile @@ -287,7 +292,9 @@ fn semi_global_aln[ return AlignmentResult(AlignmentEnd(0, 0, 0)) var end_query: Int32 = query_len - 1 - var end_reference: Int32 = -1 # 0 based best alignment ending point; initialized as isn't aligned -1 + var end_reference: Int32 = ( + -1 + ) # 0 based best alignment ending point; initialized as isn't aligned -1 var segment_length = (query_len + width - 1) // width var offset = (query_len - 1) % segment_length var position = (width - 1) - (query_len - 1) // segment_length @@ -413,13 +420,13 @@ fn semi_global_aln[ # Possible speedup - check if v_f has any updates to start with var break_out = False for _k in range(0, width): - var tmp = (ZERO - gap_open_penalty).cast[ - DType.int32 - ]() if free_target_start_gaps else ( - boundary[i + 1] - gap_open_penalty - ).cast[ - DType.int32 - ]() + var tmp = ( + (ZERO - gap_open_penalty) + .cast[DType.int32]() if free_target_start_gaps else ( + boundary[i + 1] - gap_open_penalty + ) + .cast[DType.int32]() + ) var tmp2 = MIN if tmp < Int32(MIN) else tmp.cast[dt]() v_f = v_f.shift_right[1]() diff --git a/pixi.toml b/pixi.toml index 569115c..417a6e4 100644 --- a/pixi.toml +++ b/pixi.toml @@ -12,7 +12,7 @@ preview = ["pixi-build"] [package] name = "ish" -version = "1.2.1" +version = "1.2.2" license = "Apache-2.0" [package.build] diff --git a/recipe.yaml b/recipe.yaml index 706fc53..380e27e 100644 --- a/recipe.yaml +++ b/recipe.yaml @@ -1,7 +1,7 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/prefix-dev/recipe-format/main/schema.json context: - version: "1.2.1" + version: "1.2.2" modular_version: "=25.4" extramojo_version: "=0.15"