Skip to content

Commit 6246745

Browse files
authored
Merge pull request #114 from SGSSGene/feat/searhchng28kstep
Feat/searhchng28kstep
2 parents 45ac222 + 94640e1 commit 6246745

34 files changed

+4708
-623
lines changed

.github/workflows/ci_on_pr.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,18 @@ jobs:
3535
- {os: ubuntu-22.04, compiler: gcc-latest-cpp23-debug-sanitize_thread}
3636
- {os: ubuntu-22.04, compiler: clang-third-latest-cpp23-release}
3737
- {os: ubuntu-22.04, compiler: clang-second-latest-cpp23-release, cmake_flags: "-DLIBSAIS_USE_OPENMP=NO"}
38-
- {os: ubuntu-22.04, compiler: clang-latest-cpp23-release, cmake_flags: "-DLIBSAIS_USE_OPENMP=NO"}
38+
# - {os: ubuntu-22.04, compiler: clang-latest-cpp23-release, cmake_flags: "-DLIBSAIS_USE_OPENMP=NO"}
3939
- {os: macos-15-intel, compiler: gcc-latest-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
40-
- {os: macos-15-intel, compiler: clang-latest-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
40+
# - {os: macos-15-intel, compiler: clang-latest-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
4141
- {os: macos-14, compiler: gcc-latest-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
42-
- {os: macos-14, compiler: clang-latest-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
42+
# - {os: macos-14, compiler: clang-latest-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
4343
- {os: windows-2022, compiler: msvc-cpp23-release, cmake_flags: "-DFMC_USE_SDSL=NO"}
4444
# not running any tests, since it takes too long
4545
- {os: windows-2022, compiler: msvc-cpp23-debug, cmake_flags: "-DFMC_USE_SDSL=NO"}
4646
- {os: ubuntu-22.04, compiler: emscripten-cpp23-release, cmake_flags: "-DCMAKE_EXE_LINKER_FLAGS='-sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=16000000 -sMEMORY64'", cmake_cxx_flags: "-Wno-c++11-narrowing -sMEMORY64", cmake_c_flags: "-sMEMORY64"}
4747
steps:
4848
- name: Standard IV-project testing
49-
uses: iv-project/IVaction@v11.1
49+
uses: iv-project/IVaction@v12.1
5050
with:
5151
compiler: ${{ matrix.compiler }}
5252
threads: 2

.github/workflows/cron_daily.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- {os: ubuntu-22.04, compiler: check_tag-open_issue}
2828
steps:
2929
- name: Standard IV-project testing
30-
uses: iv-project/IVaction@v11.1
30+
uses: iv-project/IVaction@v12.1
3131
with:
3232
compiler: ${{ matrix.compiler }}
3333
threads: 2

.github/workflows/cron_weekly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
- {os: ubuntu-22.04, compiler: gcc-latest-cpp23-release-open_issue}
2929
steps:
3030
- name: Standard IV-project testing
31-
uses: iv-project/IVaction@v11.1
31+
uses: iv-project/IVaction@v12.1
3232
with:
3333
compiler: ${{ matrix.compiler }}
3434
threads: 2

cpm.dependencies

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
{
4747
"if": "PROJECT_IS_TOP_LEVEL",
4848
"name": "Catch2",
49-
"version": "3.11.0",
49+
"version": "3.13.0",
5050
"github_repository": "catchorg/Catch2"
5151
},
5252
{

src/fmindex-collection/DenseVector.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ struct DenseVector {
199199

200200
template <typename Archive>
201201
void serialize(this auto&& self, Archive& ar) {
202-
ar(self.data, self.bitCount, self.bits);
202+
ar(self.data, self.bitCount, self.bits, self.largestValue, self.commonDivisor);
203203
}
204204

205205
};

src/fmindex-collection/fmindex/BiFMIndexCursor.h

Lines changed: 109 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ struct BiFMIndexCursor {
1515
static constexpr size_t Sigma = Index::Sigma;
1616
static constexpr bool Reversed = false;
1717

18+
constexpr bool static HasDualRank = requires(Index::String str, size_t idx) {
19+
{ str.all_ranks_dual(idx, idx, [](size_t, size_t, size_t, size_t, size_t) {}) };
20+
};
21+
1822
Index const* index{};
1923
size_t lb;
2024
size_t lbRev;
@@ -42,57 +46,65 @@ struct BiFMIndexCursor {
4246
size_t count() const {
4347
return len;
4448
}
45-
auto extendLeft() const -> std::array<BiFMIndexCursor, Sigma> {
49+
50+
auto fetchRightBwt() const -> auto const& {
51+
if constexpr (std::same_as<typename Index::RevBwtType, std::nullptr_t>) {
52+
return index->bwt;
53+
} else {
54+
return index->bwtRev;
55+
}
56+
}
57+
58+
auto extendLeft() const -> std::array<BiFMIndexCursor, Sigma> requires (!HasDualRank) {
59+
auto cursors = std::array<BiFMIndexCursor, Sigma>{};
60+
4661
auto const& bwt = index->bwt;
4762
auto [rs1, prs1] = bwt.all_ranks_and_prefix_ranks(lb);
4863
auto [rs2, prs2] = bwt.all_ranks_and_prefix_ranks(lb+len);
4964

50-
for (size_t i{0}; i < rs1.size(); ++i) {
51-
rs1[i] += index->C[i];
52-
rs2[i] += index->C[i];
65+
for (size_t i{0}; i < Sigma; ++i) {
66+
cursors[i] = BiFMIndexCursor{*index, rs1[i] + index->C[i], lbRev + prs2[i] - prs1[i], rs2[i] - rs1[i], steps+1};
5367
}
68+
return cursors;
69+
}
5470

71+
auto extendRight() const -> std::array<BiFMIndexCursor, Sigma> requires (!HasDualRank) {
5572
auto cursors = std::array<BiFMIndexCursor, Sigma>{};
73+
74+
auto const& bwt = fetchRightBwt();
75+
auto [rs1, prs1] = bwt.all_ranks_and_prefix_ranks(lbRev);
76+
auto [rs2, prs2] = bwt.all_ranks_and_prefix_ranks(lbRev+len);
77+
5678
for (size_t i{0}; i < Sigma; ++i) {
57-
cursors[i] = BiFMIndexCursor{*index, rs1[i], lbRev + prs2[i] - prs1[i], rs2[i] - rs1[i], steps+1};
79+
cursors[i] = BiFMIndexCursor{*index, lb + prs2[i] - prs1[i], rs1[i] + index->C[i], rs2[i] - rs1[i], steps+1};
5880
}
5981
return cursors;
6082
}
6183

62-
auto extendRight() const -> std::array<BiFMIndexCursor, Sigma> {
63-
// Reuse bwt if bwtrev is not available
64-
if constexpr (std::same_as<typename Index::RevBwtType, std::nullptr_t>) {
65-
auto const& bwt = index->bwt;
66-
auto [rs1, prs1] = bwt.all_ranks_and_prefix_ranks(lbRev);
67-
auto [rs2, prs2] = bwt.all_ranks_and_prefix_ranks(lbRev+len);
68-
69-
for (size_t i{0}; i < rs1.size(); ++i) {
70-
rs1[i] += index->C[i];
71-
rs2[i] += index->C[i];
72-
}
73-
74-
auto cursors = std::array<BiFMIndexCursor, Sigma>{};
75-
for (size_t i{0}; i < Sigma; ++i) {
76-
cursors[i] = BiFMIndexCursor{*index, lb + prs2[i] - prs1[i], rs1[i], rs2[i] - rs1[i], steps+1};
77-
}
78-
return cursors;
79-
} else {
80-
auto const& bwt = index->bwtRev;
81-
auto [rs1, prs1] = bwt.all_ranks_and_prefix_ranks(lbRev);
82-
auto [rs2, prs2] = bwt.all_ranks_and_prefix_ranks(lbRev+len);
84+
auto extendLeft() const -> std::array<BiFMIndexCursor, Sigma> requires HasDualRank {
85+
auto ret = std::array<BiFMIndexCursor, Sigma>{};
86+
auto& bwt = index->bwt;
87+
bwt.all_ranks_dual(lb, lb+len, [&](size_t symb, size_t rs1, size_t rs2, size_t prs1, size_t prs2) {
88+
auto newLb = index->C[symb] + rs1;
89+
auto newLen = rs2 - rs1;
90+
auto newLbRev = lbRev + prs2 - prs1;
91+
ret[symb] = BiFMIndexCursor{*index, newLb, newLbRev, newLen, steps+1};
92+
});
93+
return ret;
94+
}
95+
auto extendRight() const -> std::array<BiFMIndexCursor, Sigma> requires HasDualRank {
96+
auto ret = std::array<BiFMIndexCursor, Sigma>{};
97+
auto& bwt = fetchRightBwt();
98+
bwt.all_ranks_dual(lbRev, lbRev+len, [&](size_t symb, size_t rs1, size_t rs2, size_t prs1, size_t prs2) {
99+
auto newLbRev = index->C[symb] + rs1;
100+
auto newLen = rs2 - rs1;
101+
auto newLb = lb + prs2 - prs1;
102+
ret[symb] = BiFMIndexCursor{*index, newLb, newLbRev, newLen, steps+1};
103+
});
104+
return ret;
105+
}
83106

84-
for (size_t i{0}; i < rs1.size(); ++i) {
85-
rs1[i] += index->C[i];
86-
rs2[i] += index->C[i];
87-
}
88107

89-
auto cursors = std::array<BiFMIndexCursor, Sigma>{};
90-
for (size_t i{0}; i < Sigma; ++i) {
91-
cursors[i] = BiFMIndexCursor{*index, lb + prs2[i] - prs1[i], rs1[i], rs2[i] - rs1[i], steps+1};
92-
}
93-
return cursors;
94-
}
95-
}
96108
void prefetchLeft() const {
97109
}
98110
void prefetchRight() const {
@@ -107,23 +119,64 @@ struct BiFMIndexCursor {
107119
return newCursor;
108120
}
109121
auto extendRight(size_t symb) const -> BiFMIndexCursor {
110-
if constexpr (std::same_as<typename Index::RevBwtType, std::nullptr_t>) {
111-
auto const& bwt = index->bwt;
112-
size_t newLb = lb + bwt.prefix_rank(lbRev+len, symb) - bwt.prefix_rank(lbRev, symb);
113-
size_t newLbRev = bwt.rank(lbRev, symb);
114-
size_t newLen = bwt.rank(lbRev+len, symb) - newLbRev;
115-
auto newCursor = BiFMIndexCursor{*index, newLb, newLbRev + index->C[symb], newLen, steps+1};
116-
return newCursor;
117-
} else {
118-
auto const& bwt = index->bwtRev;
119-
size_t newLb = lb + bwt.prefix_rank(lbRev+len, symb) - bwt.prefix_rank(lbRev, symb);
120-
size_t newLbRev = bwt.rank(lbRev, symb);
121-
size_t newLen = bwt.rank(lbRev+len, symb) - newLbRev;
122-
auto newCursor = BiFMIndexCursor{*index, newLb, newLbRev + index->C[symb], newLen, steps+1};
123-
return newCursor;
124-
}
122+
auto const& bwt = fetchRightBwt();
123+
size_t newLb = lb + bwt.prefix_rank(lbRev+len, symb) - bwt.prefix_rank(lbRev, symb);
124+
size_t newLbRev = bwt.rank(lbRev, symb);
125+
size_t newLen = bwt.rank(lbRev+len, symb) - newLbRev;
126+
auto newCursor = BiFMIndexCursor{*index, newLb, newLbRev + index->C[symb], newLen, steps+1};
127+
return newCursor;
128+
}
129+
130+
// This requires that all rows have the same BWT entry (or only a single one is available)
131+
// - at least one row must be marked
132+
// - all rows must have the same 'bwt' symbol
133+
auto extendLeftBySymbol(size_t symb) const -> BiFMIndexCursor {
134+
auto& bwt = index->bwt;
135+
136+
assert(count() > 0);
137+
assert([&]() {
138+
for (size_t i{lb}; i < lb + len; ++i) {
139+
if (symb != bwt.symbol(i)) {
140+
return false;
141+
}
142+
}
143+
return true;
144+
}());
145+
146+
size_t newLb = bwt.rank(lb, symb);
147+
size_t newLbRev = lbRev;
148+
size_t newLen = len;
149+
auto newCursor = BiFMIndexCursor{*index, newLb + index->C[symb], newLbRev, newLen, steps+1};
150+
return newCursor;
125151
}
126152

153+
// see extendLeftBySymbol
154+
auto extendRightBySymbol(size_t symb) const -> BiFMIndexCursor {
155+
auto const& bwt = fetchRightBwt();
156+
size_t newLb = lb;
157+
size_t newLbRev = bwt.rank(lbRev, symb);
158+
size_t newLen = len;
159+
auto newCursor = BiFMIndexCursor{*index, newLb, newLbRev + index->C[symb], newLen, steps+1};
160+
return newCursor;
161+
}
162+
163+
// This requires that all rows have the same BWT entry (or only a single one is available)
164+
// - at least one row must be marked
165+
// - all rows must have the same 'bwt' symbol
166+
auto extendLeftBySymbol() const -> std::tuple<size_t, BiFMIndexCursor> {
167+
auto& bwt = index->bwt;
168+
auto symb = bwt.symbol(lb);
169+
return {symb, extendLeftBySymbol(symb)};
170+
}
171+
172+
// see extendLeftBySymbol
173+
auto extendRightBySymbol() const -> std::tuple<size_t, BiFMIndexCursor> {
174+
auto const& bwt = fetchRightBwt();
175+
auto symb = bwt.symbol(lbRev);
176+
return {symb, extendRightBySymbol(symb)};
177+
}
178+
179+
127180
auto symbolLeft() const -> size_t {
128181
return index->bwt.symbol(lb);
129182
}
@@ -180,19 +233,14 @@ struct LeftBiFMIndexCursor {
180233
return len;
181234
}
182235
auto extendLeft() const -> std::array<LeftBiFMIndexCursor, Sigma> {
236+
auto cursors = std::array<LeftBiFMIndexCursor, Sigma>{};
237+
183238
auto const& bwt = index->bwt;
184239
auto [rs1, prs1] = bwt.all_ranks_and_prefix_ranks(lb);
185240
auto [rs2, prs2] = bwt.all_ranks_and_prefix_ranks(lb+len);
186241

187-
for (size_t i{0}; i < rs1.size(); ++i) {
188-
rs1[i] += index->C[i];
189-
rs2[i] += index->C[i];
190-
}
191-
192-
auto cursors = std::array<LeftBiFMIndexCursor, Sigma>{};
193-
cursors[0] = LeftBiFMIndexCursor{*index, rs1[0], rs2[0] - rs1[0], steps+1};
194-
for (size_t i{1}; i < Sigma; ++i) {
195-
cursors[i] = LeftBiFMIndexCursor{*index, rs1[i], rs2[i] - rs1[i], steps+1};
242+
for (size_t i{0}; i < Sigma; ++i) {
243+
cursors[i] = LeftBiFMIndexCursor{*index, rs1[i] + index->C[i], rs2[i] - rs1[i], steps+1};
196244
}
197245
return cursors;
198246
}

0 commit comments

Comments
 (0)