Skip to content

Commit c5fd4bc

Browse files
committed
Make: Separate StringCuZilla
Separate directory for parallel algorithms
1 parent 1c1582f commit c5fd4bc

File tree

10 files changed

+83
-16
lines changed

10 files changed

+83
-16
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ status_t levenshtein_distances(
355355
cudaError_t error = cudaStreamSynchronize(stream);
356356
if (error != cudaSuccess) {
357357
if (error == cudaErrorMemoryAllocation) { return status_t::bad_alloc_k; }
358-
else { return status_t::unknown_error_k; }
358+
else { return status_t::unknown_k; }
359359
}
360360
return status_t::success_k;
361361
}
@@ -415,7 +415,7 @@ status_t needleman_wunsch_scores(
415415
cudaError_t error = cudaStreamSynchronize(stream);
416416
if (error != cudaSuccess) {
417417
if (error == cudaErrorMemoryAllocation) { return status_t::bad_alloc_k; }
418-
else { return status_t::unknown_error_k; }
418+
else { return status_t::unknown_k; }
419419
}
420420
return status_t::success_k;
421421
}
Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ SZ_INTERNAL sz_status_t _sz_levenshtein_distance_skewed_diagonals_serial( //
319319
previous_distances[0] = 0;
320320
current_distances[0] = current_distances[1] = 1;
321321

322-
// Progress through the upper triangle of the Levenshtein matrix.
322+
// Progress through the upper-left triangle of the Levenshtein matrix.
323323
sz_size_t next_diagonal_index = 2;
324324
for (; next_diagonal_index != n; ++next_diagonal_index) {
325325
sz_size_t const next_diagonal_length = next_diagonal_index + 1;
@@ -338,7 +338,7 @@ SZ_INTERNAL sz_status_t _sz_levenshtein_distance_skewed_diagonals_serial( //
338338
next_distances = temporary;
339339
}
340340

341-
// By now we've scanned through the upper triangle of the matrix, where each subsequent iteration results in a
341+
// By now we've scanned through the upper-left triangle of the matrix, where each subsequent iteration results in a
342342
// larger diagonal. From now onwards, we will be shrinking. Instead of adding value equal to the skewed diagonal
343343
// index on either side, we will be cropping those values out.
344344
sz_size_t diagonals_count = n + n - 1;
@@ -836,7 +836,7 @@ SZ_INTERNAL sz_size_t _sz_levenshtein_distance_skewed_diagonals_upto63_ice( //
836836
sz_size_t next_diagonal_index = 2;
837837
__mmask64 next_diagonal_mask = 0;
838838

839-
// Progress through the upper triangle of the Levenshtein matrix.
839+
// Progress through the upper-left triangle of the Levenshtein matrix.
840840
for (; next_diagonal_index != shorter_dim; ++next_diagonal_index) {
841841
// After this iteration, the values at offset `0` and `next_diagonal_index` in the `next_vec`
842842
// should be set to `next_diagonal_index`, but it's easier to broadcast the value to the whole vector,
@@ -869,7 +869,7 @@ SZ_INTERNAL sz_size_t _sz_levenshtein_distance_skewed_diagonals_upto63_ice( //
869869
if (_ktestz_mask64_u8(within_bound_mask, next_diagonal_mask) == 1) return bound;
870870
}
871871

872-
// Now let's handle the anti-diagonal band of the matrix, between the top and bottom triangles.
872+
// Now let's handle the anti-diagonal band of the matrix, between the upper-left and bottom-right triangles.
873873
for (; next_diagonal_index != longer_dim; ++next_diagonal_index) {
874874
// After this iteration, the value `shorter_dim - 1` in the `next_vec`
875875
// should be set to `next_diagonal_index`, but it's easier to broadcast the value to the whole vector,
@@ -1072,7 +1072,7 @@ SZ_INTERNAL sz_status_t _sz_levenshtein_distance_skewed_diagonals_upto65k_ice( /
10721072
// - 3 diagonals of decreasing length, at positions: 6, 7, 8.
10731073
sz_size_t const diagonals_count = shorter_dim + longer_dim - 1;
10741074

1075-
// Progress through the upper triangle of the Levenshtein matrix.
1075+
// Progress through the upper-left triangle of the Levenshtein matrix.
10761076
sz_size_t next_diagonal_index = 2;
10771077
for (; next_diagonal_index != shorter_dim; ++next_diagonal_index) {
10781078
sz_size_t const next_diagonal_length = next_diagonal_index + 1;
@@ -1118,7 +1118,7 @@ SZ_INTERNAL sz_status_t _sz_levenshtein_distance_skewed_diagonals_upto65k_ice( /
11181118
next_distances = temporary;
11191119
}
11201120

1121-
// By now we've scanned through the upper triangle of the matrix, where each subsequent iteration results in a
1121+
// By now we've scanned through the upper-left triangle of the matrix, where each subsequent iteration results in a
11221122
// larger diagonal. From now onwards, we will be shrinking. Instead of adding value equal to the skewed diagonal
11231123
// index on either side, we will be cropping those values out.
11241124
for (; next_diagonal_index != diagonals_count; ++next_diagonal_index) {
@@ -1216,7 +1216,9 @@ SZ_PUBLIC sz_status_t sz_levenshtein_distance_ice( //
12161216
}
12171217

12181218
/**
1219-
* Computes the Needleman Wunsch alignment score between two strings.
1219+
* @brief Computes the Needleman-Wunsch alignment score between two strings. Uses the Wagner-Fischer algorithm
1220+
* with the AVX-512VBMI extensions, vectorizing the substitution costs in each row.
1221+
*
12201222
* The method uses 32-bit integers to accumulate the running score for every cell in the matrix.
12211223
* Assuming the costs of substitutions can be arbitrary signed 8-bit integers, the method is expected to be used
12221224
* on strings not exceeding 2^24 length or 16.7 million characters.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* @brief StringZilla is a collection of advanced string algorithms, designed to be used in Big Data applications.
3+
* It is generally faster than LibC, and has a broader & cleaner interface for safer @b length-bounded strings.
4+
* On modern CPUs it uses AVX2, AVX-512, NEON, SVE, & SVE2 @b SIMD instructions & provides SWAR for older CPUs.
5+
* On @b CUDA-capable GPUs it also provides C++ kernels for bulk processing.
6+
*
7+
* @file stringzilla.cuh
8+
* @author Ash Vardanian
9+
*/
10+
#ifndef STRINGZILLA_CUH_
11+
#define STRINGZILLA_CUH_
12+
13+
#include "stringzilla.h"
14+
15+
#ifdef __cplusplus
16+
extern "C" {
17+
#endif
18+
19+
SZ_DYNAMIC sz_status_t sz_levenshtein_distances_u32tape( //
20+
sz_cptr_t a_data, sz_u32_t const *a_lengths, //
21+
sz_cptr_t b_data, sz_u32_t const *b_lengths, //
22+
sz_size_t count, //
23+
sz_size_t bound, //
24+
sz_memory_allocator_t *alloc, sz_size_t *results);
25+
26+
SZ_DYNAMIC sz_status_t sz_needleman_wunsch_scores_u32tape( //
27+
sz_cptr_t a_data, sz_u32_t const *a_lengths, //
28+
sz_cptr_t b_data, sz_u32_t const *b_lengths, //
29+
sz_size_t count, //
30+
sz_error_cost_t const *subs, sz_error_cost_t gap, //
31+
sz_memory_allocator_t *alloc, sz_ssize_t *results);
32+
33+
#ifdef __cplusplus
34+
}
35+
#endif // __cplusplus
36+
37+
#endif // STRINGZILLA_CUH_

include/stringzilla/hash.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@
1313
* Why the hell do we need yet another hashing library?!
1414
* Turns out, most existing libraries have noticeable constraints. Try finding a library that:
1515
*
16-
* - Outputs 64-bit or 128-bit hashes and passes the SMHasher test suite.
17-
* - Is fast for both short and long strings.
16+
* - Outputs 64-bit or 128-bit hashes and passes the @b SMHasher `--extra` tests.
17+
* - Is fast for both short @b (velocity) and long strings @b (throughput).
1818
* - Supports incremental @b (streaming) hashing, when the data arrives in chunks.
19-
* - Supports custom seeds hashes and secret strings for security.
20-
* - Provides dynamic dispatch for different architectures to simplify deployment.
21-
* - Uses modern SIMD, including not just AVX2 and NEON, but also AVX-512 and SVE2.
22-
* - Documents its logic and guarantees the same output across different platforms.
19+
* - Supports custom @b seeds for hashes, with the seed affecting every bit of the output.
20+
* - Provides @b dynamic-dispatch for different architectures to simplify deployment.
21+
* - Uses @b SIMD, including not just AVX2 & NEON, but also masking AVX-512 & predicated SVE2.
22+
* - Documents its logic and @b guarantees the same output across different platforms.
2323
*
2424
* This includes projects like "MurmurHash", "CityHash", "SpookyHash", "FarmHash", "MetroHash", "HighwayHash", etc.
2525
* There are 2 libraries that are close to meeting these requirements: "xxHash" in C++ and "aHash" in Rust:

include/stringzilla/stringzilla.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,24 @@
1212
*
1313
* @section Introduction
1414
*
15+
* StringZilla is a multi-language project designed for high-throughput string processing, differentiating
16+
* the low-level "embeddable" mostly-C core implementation, containing:
17+
*
18+
* - `compare.h` - byte-level comparison functions.
19+
* - `memory.h` - copying, moving, and filling raw memory.
20+
* - `hash.h` - hash functions and checksum algorithms.
21+
* - `find.h` - searching for substrings and byte sets.
22+
* - `sort.h` - single-threaded sorting algorithms.
23+
* - `intersect.h` - intersections of unordered string sets.
24+
* - `small_string.h` - "Small String Optimization" in C 99.
25+
* - `stringzilla.h` - umbrella header for the core C API.
26+
* - `stringzilla.hpp` - umbrella header for the core C++ API.
27+
*
28+
* It also provides many higher-level algorithms, mostly implemented in C++ with OpenMP and CUDA,
29+
* also exposed via the stable C 99 ABI, but requiring C++17-capable C++ and CUDA compilers to build the shared libraries:
30+
*
31+
* - `similarity.hpp` - similarity measures, like Levenshtein distance, Needleman-Wunsch, & Smith-Waterman alignment.
32+
* - `features.hpp` - feature extraction for TF-IDF and other Machine Learning algorithms.
1533
*
1634
* @section Compilation Settings
1735
*
@@ -36,6 +54,8 @@
3654
* - `SZ_USE_NEON=?` - whether to use NEON instructions on ARM.
3755
* - `SZ_USE_SVE=?` - whether to use SVE instructions on ARM.
3856
* - `SZ_USE_SVE2=?` - whether to use SVE2 instructions on ARM.
57+
* - `SZ_USE_CUDA=?` - whether to use CUDA kernels on GPUs.
58+
* - `SZ_USE_OPENMP=?` - whether to use OpenMP for parallel algorithms.
3959
*/
4060
#ifndef STRINGZILLA_H_
4161
#define STRINGZILLA_H_

include/stringzilla/types.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,12 @@ typedef enum { sz_false_k = 0, sz_true_k = 1 } sz_bool_t;
411411
*/
412412
typedef enum { sz_less_k = -1, sz_equal_k = 0, sz_greater_k = 1 } sz_ordering_t;
413413

414+
/**
415+
* @brief Describes the alignment goal for string similarity algorithms.
416+
* @sa sz_align_global_k, sz_align_local_k
417+
*/
418+
typedef enum { sz_align_global_k = 0, sz_align_local_k = 1 } sz_alignment_locality_t;
419+
414420
/**
415421
* @brief A simple signed integer type describing the status of a faulty operation.
416422
* @sa sz_success_k, sz_bad_alloc_k, sz_invalid_utf8_k, sz_contains_duplicates_k, sz_status_unknown_k
@@ -424,6 +430,8 @@ typedef enum {
424430
sz_invalid_utf8_k = -2,
425431
/** For algorithms that take collections of unique elements, this status indicates presence of duplicates. */
426432
sz_contains_duplicates_k = -3,
433+
/** A sink-hole status for unknown errors. */
434+
sz_status_unknown_k = -4,
427435
} sz_status_t;
428436

429437
/**

include/stringzilla/types.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ enum class status_t {
115115
bad_alloc_k = sz_bad_alloc_k,
116116
invalid_utf8_k = sz_invalid_utf8_k,
117117
contains_duplicates_k = sz_contains_duplicates_k,
118-
unknown_error_k = sz_unknown_error_k,
118+
unknown_k = sz_status_unknown_k,
119119
};
120120

121121
struct uniform_substitution_cost_t {

0 commit comments

Comments
 (0)