@@ -461,68 +461,71 @@ SZ_PUBLIC void sz_memory_allocator_init_fixed(sz_memory_allocator_t *alloc, void
461461
462462#pragma region API Signature Types
463463
464- /* * @brief Signature of :: sz_hash. */
464+ /* * @brief Signature of ` sz_hash` . */
465465typedef sz_u64_t (*sz_hash_t )(sz_cptr_t , sz_size_t , sz_u64_t );
466466
467- /* * @brief Signature of :: sz_hash_state_init. */
467+ /* * @brief Signature of ` sz_hash_state_init` . */
468468typedef void (*sz_hash_state_init_t )(struct sz_hash_state_t *, sz_u64_t );
469469
470- /* * @brief Signature of :: sz_hash_state_stream. */
470+ /* * @brief Signature of ` sz_hash_state_stream` . */
471471typedef void (*sz_hash_state_stream_t )(struct sz_hash_state_t *, sz_cptr_t , sz_size_t );
472472
473- /* * @brief Signature of :: sz_hash_state_fold. */
473+ /* * @brief Signature of ` sz_hash_state_fold` . */
474474typedef sz_u64_t (*sz_hash_state_fold_t )(struct sz_hash_state_t const *);
475475
476- /* * @brief Signature of :: sz_bytesum. */
476+ /* * @brief Signature of ` sz_bytesum` . */
477477typedef sz_u64_t (*sz_bytesum_t )(sz_cptr_t , sz_size_t );
478478
479- /* * @brief Signature of :: sz_generate. */
479+ /* * @brief Signature of ` sz_generate` . */
480480typedef void (*sz_generate_t )(sz_ptr_t , sz_size_t , sz_u64_t );
481481
482- /* * @brief Signature of :: sz_equal. */
482+ /* * @brief Signature of ` sz_equal` . */
483483typedef sz_bool_t (*sz_equal_t )(sz_cptr_t , sz_cptr_t , sz_size_t );
484484
485- /* * @brief Signature of :: sz_order. */
485+ /* * @brief Signature of ` sz_order` . */
486486typedef sz_ordering_t (*sz_order_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t );
487487
488- /* * @brief Signature of ::sz_look_up_transform . */
489- typedef void (*sz_look_up_transform_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_ptr_t );
488+ /* * @brief Signature of `sz_lookup` . */
489+ typedef void (*sz_lookup_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_ptr_t );
490490
491- /* * @brief Signature of :: sz_move. */
491+ /* * @brief Signature of ` sz_move` . */
492492typedef void (*sz_move_t )(sz_ptr_t , sz_cptr_t , sz_size_t );
493493
494- /* * @brief Signature of :: sz_fill. */
494+ /* * @brief Signature of ` sz_fill` . */
495495typedef void (*sz_fill_t )(sz_ptr_t , sz_size_t , sz_u8_t );
496496
497- /* * @brief Signature of :: sz_find_byte. */
497+ /* * @brief Signature of ` sz_find_byte` . */
498498typedef sz_cptr_t (*sz_find_byte_t )(sz_cptr_t , sz_size_t , sz_cptr_t );
499499
500- /* * @brief Signature of :: sz_find. */
500+ /* * @brief Signature of ` sz_find` . */
501501typedef sz_cptr_t (*sz_find_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t );
502502
503- /* * @brief Signature of :: sz_find_set. */
503+ /* * @brief Signature of ` sz_find_set` . */
504504typedef sz_cptr_t (*sz_find_set_t )(sz_cptr_t , sz_size_t , sz_charset_t const *);
505505
506- /* * @brief Signature of :: sz_hamming_distance. */
507- typedef sz_size_t (*sz_hamming_distance_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t , sz_size_t );
506+ /* * @brief Signature of ` sz_hamming_distance` . */
507+ typedef sz_status_t (*sz_hamming_distance_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t , sz_size_t , sz_size_t * );
508508
509- /* * @brief Signature of ::sz_edit_distance. */
510- typedef sz_size_t (*sz_edit_distance_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t , sz_size_t , sz_memory_allocator_t *);
509+ /* * @brief Signature of `sz_levenshtein_distance`. */
510+ typedef sz_status_t (*sz_levenshtein_distance_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t , sz_size_t ,
511+ sz_memory_allocator_t *, sz_size_t *);
511512
512- /* * @brief Signature of ::sz_alignment_score . */
513- typedef sz_ssize_t (*sz_alignment_score_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t , sz_error_cost_t const *,
514- sz_error_cost_t , sz_memory_allocator_t *);
513+ /* * @brief Signature of `sz_needleman_wunsch_score` . */
514+ typedef sz_status_t (*sz_needleman_wunsch_score_t )(sz_cptr_t , sz_size_t , sz_cptr_t , sz_size_t , sz_error_cost_t const *,
515+ sz_error_cost_t , sz_memory_allocator_t *, sz_ssize_t *);
515516
516- /* * @brief Signature of ::sz_sequence_argsort. */
517- typedef sz_bool_t (*sz_sequence_argsort_t )(struct sz_sequence_t const *, sz_memory_allocator_t *, sz_sorted_idx_t *);
517+ /* * @brief Signature of `sz_sequence_argsort`. */
518+ typedef sz_status_t (*sz_sequence_argsort_t )(struct sz_sequence_t const *, sz_memory_allocator_t *, sz_sorted_idx_t *,
519+ sz_bool_t *);
518520
519- /* * @brief Signature of ::sz_pgrams_sort. */
520- typedef sz_bool_t (*sz_pgrams_sort_t )(sz_pgram_t *, sz_size_t , sz_memory_allocator_t *, sz_sorted_idx_t *);
521+ /* * @brief Signature of `sz_pgrams_sort`. */
522+ typedef sz_status_t (*sz_pgrams_sort_t )(sz_pgram_t *, sz_size_t , sz_memory_allocator_t *, sz_sorted_idx_t *,
523+ sz_bool_t *);
521524
522- /* * @brief Signature of :: sz_sequence_argsort_stable. */
525+ /* * @brief Signature of ` sz_sequence_argsort_stable` . */
523526typedef sz_sequence_argsort_t sz_sequence_argsort_stable_t ;
524527
525- /* * @brief Signature of :: sz_pgrams_sort_stable. */
528+ /* * @brief Signature of ` sz_pgrams_sort_stable` . */
526529typedef sz_pgrams_sort_t sz_pgrams_sort_stable_t ;
527530
528531#pragma endregion
@@ -683,9 +686,17 @@ SZ_INTERNAL sz_size_t _sz_export_utf8_to_utf32(sz_cptr_t utf8, sz_size_t utf8_le
683686
684687#pragma region String Sequences API
685688
686- typedef sz_cptr_t (*sz_sequence_member_start_t )(struct sz_sequence_t const *, sz_size_t );
687- typedef sz_size_t (*sz_sequence_member_length_t )(struct sz_sequence_t const *, sz_size_t );
689+ /**
 *  @brief Signature of `sz_sequence_t::get_start` used to get the start of a member string at a given index.
 *
 *  The first argument is an opaque, layout-specific pointer and the second is the index of the member string.
 *  For example, `sz_sequence_from_null_terminated_strings` installs a callback that casts the first argument
 *  back to an array of C-string pointers and returns the entry at the given index.
 *  NOTE(review): the first argument is presumably always `sz_sequence_t::handle` - confirm against callers.
 */
690+ typedef sz_cptr_t (*sz_sequence_member_start_t )(void const *, sz_size_t );
691+ /**
 *  @brief Signature of `sz_sequence_t::get_length` used to get the length of a member string at a given index.
 *
 *  Takes the same arguments as the `get_start` callback: an opaque, layout-specific pointer and the index of
 *  the member string. The null-terminated-strings layout computes the result by counting bytes up to the
 *  first zero byte.
 */
692+ typedef sz_size_t (*sz_sequence_member_length_t )(void const *, sz_size_t );
688693
694+ /* *
695+ * @brief Structure to represent an ordered collection of strings.
696+ * It's a generic structure that can be used to represent a sequence of strings in different layouts.
697+ * It can be easily combined with Apache Arrow and its tape-like concatenated strings.
698+ * @sa sz_sequence_from_null_terminated_strings
699+ */
689700typedef struct sz_sequence_t {
690701 void const *handle;
691702 sz_size_t count;
@@ -694,20 +705,12 @@ typedef struct sz_sequence_t {
694705} sz_sequence_t ;
695706
696707/* *
697- * @brief Initiates the sequence structure from a tape layout, used by Apache Arrow.
698- * Expects ::offsets to contains `count + 1` entries, the last pointing at the end
699- * of the last string, indicating the total length of the ::tape.
700- */
701- SZ_PUBLIC void sz_sequence_from_u32tape ( //
702- sz_cptr_t *start, sz_u32_t const *offsets, sz_size_t count, sz_sequence_t *sequence);
703-
704- /* *
705- * @brief Initiates the sequence structure from a tape layout, used by Apache Arrow.
706- * Expects ::offsets to contains `count + 1` entries, the last pointing at the end
707- * of the last string, indicating the total length of the ::tape.
708+ * @brief Initiates the sequence structure from a typical C-style strings array, like `char *[]`.
709+ * @param[in] start Pointer to the array of strings.
710+ * @param[in] count Number of strings in the array.
711+ * @param[out] sequence Sequence structure to initialize.
708712 */
709- SZ_PUBLIC void sz_sequence_from_u64tape ( //
710- sz_cptr_t *start, sz_u64_t const *offsets, sz_size_t count, sz_sequence_t *sequence);
713+ SZ_PUBLIC void sz_sequence_from_null_terminated_strings (sz_cptr_t *start, sz_size_t count, sz_sequence_t *sequence);
711714
712715#pragma endregion
713716
@@ -857,7 +860,7 @@ SZ_INTERNAL sz_u32_t sz_u32_bytes_reverse(sz_u32_t val) { return __builtin_bswap
857860SZ_INTERNAL sz_u64_t sz_u64_rotl (sz_u64_t x, sz_u64_t r) { return (x << r) | (x >> (64 - r)); }
858861
859862/* *
860- * @brief Select bits from either :: a or :: b depending on the value of :: mask bits.
863+ * @brief Select bits from either @p a or @p b depending on the value of @p mask bits.
861864 *
862865 * Similar to `_mm_blend_epi16` intrinsic on x86.
863866 * Described in the "Bit Twiddling Hacks" by Sean Eron Anderson.
@@ -987,7 +990,7 @@ SZ_INTERNAL sz_size_t sz_size_log2i_nonzero(sz_size_t x) {
987990}
988991
989992/* *
990- * @brief Compute the smallest power of two greater than or equal to :: x.
993+ * @brief Compute the smallest power of two greater than or equal to @p x.
991994 */
992995SZ_INTERNAL sz_size_t sz_size_bit_ceil (sz_size_t x) {
993996 // Unlike the commonly used trick with `clz` intrinsics, this is valid across the whole range of `x`.
@@ -1149,6 +1152,25 @@ SZ_PUBLIC void sz_memory_allocator_init_fixed(sz_memory_allocator_t *alloc, void
11491152 *(sz_ptr_t )buffer = *(sz_cptr_t )&length;
11501153}
11511154
1155+ SZ_PUBLIC sz_cptr_t _sz_sequence_from_null_terminated_strings_get_start (void const *handle, sz_size_t i) {
1156+ sz_cptr_t const *start = (sz_cptr_t const *)handle;
1157+ return start[i];
1158+ }
1159+
1160+ SZ_PUBLIC sz_size_t _sz_sequence_from_null_terminated_strings_get_length (void const *handle, sz_size_t i) {
1161+ sz_cptr_t const *start = (sz_cptr_t const *)handle;
1162+ sz_size_t length = 0 ;
1163+ for (sz_cptr_t ptr = start[i]; *ptr; ptr++) length++;
1164+ return length;
1165+ }
1166+
1167+ SZ_PUBLIC void sz_sequence_from_null_terminated_strings (sz_cptr_t *start, sz_size_t count, sz_sequence_t *sequence) {
1168+ sequence->handle = start;
1169+ sequence->count = count;
1170+ sequence->get_start = _sz_sequence_from_null_terminated_strings_get_start;
1171+ sequence->get_length = _sz_sequence_from_null_terminated_strings_get_length;
1172+ }
1173+
11521174#pragma endregion
11531175
11541176#ifdef __cplusplus
0 commit comments