Skip to content

Commit 1a67064

Browse files
author
fengluo
committed
string_fastsearch.h:
format functions
1 parent 47a5d21 commit 1a67064

File tree

1 file changed

+33
-3
lines changed

1 file changed

+33
-3
lines changed

numpy/_core/src/umath/string_fastsearch.h

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,25 @@
3333
detailed explanation. */
3434

3535
/**
36+
* @internal
3637
* @brief Mode for counting the number of occurrences of a substring
3738
*/
3839
#define FAST_COUNT 0
3940

4041
/**
42+
* @internal
4143
* @brief Mode for performing a forward search for a substring
4244
*/
4345
#define FAST_SEARCH 1
4446

4547
/**
48+
* @internal
4649
* @brief Mode for performing a reverse (backward) search for a substring
4750
*/
4851
#define FAST_RSEARCH 2
4952

5053
/**
54+
* @file_internal
5155
* @brief Defines the bloom filter width based on the value of LONG_BIT.
5256
*
5357
* This macro sets the value of `STRINGLIB_BLOOM_WIDTH` depending on the
@@ -67,6 +71,7 @@
6771
#endif
6872

6973
/**
74+
* @file_internal
7075
* @brief Adds a character to the bloom filter mask.
7176
*
7277
* This macro sets the bit in the bloom filter `mask` corresponding to the
@@ -80,6 +85,7 @@
8085
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
8186

8287
/**
88+
* @file_internal
8389
* @brief Checks if a character is present in the bloom filter mask.
8490
*
8591
* This macro checks if the bit corresponding to the character `ch` is set
@@ -92,10 +98,8 @@
9298
/*
 * Tests whether the bit selected by `ch` is set in `mask`.
 *
 * The bit index is `ch` reduced modulo STRINGLIB_BLOOM_WIDTH (via the
 * `& (STRINGLIB_BLOOM_WIDTH - 1)` mask). The result is nonzero when the
 * bit is set and zero otherwise.
 *
 * Both arguments are parenthesized so that a compound expression passed
 * as `mask` (e.g. `a | b`) is tested as a whole; without the parentheses
 * `a | b & bit` would bind as `a | (b & bit)`.
 */
#define STRINGLIB_BLOOM(mask, ch)     \
    (((mask) & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH - 1)))))
94100

95-
#define FORWARD_DIRECTION 1 ///< Defines the forward search direction
96-
#define BACKWARD_DIRECTION (-1) ///< Defines the backward search direction
97-
98101
/**
102+
* @file_internal
99103
* @brief Threshold for using memchr or wmemchr in character search.
100104
*
101105
* If the search length exceeds this value, memchr/wmemchr is used.
@@ -104,10 +108,13 @@
104108

105109

106110
/**
111+
* @internal
107112
* @brief A checked indexer for buffers of a specified character type.
108113
*
109114
* This structure provides safe indexing into a buffer with boundary checks.
110115
*
116+
* @internal
117+
*
111118
* @tparam char_type The type of characters stored in the buffer.
112119
*/
113120
template <typename char_type>
@@ -335,6 +342,7 @@ struct CheckedIndexer {
335342

336343

337344
/**
345+
* @internal
338346
* @brief Finds the first occurrence of a specified character in a
339347
* given range of a buffer.
340348
*
@@ -387,6 +395,7 @@ find_char(CheckedIndexer<char_type> s, Py_ssize_t n, char_type ch)
387395
}
388396

389397
/**
398+
* @internal
390399
* @brief Finds the last occurrence of a specified character in a
391400
* given range of a buffer.
392401
*
@@ -418,6 +427,7 @@ rfind_char(CheckedIndexer<char_type> s, Py_ssize_t n, char_type ch)
418427

419428

420429
/**
430+
* @file_internal
421431
* @brief Conditional logging for string fast search.
422432
*
423433
* Set to 1 to enable logging macros.
@@ -445,11 +455,15 @@ rfind_char(CheckedIndexer<char_type> s, Py_ssize_t n, char_type ch)
445455
#endif
446456

447457
/**
458+
* @file_internal
448459
* @brief Perform a lexicographic search for the maximal suffix in
449460
* a given string.
450461
*
451462
* This function searches through the `needle` string to find the
452463
* maximal suffix, which is essentially the largest lexicographic suffix.
464+
* Equivalent to the Python expression:
465+
* - max(needle[i:] for i in range(len(needle)+1))
466+
*
453467
* Additionally, it computes the period of the right half of the string.
454468
*
455469
* @param needle The string to search in.
@@ -513,6 +527,7 @@ lex_search(CheckedIndexer<char_type> needle, Py_ssize_t len_needle,
513527
}
514528

515529
/**
530+
* @file_internal
516531
* @brief Perform a critical factorization on a string.
517532
*
518533
* This function splits the input string into two parts where the local
@@ -575,32 +590,38 @@ factorize(CheckedIndexer<char_type> needle,
575590

576591

577592
/**
593+
* @file_internal
578594
* @brief Internal macro to define the shift type used in the table.
579595
*/
580596
#define SHIFT_TYPE uint8_t
581597

582598
/**
599+
* @file_internal
583600
* @brief Internal macro to define the maximum shift value.
584601
*/
585602
#define MAX_SHIFT UINT8_MAX
586603

587604

588605
/**
606+
* @file_internal
589607
* @brief Internal macro to define the number of bits for the table size.
590608
*/
591609
#define TABLE_SIZE_BITS 6u
592610

593611
/**
612+
* @file_internal
594613
* @brief Internal macro to define the table size based on TABLE_SIZE_BITS.
595614
*/
596615
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
597616

598617
/**
618+
* @file_internal
599619
* @brief Internal macro to define the table mask used for bitwise operations.
600620
*/
601621
#define TABLE_MASK (TABLE_SIZE - 1U)
602622

603623
/**
624+
* @file_internal
604625
* @brief Struct to store precomputed data for string search algorithms.
605626
*
606627
* This structure holds all the necessary precomputed values needed
@@ -621,6 +642,7 @@ struct search_prep_data {
621642

622643

623644
/**
645+
* @file_internal
624646
* @brief Preprocesses the needle (substring) for optimized string search.
625647
*
626648
* This function performs preprocessing on the given needle (substring)
@@ -695,6 +717,7 @@ preprocess(CheckedIndexer<char_type> needle, Py_ssize_t len_needle,
695717
}
696718

697719
/**
720+
* @file_internal
698721
* @brief Searches for a needle (substring) within a haystack (string)
699722
* using the Two-Way string matching algorithm.
700723
*
@@ -858,6 +881,7 @@ two_way(CheckedIndexer<char_type> haystack, Py_ssize_t len_haystack,
858881

859882

860883
/**
884+
* @file_internal
861885
* @brief Finds the first occurrence of a needle (substring) within a haystack (string).
862886
*
863887
* This function applies the two-way string matching algorithm to efficiently
@@ -884,6 +908,7 @@ two_way_find(CheckedIndexer<char_type> haystack, Py_ssize_t len_haystack,
884908

885909

886910
/**
911+
* @file_internal
887912
* @brief Counts the occurrences of a needle (substring) within a haystack (string).
888913
*
889914
* This function applies the two-way string matching algorithm to count how many
@@ -937,6 +962,7 @@ two_way_count(CheckedIndexer<char_type> haystack, Py_ssize_t len_haystack,
937962
#undef LOG_LINEUP
938963

939964
/**
965+
* @internal
940966
* @brief A function that searches for a substring `p` in the
941967
* string `s` using a bloom filter to optimize character matching.
942968
*
@@ -1022,6 +1048,7 @@ default_find(CheckedIndexer<char_type> s, Py_ssize_t n,
10221048

10231049

10241050
/**
1051+
* @internal
10251052
* @brief Performs an adaptive string search using a bloom filter and fallback
10261053
* to two-way search for large data.
10271054
*
@@ -1109,6 +1136,7 @@ adaptive_find(CheckedIndexer<char_type> s, Py_ssize_t n,
11091136

11101137

11111138
/**
1139+
* @internal
11121140
* @brief Performs a reverse Boyer-Moore string search.
11131141
*
11141142
* This function searches for the last occurrence of a pattern in a string,
@@ -1176,6 +1204,7 @@ default_rfind(CheckedIndexer<char_type> s, Py_ssize_t n,
11761204

11771205

11781206
/**
1207+
* @internal
11791208
* @brief Counts occurrences of a specified character in a given string.
11801209
*
11811210
* This function iterates through the string `s` and counts how many times
@@ -1208,6 +1237,7 @@ countchar(CheckedIndexer<char_type> s, Py_ssize_t n,
12081237

12091238

12101239
/**
1240+
* @internal
12111241
* @brief Searches for occurrences of a substring `p` in the string `s`
12121242
* using various optimized search algorithms.
12131243
*

0 commit comments

Comments
 (0)