Skip to content

Commit 23801e4

Browse files
committed
Improve: Drop stack-protection in hashing on GCC
1 parent 2e5784b commit 23801e4

File tree

3 files changed

+40
-10
lines changed

3 files changed

+40
-10
lines changed

.clang-format

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,23 @@ ReflowComments: true
88
UseTab: Never
99
IndentPPDirectives: None
1010

11+
# StringZilla-specific macros
12+
# Visibility/linkage macros that act like storage class specifiers
13+
StatementAttributeLikeMacros:
14+
- SZ_PUBLIC
15+
- SZ_INTERNAL
16+
- SZ_DYNAMIC
17+
- SZ_EXTERNAL
18+
19+
# Attribute-like macros (clang-format 12+)
20+
AttributeMacros:
21+
- SZ_NO_STACK_PROTECTOR
22+
- sz_align_
23+
24+
# Macros that behave like type qualifiers in parameters
25+
TypenameMacros:
26+
- sz_at_least_
27+
1128
AlignConsecutiveAssignments: false
1229
AlignConsecutiveDeclarations: false
1330
AlignEscapedNewlines: true

include/stringzilla/hash.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ SZ_DYNAMIC void sz_sha256_state_digest(sz_sha256_state_t const *state, sz_u8_t d
279279
SZ_PUBLIC sz_u64_t sz_bytesum_serial(sz_cptr_t text, sz_size_t length);
280280

281281
/** @copydoc sz_hash */
282-
SZ_PUBLIC sz_u64_t sz_hash_serial(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
282+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_serial(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
283283

284284
/** @copydoc sz_fill_random */
285285
SZ_PUBLIC void sz_fill_random_serial(sz_ptr_t text, sz_size_t length, sz_u64_t nonce);
@@ -296,7 +296,7 @@ SZ_PUBLIC sz_u64_t sz_hash_state_digest_serial(sz_hash_state_t const *state);
296296
#if SZ_USE_WESTMERE
297297

298298
/** @copydoc sz_hash */
299-
SZ_PUBLIC sz_u64_t sz_hash_westmere(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
299+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_westmere(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
300300

301301
/** @copydoc sz_fill_random */
302302
SZ_PUBLIC void sz_fill_random_westmere(sz_ptr_t text, sz_size_t length, sz_u64_t nonce);
@@ -338,7 +338,7 @@ SZ_PUBLIC sz_u64_t sz_bytesum_haswell(sz_cptr_t text, sz_size_t length);
338338
SZ_PUBLIC sz_u64_t sz_bytesum_skylake(sz_cptr_t text, sz_size_t length);
339339

340340
/** @copydoc sz_hash */
341-
SZ_PUBLIC sz_u64_t sz_hash_skylake(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
341+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_skylake(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
342342

343343
/** @copydoc sz_fill_random */
344344
SZ_PUBLIC void sz_fill_random_skylake(sz_ptr_t text, sz_size_t length, sz_u64_t nonce);
@@ -360,7 +360,7 @@ SZ_PUBLIC sz_u64_t sz_hash_state_digest_skylake(sz_hash_state_t const *state);
360360
SZ_PUBLIC sz_u64_t sz_bytesum_ice(sz_cptr_t text, sz_size_t length);
361361

362362
/** @copydoc sz_hash */
363-
SZ_PUBLIC sz_u64_t sz_hash_ice(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
363+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_ice(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
364364

365365
/** @copydoc sz_fill_random */
366366
SZ_PUBLIC void sz_fill_random_ice(sz_ptr_t text, sz_size_t length, sz_u64_t nonce);
@@ -395,7 +395,7 @@ SZ_PUBLIC sz_u64_t sz_bytesum_neon(sz_cptr_t text, sz_size_t length);
395395
#if SZ_USE_NEON_AES
396396

397397
/** @copydoc sz_hash */
398-
SZ_PUBLIC sz_u64_t sz_hash_neon(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
398+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_neon(sz_cptr_t text, sz_size_t length, sz_u64_t seed);
399399

400400
/** @copydoc sz_fill_random */
401401
SZ_PUBLIC void sz_fill_random_neon(sz_ptr_t text, sz_size_t length, sz_u64_t nonce);
@@ -824,7 +824,7 @@ SZ_INTERNAL sz_u64_t sz_hash_state_finalize_serial_(sz_hash_state_t const *state
824824
return mixed_in_register.u64s[0];
825825
}
826826

827-
SZ_PUBLIC sz_u64_t sz_hash_serial(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
827+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_serial(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
828828
if (length <= 16) {
829829
// Initialize the AES block with a given seed
830830
sz_align_(16) sz_hash_minimal_t_ state;
@@ -1328,7 +1328,7 @@ SZ_INTERNAL sz_u64_t sz_hash_state_finalize_westmere_(sz_hash_state_t const *sta
13281328
return _mm_cvtsi128_si64(mixed_in_register);
13291329
}
13301330

1331-
SZ_PUBLIC sz_u64_t sz_hash_westmere(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
1331+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_westmere(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
13321332

13331333
if (length <= 16) {
13341334
// Initialize the AES block with a given seed
@@ -2223,7 +2223,7 @@ SZ_PUBLIC void sz_hash_state_init_skylake(sz_hash_state_t *state, sz_u64_t seed)
22232223
state->ins_length = 0;
22242224
}
22252225

2226-
SZ_PUBLIC sz_u64_t sz_hash_skylake(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
2226+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_skylake(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
22272227

22282228
if (length <= 16) {
22292229
// Initialize the AES block with a given seed
@@ -2562,7 +2562,7 @@ SZ_PUBLIC sz_u64_t sz_bytesum_ice(sz_cptr_t text, sz_size_t length) {
25622562
}
25632563
}
25642564

2565-
SZ_PUBLIC sz_u64_t sz_hash_ice(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
2565+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_ice(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
25662566

25672567
// For short strings the "masked loads" are identical to Skylake-X and
25682568
// the "logic" is identical to Haswell.
@@ -3477,7 +3477,7 @@ SZ_PUBLIC sz_u64_t sz_hash_state_digest_neon(sz_hash_state_t const *state) {
34773477
}
34783478
}
34793479

3480-
SZ_PUBLIC sz_u64_t sz_hash_neon(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
3480+
SZ_PUBLIC SZ_NO_STACK_PROTECTOR sz_u64_t sz_hash_neon(sz_cptr_t start, sz_size_t length, sz_u64_t seed) {
34813481
if (length <= 16) {
34823482
// Initialize the AES block with a given seed
34833483
sz_align_(16) sz_hash_minimal_t_ state;

include/stringzilla/types.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,19 @@
172172
#define SZ_INTERNAL inline static
173173
#endif // SZ_DYNAMIC_DISPATCH
174174

175+
/**
 * @brief Disables stack protection for performance-critical functions.
 *
 * With `-fstack-protector-strong`, the compiler inserts stack canary checks into functions
 * that keep local arrays or buffers on the stack. For hash functions that only use fixed-size
 * state structures, this is unnecessary overhead (~10 cycles per call). Placing this macro in
 * a function's declaration specifiers opts that function out of stack protection.
 *
 * The `no_stack_protector` attribute is only available in recent compilers (GCC gained it in
 * version 11). Older releases ignore it with a `-Wattributes` warning on every annotated
 * function, which breaks `-Werror` builds. So the attribute is feature-detected with
 * `__has_attribute` instead of being assumed for every `__GNUC__` / `__clang__` compiler.
 * Where the attribute (or `__has_attribute` itself) is unavailable, the macro expands to
 * nothing and the function simply keeps whatever stack protection the build flags request.
 *
 * @note Only apply this to functions that never write to a stack buffer using
 *       caller-controlled sizes or offsets: the canary is the last line of defense there.
 */
#if defined(__has_attribute)
/* Nested on purpose: a combined `defined(...) && __has_attribute(...)` condition fails to
 * parse on preprocessors that don't provide `__has_attribute` at all. */
#if __has_attribute(no_stack_protector)
#define SZ_NO_STACK_PROTECTOR __attribute__((no_stack_protector))
#endif
#endif
#if !defined(SZ_NO_STACK_PROTECTOR)
#define SZ_NO_STACK_PROTECTOR
#endif
187+
175188
/**
176189
* @brief Alignment macro for N-byte alignment.
177190
*/

0 commit comments

Comments
 (0)