Skip to content

Commit 063f8a5

Browse files
Add comprehensive concurrent and multi-threaded testing for AT-102
- Create test-concurrent-stress.cpp: Sustained concurrent load testing with rapid context lifecycle, sustained inference, concurrent sequences, and memory operations stress tests
- Create test-kv-cache-concurrent.cpp: Dedicated KV cache race condition testing including slot allocation/deallocation, sequence copy operations, cache clear operations, and mixed concurrent operations
- Enhance test-thread-safety.cpp: Added race condition detection patterns with thread barrier synchronization, atomic counters for tracking operations, and performance monitoring for slow initializations
- Update CMakeLists.txt: Added new test targets with 'stress' label and ThreadSanitizer configuration option (LLAMA_SANITIZE_THREAD) for race detection builds
- Extend test_completion.py: Added high-volume concurrent request tests (32-128 requests), concurrent streaming tests, cache consistency validation, and parallel sequence processing tests

These tests target critical concurrent systems:
- KV cache prepare() and update() operations
- Context initialization and parameter setup under concurrent access
- Server task queue and slot management under high load
- Backend resource allocation and cleanup patterns
- Parallel batch processing with overlapping sequences

Tests include proper ThreadSanitizer support via CMake option for automated race condition detection in CI/CD pipelines.

Co-Authored-By: Alex Peng <[email protected]>
1 parent 661ae31 commit 063f8a5

File tree

5 files changed

+1119
-1
lines changed

5 files changed

+1119
-1
lines changed

tests/CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
llama_add_compile_flags()
22

3+
# ThreadSanitizer configuration for race condition detection.
#
# Configure with -DLLAMA_SANITIZE_THREAD=ON to compile and link the targets of
# this directory with -fsanitize=thread. add_compile_options() and
# add_link_options() are directory-scoped, so only targets created in this
# directory (and below) pick the flags up.
# NOTE(review): libraries built by other directories are not instrumented by
# this block - confirm whether that is handled elsewhere in the build.
option(LLAMA_SANITIZE_THREAD "Enable ThreadSanitizer for race condition detection" OFF)

if (LLAMA_SANITIZE_THREAD)
    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        add_compile_options(-fsanitize=thread -g -O1)
        add_link_options(-fsanitize=thread)
        message(STATUS "ThreadSanitizer enabled for concurrent testing")

        # TSAN_OPTIONS is read by the sanitizer runtime when a test executable
        # starts. set(ENV{TSAN_OPTIONS} ...) would only change the environment
        # of the CMake configure process and never reach ctest, so the options
        # are attached to the tests through their ENVIRONMENT property instead.
        # Note that this takes precedence over a TSAN_OPTIONS value exported in
        # the shell that runs ctest.
        function(llama_tests_apply_tsan_options)
            # TESTS lists every test registered in the current directory.
            get_property(tsan_tests DIRECTORY PROPERTY TESTS)
            if (tsan_tests)
                set_property(TEST ${tsan_tests} APPEND PROPERTY
                    ENVIRONMENT "TSAN_OPTIONS=halt_on_error=1:second_deadlock_stack=1")
            endif()
        endfunction()

        if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
            # Deferred calls run at the end of this CMakeLists.txt, i.e. after
            # all tests below have been registered.
            cmake_language(DEFER CALL llama_tests_apply_tsan_options)
        else()
            message(WARNING "CMake < 3.19: TSAN_OPTIONS is not set automatically; "
                            "export TSAN_OPTIONS=halt_on_error=1:second_deadlock_stack=1 before running ctest")
        endif()
    else()
        message(WARNING "ThreadSanitizer is only supported with GCC or Clang")
    endif()
endif()
17+
318
function(llama_build source)
419
if (DEFINED LLAMA_TEST_NAME)
520
set(TEST_TARGET ${LLAMA_TEST_NAME})
@@ -187,6 +202,10 @@ llama_build_and_test(test-regex-partial.cpp)
187202

188203
# Builds test-thread-safety.cpp and registers it as a test; everything after
# ARGS is the argument list given to llama_build_and_test for this test.
# NOTE(review): -hf/-hff look like a remote model repo and file, so the test
# presumably needs network access on first run - confirm.
llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
189204

205+
# Sustained concurrent-load stress test (test-concurrent-stress.cpp), registered
# with the "stress" label rather than the default one.
# NOTE(review): presumably this keeps it out of label-filtered default test
# runs and requires `ctest -L stress` to execute - confirm against
# llama_build_and_test and the CI configuration.
llama_build_and_test(test-concurrent-stress.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 32 -c 512 -np 4 -t 2 LABEL "stress")
206+
207+
# KV cache race-condition test (test-kv-cache-concurrent.cpp), registered with
# the "stress" label. It passes -c 1024, where the concurrent stress test
# passes -c 512.
# NOTE(review): -c is presumably the context size - confirm against the
# test's argument parser.
llama_build_and_test(test-kv-cache-concurrent.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 32 -c 1024 -np 4 -t 2 LABEL "stress")
208+
190209
# this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
191210
if (NOT WIN32)
192211
llama_build_and_test(test-arg-parser.cpp)

0 commit comments

Comments
 (0)