Skip to content

Commit 3e52154

Browse files
authored
ci: Response Cache memory growth test (#8363)
1 parent b4cbfa2 commit 3e52154

File tree

2 files changed

+159
-0
lines changed

2 files changed

+159
-0
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in the
11+
# documentation and/or other materials provided with the distribution.
12+
# * Neither the name of NVIDIA CORPORATION nor the names of its
13+
# contributors may be used to endorse or promote products derived
14+
# from this software without specific prior written permission.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
import argparse
29+
import json
30+
31+
import numpy as np
32+
33+
34+
def generate_input_data(
    num_inputs, batch_size, output_file, input_size=1024, input_name="INPUT0"
):
    """Write randomly generated FP32 request payloads to a JSON file.

    The output has the layout ``{"data": [{<input_name>: [...]}, ...]}``
    with one entry per generated input. Each entry holds
    ``batch_size * input_size`` values, flattened into a single list.

    Args:
        num_inputs: Number of entries to generate.
        batch_size: Number of rows generated for each entry.
        output_file: Path of the JSON file to write (overwritten if present).
        input_size: Number of elements per row. Defaults to 1024.
        input_name: Key used for the tensor in every entry. Defaults to
            "INPUT0".
    """
    # Values are cast to float32 before serialization so that the numbers
    # written to the file are exactly representable as FP32.
    entries = [
        {
            input_name: np.random.rand(batch_size, input_size)
            .astype(np.float32)
            .flatten()
            .tolist()
        }
        for _ in range(num_inputs)
    ]

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump({"data": entries}, f)
43+
44+
45+
if __name__ == "__main__":
    # Command-line entry point: parse the options, generate the data file and
    # report what was written.
    parser = argparse.ArgumentParser(
        description="Generate random input data for perf_analyzer."
    )
    # Every option is required: without a value argparse would pass None on,
    # which fails later with an unhelpful TypeError instead of a usage error.
    parser.add_argument(
        "--num-inputs",
        type=int,
        required=True,
        help="Number of unique random inputs to generate.",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        required=True,
        help="The batch size for each input.",
    )
    parser.add_argument(
        "--output-file",
        type=str,
        required=True,
        help="The name of the output JSON file.",
    )
    args = parser.parse_args()

    # A negative count would otherwise produce an empty file (or a numpy
    # error) while still being reported as a success.
    if args.num_inputs < 0 or args.batch_size < 0:
        parser.error("--num-inputs and --batch-size must be non-negative.")

    generate_input_data(args.num_inputs, args.batch_size, args.output_file)
    print(f"Successfully generated {args.num_inputs} inputs in '{args.output_file}'.")

qa/L0_response_cache/test.sh

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,106 @@ ERROR_MESSAGE="\n***\n*** Failed: Request added to cache successfully when it wa
381381
CACHE_SIZE=200
382382
test_response_cache_ensemble_model "${TEST_NAME}" "${ERROR_MESSAGE}"
383383

384+
385+
############### Response Cache Memory Growth Test ###############
# Starts the server under valgrind (massif tool) with a local response cache,
# sends perf_analyzer load built from pre-generated random inputs, then runs
# the massif log checker and records the result in RET.

# Set server, client and valgrind arguments
LEAKCHECK=/usr/bin/valgrind
MASSIF_TEST=../common/check_massif_log.py
MODEL="identity_cache"
LEAKCHECK_LOG="${MODEL}.valgrind.log"
MASSIF_LOG="${MODEL}.valgrind.massif"
GRAPH_LOG="memory_growth_${MODEL}.log"
SERVER_LOG="${MODEL}.server.log"
CLIENT_LOG="${MODEL}_PA.client.log"
RANDOM_DATA_CLIENT_LOG="${MODEL}_random_data_script.log"
RANDOM_DATA_JSON="$(pwd)/random_inputs.json"
RANDOM_DATA_GENERATOR="generate_random_data.py"

LEAKCHECK_ARGS="--tool=massif --time-unit=B --massif-out-file=$MASSIF_LOG --max-threads=3000 --log-file=$LEAKCHECK_LOG"
SERVER_ARGS="--model-repository=$(pwd)/models --model-control-mode=explicit --load-model=${MODEL} --cache-config=local,size=10485760" # 10MB cache

set +e
# Generate random data for perf_analyzer requests to fill the cache and maximize cache misses
python "$RANDOM_DATA_GENERATOR" --num-inputs=10000 --batch-size=1 --output-file="${RANDOM_DATA_JSON}" >> "$RANDOM_DATA_CLIENT_LOG" 2>&1
if [ $? -ne 0 ]; then
    cat "$RANDOM_DATA_CLIENT_LOG"
    echo -e "\n***\n*** Failed to run ${RANDOM_DATA_GENERATOR}.\n***"
    RET=1
    exit 1
fi
# Check if the JSON data file was generated
if [ ! -f "${RANDOM_DATA_JSON}" ]; then
    echo -e "\n***\n*** FAILED - JSON data file was not found at the expected path: ${RANDOM_DATA_JSON}\n***"
    RET=1
    exit 1
fi
set -e

# Run the server
run_server_leakcheck
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

REPETITION=10
CONCURRENCY=20
CLIENT_BS=1
PERF_ANALYZER=../clients/perf_analyzer
TEMP_CLIENT_LOG=temp_client.log

set +e
# SECONDS is a bash builtin counter; resetting it here times the load phase only
SECONDS=0
# Run the perf analyzer 'REPETITION' times
for ((i=1; i<=REPETITION; i++)); do
    # Use random data to ensure cache misses
    $PERF_ANALYZER -v -m $MODEL --shape=INPUT0:1024 -i grpc --concurrency-range $CONCURRENCY -b $CLIENT_BS -p 20000 --input-data="${RANDOM_DATA_JSON}" > "$TEMP_CLIENT_LOG" 2>&1
    PA_RET=$?
    cat "$TEMP_CLIENT_LOG" >> "$CLIENT_LOG"
    # Exit code 0 is success and 2 is an unstable measurement, which is OK
    # for this test. Any other code is unexpected: report it on stdout as
    # well as in the client log so the failure reason is visible, and keep
    # iterating so the memory check below still runs.
    if [ ${PA_RET} -ne 0 ] && [ ${PA_RET} -ne 2 ]; then
        cat "$TEMP_CLIENT_LOG"
        echo -e "\n***\n*** perf_analyzer for $MODEL failed on iteration $i\n***" | tee -a "$CLIENT_LOG"
        RET=1
    fi
done
TEST_DURATION=$SECONDS
set -e

# Stop Server
kill $SERVER_PID
wait $SERVER_PID

set +e

# Log test duration and the graph for memory growth
MAX_ALLOWED_ALLOC=2 # MB
hrs=$(printf "%02d" $((TEST_DURATION / 3600)))
mins=$(printf "%02d" $(((TEST_DURATION / 60) % 60)))
secs=$(printf "%02d" $((TEST_DURATION % 60)))
echo -e "Test Duration: $hrs:$mins:$secs (HH:MM:SS)" >> "${GRAPH_LOG}"
ms_print "${MASSIF_LOG}" | head -n35 >> "${GRAPH_LOG}"
cat "${GRAPH_LOG}"
# Check the massif output
python "$MASSIF_TEST" "$MASSIF_LOG" "$MAX_ALLOWED_ALLOC" --start-from-middle >> "$GRAPH_LOG" 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Memory growth test for $MODEL Failed.\n***"
    RET=1
fi
# Always output memory usage for easier triage of MAX_ALLOWED_ALLOC settings in the future
grep -i "Change in memory allocation" "${GRAPH_LOG}" || true
set -e
483+
384484
if [ $RET -eq 0 ]; then
385485
echo -e "\n***\n*** Test Passed\n***"
386486
else

0 commit comments

Comments
 (0)