Skip to content

Commit d733056

Browse files
atrivedi-tsavoritesi (Ashish Trivedi)
authored
@FIR-754: Added all parameter parsing for the llama-cli (#18)
* @FIR-754: Added all parameter parsing for the llama-cli The test results are as follows Model Response cd /usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/; ./run_llama_cli.sh "My cat's name" " 50 tinyllama-vo-5m-para.gguf tSavorite 1.5 1024 50 0.9 5 12288 0.0 [2018-03-09 13:03:17.788243] 271:272 [[32m info[m] :: </proj/work/mmankali/bld-setuptest/tsirel-31/tsi_yocto_workspace/tsi-apc-manager/platform/rsm_mgr/rsm_process_req.c:129> TXE resource allocation request processed successfully. My cat's name was Tim. He loved to play with his toy car. He would run and jump in the park, making loud noises. Tim was very happy with his new toy car. One day, Tim's mom said, "Tim. You llama_perf_sampler_print: sampling time = 999.96 ms / 56 runs ( 17.86 ms per token, 56.00 tokens per second)llama_perf_context_print: load time = 1713.55 ms llama_perf_context_print: prompt eval time = 603.51 ms / 6 tokens ( 100.58 ms per token, 9.94 tokens per second) llama_perf_context_print: eval time = 7069.36 ms / 49 runs ( 144.27 ms per token, 6.93 tokens per second) llama_perf_context_print: total time = 10046.17 ms / 55 tokens [2018-03-09 13:03:28.875126] 271:272 [[32m info[m] :: </proj/work/mmankali/bld-setuptest/tsirel-31/tsi_yocto_workspace/tsi-apc-manager/platform/rsm_mgr/rsm_process_req.c:145> TXE resource release request processed successfully. 
GGML Tsavorite Profiling Results: ------------------------------------------------------------------------------------------------------------------------ Calls Total(ms) T/call Self(ms) Function ------------------------------------------------------------------------------------------------------------------------ 2715 2720.000 1.002 0.000 [25%] RuntimeHostShim::awaitCommandListCompletion 1740 2635.984 1.515 2635.984 └─ [24%] [ txe_silu ] 925 1379.715 1.492 1379.715 └─ [12%] [ txe_mult ] 50 74.450 1.489 74.450 └─ [ 1%] [ txe_add ] 2715 0.448 0.000 0.448 └─ [ 0%] TXE 0 Idle 1 34.000 34.000 34.000 [ 0%] RuntimeHostShim::finalize 1 16.000 16.000 1.000 [ 0%] GGML Tsavorite 1 15.000 15.000 15.000 └─ [ 0%] RuntimeHostShim::initialize 2716 0.000 0.000 0.000 [ 0%] RuntimeHostShim::allocate 9120 0.000 0.000 0.000 [ 0%] RuntimeHostShim::getShmemManager 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::createCommandList 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::loadBlob 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::launchBlob 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::addCommandToList 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::finalizeCommandList 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::unloadBlob 2715 0.000 0.000 0.000 [ 0%] RuntimeHostShim::deallocate ======================================================================================================================== 33558 11098.000 0.331 11098.000 [100%] TOTAL ======================================================================================================================== ⟵ Back to Form The URL used is as follows http://10.50.0.124:5003/llama-cli?model=tiny-llama&backend=tSavorite&tokens=10&prompt=My+cat%27s+name&repeat-penalty=1.5&batch-size=1024&top-k=50&top-p=0.9&last-n=5&context-length=12288&temp=0.0 * @FIR-754: Addressed review comments. --------- Co-authored-by: Ashish Trivedi <[email protected]>
1 parent 6047d7a commit d733056

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

tools/flaskIfc/flaskIfc.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,19 @@
1010
app = Flask(__name__)
1111

1212
port = '/dev/ttyUSB3'
13+
#port = '/dev/ttyUSB2'
1314
baudrate = '921600'
15+
#baudrate = '115200'
1416
exe_path = "/usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/"
1517

18+
DEFAULT_REPEAT_PENALTY = 1.5
19+
DEFAULT_BATCH_SIZE = 1024
20+
DEFAULT_TOP_K = 50
21+
DEFAULT_TOP_P = 0.9
22+
DEFAULT_LAST_N = 5
23+
DEFAULT_CONTEXT_LENGTH = 12288
24+
DEFAULT_TEMP = 0.0
25+
1626
@app.route('/')
1727
def index():
1828
return render_template('index.html')
@@ -25,6 +35,13 @@ def llama_cli_serial_command():
2535
backend = request.args.get('backend')
2636
tokens = request.args.get('tokens')
2737
prompt = request.args.get('prompt')
38+
repeat_penalty = request.args.get('repeat-penalty', DEFAULT_REPEAT_PENALTY)
39+
batch_size = request.args.get('batch-size', DEFAULT_BATCH_SIZE)
40+
top_k = request.args.get('top-k', DEFAULT_TOP_K)
41+
top_p = request.args.get('top-p', DEFAULT_TOP_P)
42+
last_n = request.args.get('last-n', DEFAULT_LAST_N)
43+
context_length = request.args.get('context-length', DEFAULT_CONTEXT_LENGTH)
44+
temp = request.args.get('temp', DEFAULT_TEMP)
2845

2946
# Define the model path (update with actual paths)
3047
model_paths = {
@@ -51,7 +68,7 @@ def llama_cli_serial_command():
5168
# URL to Test this end point is as follows
5269
# http://10.50.30.167:5001/llama-cli?model=tiny-llama&backend=tSavorite&tokens=5&prompt=Hello+How+are+you
5370
script_path = "./run_llama_cli.sh"
54-
command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend}"
71+
command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend} {repeat_penalty} {batch_size} {top_k} {top_p} {last_n} {context_length} {temp}"
5572

5673
try:
5774
result = subprocess.run(['python3', 'serial_script.py', port, baudrate, command], capture_output=True, text=True, check=True)
@@ -167,6 +184,13 @@ def submit():
167184
backend = request.form.get('backend')
168185
tokens = request.form.get('tokens')
169186
prompt = request.form.get('prompt')
187+
repeat_penalty = request.form.get('repeat-penalty', DEFAULT_REPEAT_PENALTY)
188+
batch_size = request.form.get('batch-size', DEFAULT_BATCH_SIZE)
189+
top_k = request.form.get('top-k', DEFAULT_TOP_K)
190+
top_p = request.form.get('top-p', DEFAULT_TOP_P)
191+
last_n = request.form.get('last-n', DEFAULT_LAST_N)
192+
context_length = request.form.get('context-length', DEFAULT_CONTEXT_LENGTH)
193+
temp = request.form.get('temp', DEFAULT_TEMP)
170194

171195
# Define the model path (update with actual paths)
172196
model_paths = {
@@ -192,7 +216,7 @@ def submit():
192216
#]
193217

194218
script_path = "./run_llama_cli.sh"
195-
command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend}"
219+
command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend} {repeat_penalty} {batch_size} {top_k} {top_p} {last_n} {context_length} {temp}"
196220

197221

198222
def run_script():

0 commit comments

Comments (0)