Skip to content

Commit 598fade

Browse files
Merge branch 'main' into kprashanth-numpy2-test
2 parents ba5c84d + 97405fd commit 598fade

File tree

22 files changed

+842
-443
lines changed

22 files changed

+842
-443
lines changed

build.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
"2024.0.0", # Standalone OpenVINO
7979
"3.2.6", # DCGM version
8080
"0.5.3.post1", # vLLM version
81+
"3.12.3", # RHEL Python version
8182
)
8283
}
8384

@@ -950,7 +951,6 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
950951
libb64-devel \\
951952
gperftools-devel \\
952953
patchelf \\
953-
python3.11-devel \\
954954
python3-pip \\
955955
python3-setuptools \\
956956
rapidjson-devel \\
@@ -963,6 +963,10 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
963963
libxml2-devel \\
964964
numactl-devel \\
965965
wget
966+
"""
967+
# Requires openssl-devel to be installed first for pyenv build to be successful
968+
df += change_default_python_version_rhel(TRITON_VERSION_MAP[FLAGS.version][7])
969+
df += """
966970
967971
RUN pip3 install --upgrade pip \\
968972
&& pip3 install --upgrade \\
@@ -1389,7 +1393,29 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
13891393

13901394
# Add dependencies needed for python backend
13911395
if "python" in backends:
1392-
df += """
1396+
if target_platform() == "rhel":
1397+
df += """
1398+
# python3, python3-pip and some pip installs required for the python backend
1399+
RUN yum install -y \\
1400+
libarchive-devel \\
1401+
python3-pip \\
1402+
openssl-devel \\
1403+
readline-devel
1404+
"""
1405+
# Requires openssl-devel to be installed first for pyenv build to be successful
1406+
df += change_default_python_version_rhel(
1407+
TRITON_VERSION_MAP[FLAGS.version][7]
1408+
)
1409+
df += """
1410+
RUN pip3 install --upgrade pip \\
1411+
&& pip3 install --upgrade \\
1412+
wheel \\
1413+
setuptools \\
1414+
\"numpy<2\" \\
1415+
virtualenv
1416+
"""
1417+
else:
1418+
df += """
13931419
# python3, python3-pip and some pip installs required for the python backend
13941420
RUN apt-get update \\
13951421
&& apt-get install -y --no-install-recommends \\
@@ -1514,6 +1540,34 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
15141540
return df
15151541

15161542

1543+
def change_default_python_version_rhel(version):
    """Return Dockerfile text that makes a pyenv-built Python the default on RHEL.

    The generated fragment installs pyenv under /opt/pyenv_build, builds the
    requested interpreter, copies its libpython3* libraries into /usr/lib64,
    links the install to /opt/python/v and puts its bin directory on PATH.

    Args:
        version: Full Python version string to install, e.g. "3.12.3".

    Returns:
        A string of Dockerfile instructions to append to the Dockerfile
        being generated.
    """
    version = str(version)
    # The interpreter executable is named after <major>.<minor> only
    # (python3.12, not python3.12.3), so PYTHON_BIN_PATH must not use the
    # full patch version.
    major_minor = ".".join(version.split(".")[:2])

    # NOTE(review): CONFIGURE_OPTS below is assigned and then chained with
    # `&&`, so it is a plain shell variable rather than part of the
    # `pyenv install` command environment -- confirm whether it is meant to
    # take effect before changing it.
    df = """
# The python library version available for install via 'yum install python3.X-devel' does not
# match the version of python inside the RHEL base container. This means that python packages
# installed within the container will not be picked up by the python backend stub process pybind
# bindings. It must instead must be installed via pyenv.
ENV PYENV_ROOT=/opt/pyenv_build
RUN curl https://pyenv.run | bash
ENV PATH="${{PYENV_ROOT}}/bin:$PATH"
RUN eval "$(pyenv init -)"
RUN CONFIGURE_OPTS=\"--with-openssl=/usr/lib64\" && pyenv install {version} \\
    && cp ${{PYENV_ROOT}}/versions/{version}/lib/libpython3* /usr/lib64/""".format(
        version=version
    )
    df += """
# RHEL image has several python versions. It's important
# to set the correct version, otherwise, packages that are
# pip installed will not be found during testing.
ENV PYVER={version} PYTHONPATH=/opt/python/v
RUN ln -sf ${{PYENV_ROOT}}/versions/${{PYVER}}* ${{PYTHONPATH}}
ENV PYBIN=${{PYTHONPATH}}/bin
ENV PYTHON_BIN_PATH=${{PYBIN}}/python{major_minor} PATH=${{PYBIN}}:${{PATH}}
""".format(
        version=version, major_minor=major_minor
    )
    return df
1569+
1570+
15171571
def create_dockerfile_windows(
15181572
ddir, dockerfile_name, argmap, backends, repoagents, caches
15191573
):
@@ -1958,6 +2012,19 @@ def backend_build(
19582012
cmake_script.mkdir(os.path.join(install_dir, "backends"))
19592013
cmake_script.rmdir(os.path.join(install_dir, "backends", be))
19602014

2015+
# The python library version available for install via 'yum install python3.X-devel' does not
2016+
# match the version of python inside the RHEL base container. This means that python packages
2017+
# installed within the container will not be picked up by the python backend stub process pybind
2018+
# bindings. It must instead be installed via pyenv. We package it here for better usability.
2019+
if target_platform() == "rhel" and be == "python":
2020+
major_minor_version = ".".join(
2021+
(TRITON_VERSION_MAP[FLAGS.version][7]).split(".")[:2]
2022+
)
2023+
version_matched_files = "/usr/lib64/libpython" + major_minor_version + "*"
2024+
cmake_script.cp(
2025+
version_matched_files, os.path.join(repo_install_dir, "backends", be)
2026+
)
2027+
19612028
cmake_script.cpdir(
19622029
os.path.join(repo_install_dir, "backends", be),
19632030
os.path.join(install_dir, "backends"),

docs/customization_guide/tritonfrontend.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,18 @@ Note: `model_path` may need to be edited depending on your setup.
5757

5858
2. Now, to start up the respective services with `tritonfrontend`
5959
```python
60-
from tritonfrontend import KServeHttp, KServeGrpc
60+
from tritonfrontend import KServeHttp, KServeGrpc, Metrics
6161
http_options = KServeHttp.Options(thread_count=5)
6262
http_service = KServeHttp(server, http_options)
6363
http_service.start()
6464

6565
# Default options (if none provided)
6666
grpc_service = KServeGrpc(server)
6767
grpc_service.start()
68+
69+
# Can start metrics service as well
70+
metrics_service = Metrics(server)
71+
metrics_service.start()
6872
```
6973

7074
3. Finally, with running services, we can use `tritonclient` or simple `curl` commands to send requests and receive responses from the frontends.
@@ -97,6 +101,7 @@ print("[INFERENCE RESULTS]")
97101
print("Output data:", output_data)
98102

99103
# Stop respective services and server.
104+
metrics_service.stop()
100105
http_service.stop()
101106
grpc_service.stop()
102107
server.stop()
@@ -139,7 +144,6 @@ With this workflow, you can avoid having to stop each service after client reque
139144
- The following features are not currently supported when launching the Triton frontend services through the python bindings:
140145
- [Tracing](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/trace.md)
141146
- [Shared Memory](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md)
142-
- [Metrics](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md)
143147
- [Restricted Protocols](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#limit-endpoint-access-beta)
144148
- VertexAI
145149
- Sagemaker

qa/L0_python_api/test_kserve.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import tritonclient.http as httpclient
3535
import tritonserver
3636
from tritonclient.utils import InferenceServerException
37-
from tritonfrontend import KServeGrpc, KServeHttp
37+
from tritonfrontend import KServeGrpc, KServeHttp, Metrics
3838

3939

4040
class TestHttpOptions:
@@ -48,7 +48,7 @@ def test_wrong_http_parameters(self):
4848
with pytest.raises(Exception):
4949
KServeHttp.Options(port=-15)
5050
with pytest.raises(Exception):
51-
KServeHttp.Options(thread_count=-5)
51+
KServeHttp.Options(thread_count=0)
5252

5353
# Wrong data type
5454
with pytest.raises(Exception):
@@ -70,6 +70,20 @@ def test_wrong_grpc_parameters(self):
7070
KServeGrpc.Options(port=-5)
7171
with pytest.raises(Exception):
7272
KServeGrpc.Options(keepalive_timeout_ms=-20_000)
73+
with pytest.raises(Exception):
74+
KServeGrpc.Options(keepalive_time_ms=-1)
75+
with pytest.raises(Exception):
76+
KServeGrpc.Options(keepalive_timeout_ms=-1)
77+
with pytest.raises(Exception):
78+
KServeGrpc.Options(http2_max_pings_without_data=-1)
79+
with pytest.raises(Exception):
80+
KServeGrpc.Options(http2_min_recv_ping_interval_without_data_ms=-1)
81+
with pytest.raises(Exception):
82+
KServeGrpc.Options(http2_max_ping_strikes=-1)
83+
with pytest.raises(Exception):
84+
KServeGrpc.Options(max_connection_age_ms=-1)
85+
with pytest.raises(Exception):
86+
KServeGrpc.Options(max_connection_age_grace_ms=-1)
7387

7488
# Wrong data type
7589
with pytest.raises(Exception):
@@ -78,8 +92,25 @@ def test_wrong_grpc_parameters(self):
7892
KServeGrpc.Options(server_key=10)
7993

8094

95+
class TestMetricsOptions:
    """Checks that ``Metrics.Options`` accepts valid arguments and rejects bad ones."""

    def test_correct_http_parameters(self):
        # A well-formed set of options must construct without raising.
        Metrics.Options(address="0.0.0.1", port=8080, thread_count=16)

    def test_wrong_http_parameters(self):
        # Each entry is expected to be rejected on its own: the first two are
        # out-of-range values, the last one is a wrong data type.
        rejected_kwargs = (
            {"port": -15},
            {"thread_count": 0},
            {"thread_count": "ten"},
        )
        for kwargs in rejected_kwargs:
            with pytest.raises(Exception):
                Metrics.Options(**kwargs)
109+
110+
81111
HTTP_ARGS = (KServeHttp, httpclient, "localhost:8000") # Default HTTP args
82112
GRPC_ARGS = (KServeGrpc, grpcclient, "localhost:8001") # Default GRPC args
113+
METRICS_ARGS = (Metrics, "localhost:8002") # Default Metrics args
83114

84115

85116
class TestKServe:
@@ -271,6 +302,61 @@ def callback(user_data, result, error):
271302
utils.teardown_client(grpc_client)
272303
utils.teardown_server(server)
273304

305+
@pytest.mark.parametrize("frontend, url", [METRICS_ARGS])
def test_metrics_default_port(self, frontend, url):
    """Start the metrics frontend with default options and expect HTTP 200."""
    server = utils.setup_server()
    try:
        service = utils.setup_service(server, frontend)
        try:
            metrics_url = f"http://{url}/metrics"
            status_code, _ = utils.get_metrics(metrics_url)

            assert status_code == 200
        finally:
            # Always release the service, even when the check above fails,
            # so it does not stay up for the tests that follow.
            utils.teardown_service(service)
    finally:
        utils.teardown_server(server)
317+
318+
@pytest.mark.parametrize("frontend", [Metrics])
def test_metrics_custom_port(self, frontend, port=8005):
    """Start the metrics frontend on a non-default port and expect HTTP 200."""
    server = utils.setup_server()
    try:
        service = utils.setup_service(server, frontend, Metrics.Options(port=port))
        try:
            metrics_url = f"http://localhost:{port}/metrics"
            status_code, _ = utils.get_metrics(metrics_url)

            assert status_code == 200
        finally:
            # Always release the service, even when the check above fails,
            # so it does not stay up for the tests that follow.
            utils.teardown_service(service)
    finally:
        utils.teardown_server(server)
330+
331+
@pytest.mark.parametrize("frontend, url", [METRICS_ARGS])
def test_metrics_update(self, frontend, url):
    """Check that the reported inference count goes from 0 to 1 after one request."""
    metrics_url = f"http://{url}/metrics"

    # Setup Server, KServeGrpc, Metrics. Each teardown sits in a `finally`
    # so a failed assertion does not leave services running for later tests.
    server = utils.setup_server()
    try:
        # KServeGrpc is needed to send the inference request
        grpc_service = utils.setup_service(server, KServeGrpc)
        try:
            metrics_service = utils.setup_service(server, frontend)
            try:
                # Get Metrics and verify inference count == 0 before inference
                before_status_code, before_inference_count = utils.get_metrics(
                    metrics_url
                )
                assert before_status_code == 200 and before_inference_count == 0

                # Send 1 Inference Request with send_and_test_inference_identity()
                assert utils.send_and_test_inference_identity(
                    GRPC_ARGS[1], GRPC_ARGS[2]
                )

                # Get Metrics and verify inference count == 1 after inference
                after_status_code, after_inference_count = utils.get_metrics(
                    metrics_url
                )
                assert after_status_code == 200 and after_inference_count == 1
            finally:
                utils.teardown_service(metrics_service)
        finally:
            utils.teardown_service(grpc_service)
    finally:
        utils.teardown_server(server)
359+
274360
# KNOWN ISSUE: CAUSES SEGFAULT
275361
# Created [DLIS-7231] to address at future date
276362
# Once the server has been stopped, the underlying TRITONSERVER_Server instance

qa/L0_python_api/test_model_repository/identity/config.pbtxt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,10 @@ output [
4141
data_type: TYPE_STRING
4242
dims: [ 1 ]
4343
}
44-
]
44+
]
45+
instance_group [
46+
{
47+
count: 1
48+
kind : KIND_CPU
49+
}
50+
]

0 commit comments

Comments
 (0)