Skip to content

Commit c488503

Browse files
Merge pull request #68 from RH-steve-grubb/add-tokenizer
Add tokenizer & upstream patch
2 parents 09b03d4 + a0ffb9b commit c488503

File tree

4 files changed

+56
-12
lines changed

4 files changed

+56
-12
lines changed

Dockerfile.redhat

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,16 @@ ARG ov_tokenizers_branch=master
227227
RUN git clone https://github.com/openvinotoolkit/openvino_tokenizers.git /openvino_tokenizers && cd /openvino_tokenizers && git checkout $ov_tokenizers_branch && git submodule update --init --recursive
228228
WORKDIR /openvino_tokenizers/build
229229
RUN cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE="${VERBOSE_LOGS}" -DCMAKE_CXX_FLAGS=" ${LTO_CXX_FLAGS} " -DCMAKE_SHARED_LINKER_FLAGS="${LTO_LD_FLAGS}" && cmake --build . --parallel $JOBS ; cp /openvino_tokenizers/build/src/lib*.so /opt/intel/openvino/runtime/lib/intel64/
230+
WORKDIR /openvino_tokenizers/
231+
# Install the openvino_tokenizers python bindings and use a symlink to point
232+
# to the shared object in its final location.
233+
RUN if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
234+
mkdir -p /opt/intel/openvino/python/openvino_tokenizers/lib ; \
235+
cp -r python/* /opt/intel/openvino/python/ ; \
236+
cp build/python/* /opt/intel/openvino/python/openvino_tokenizers/ ; \
237+
mkdir -p /opt/intel/openvino/python/openvino_tokenizers-2025.1.dist-info ; \
238+
echo $'Metadata-Version: 1.0\nName: openvino-tokenizers\nVersion: 2025.1\nRequires-Python: >=3.9\nRequires-Dist: openvino~=2025.1.0' > /opt/intel/openvino/python/openvino_tokenizers-2025.1.dist-info/METADATA ; \
239+
ln -s /ovms/lib/libopenvino_tokenizers.so /opt/intel/openvino/python/openvino_tokenizers/lib/libopenvino_tokenizers.so ; fi
230240

231241
# Build OpenVINO Model Server
232242
WORKDIR /ovms
@@ -346,7 +356,9 @@ ARG FUZZER_BUILD=0
346356
ARG debug_bazel_flags="--strip=always --config=mp_on_py_on --//:distro=redhat"
347357
COPY --from=capi-build /ovms_release/lib/libovms_shared.so /ovms_release/lib/
348358
COPY create_package.sh /
349-
RUN ./create_package.sh
359+
RUN ./create_package.sh ; if ! [[ $debug_bazel_flags == *"_py_off"* ]]; then \
360+
echo $'#!/bin/bash\npython3 -m openvino_tokenizers.cli "$@"' > /ovms_release/bin/convert_tokenizer ; \
361+
chmod +x /ovms_release/bin/convert_tokenizer ; fi
350362

351363
# hadolint ignore=DL3059
352364
RUN chown -R ovms:ovms /ovms_release
@@ -395,6 +407,7 @@ RUN if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; echo -e "max_parallel_do
395407
useradd --home-dir /home/ovms --create-home --uid 5000 --gid 5000 --groups 39,44 --shell /bin/bash --skel /dev/null ovms
396408

397409
ENV LD_LIBRARY_PATH=/ovms/lib
410+
ENV PATH="$PATH:/ovms/bin"
398411

399412
COPY --from=pkg /ovms_release /ovms
400413
COPY --from=pkg /usr/local/lib/python3.*/site-packages/jinja2 /ovms/python_deps/jinja2

demos/c_api_minimal_app/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ BASE_OS ?= ubuntu24
2525

2626
ifeq ($(BASE_OS),ubuntu24)
2727
BASE_OS_TAG_UBUNTU ?= 24.04
28-
PACKAGE_URL ?="https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu24.tar.gz"
28+
PACKAGE_URL ?="https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24.tar.gz"
2929
BASE_IMAGE ?= ubuntu:$(BASE_OS_TAG_UBUNTU)
3030
DIST_OS=ubuntu
3131
endif
3232
ifeq ($(BASE_OS),redhat)
3333
BASE_OS_TAG_REDHAT ?= 9.5
34-
PACKAGE_URL ="https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_redhat.tar.gz"
34+
PACKAGE_URL ="https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat.tar.gz"
3535
BASE_IMAGE ?= registry.access.redhat.com/ubi9/ubi:$(BASE_OS_TAG_REDHAT)
3636
DIST_OS=redhat
3737
endif

demos/code_local_assistant/README.md

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,41 @@ mkdir models
2121
> **Note:** The users in China need to set environment variable HF_ENDPOINT="https://hf-mirror.com" before running the export script to connect to the HF Hub.
2222
2323
Export `codellama/CodeLlama-7b-Instruct-hf`:
24+
25+
::::{tab-set}
26+
:::{tab-item} Intel GPU
2427
```console
25-
python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
28+
python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device GPU --cache_size 1 --overwrite_models
2629
```
30+
:::
2731

28-
> **Note:** Use `--target_device GPU` for Intel GPU or omit this parameter to run on Intel CPU
32+
:::{tab-item} Intel NPU
33+
```console
34+
python export_model.py text_generation --source_model codellama/CodeLlama-7b-Instruct-hf --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
35+
```
36+
:::
37+
::::
2938

3039
## Prepare Code Completion Model
3140
For this task we need a smaller, lighter model that will produce code quicker than the chat task.
3241
Since we do not want to wait for the code to appear, we need to use a smaller model. It should be responsive enough to generate multi-line blocks of code ahead of time as we type.
3342
Code completion works in non-streaming, unary mode. Do not use instruct model, there is no chat involved in the process.
3443

3544
Export `Qwen/Qwen2.5-Coder-1.5B`:
45+
46+
::::{tab-set}
47+
:::{tab-item} Intel GPU
48+
```console
49+
python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device GPU --cache_size 1 --overwrite_models
50+
```
51+
:::
52+
53+
:::{tab-item} Intel NPU
3654
```console
3755
python export_model.py text_generation --source_model Qwen/Qwen2.5-Coder-1.5B --weight-format int4 --config_file_path models/config_all.json --model_repository_path models --target_device NPU --overwrite_models
3856
```
57+
:::
58+
::::
3959

4060
Examine that workspace is set up properly `models/config_all.json`:
4161
```
@@ -105,10 +125,21 @@ ovms --rest_port 8000 --config_path ./models/config_all.json
105125
```
106126

107127
### Linux: via Docker
128+
::::{tab-set}
129+
:::{tab-item} Intel GPU
130+
```bash
131+
docker run -d --rm --device /dev/dri --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -u $(id -u):$(id -g) \
132+
-p 8000:8000 -v $(pwd)/:/workspace/ openvino/model_server:2025.1 --rest_port 8000 --config_path /workspace/models/config_all.json
133+
```
134+
:::
135+
136+
:::{tab-item} Intel NPU
108137
```bash
109138
docker run -d --rm --device /dev/accel --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -u $(id -u):$(id -g) \
110139
-p 8000:8000 -v $(pwd)/:/workspace/ openvino/model_server:2025.1 --rest_port 8000 --config_path /workspace/models/config_all.json
111140
```
141+
:::
142+
::::
112143

113144
## Set Up Visual Studio Code
114145

docs/deploying_server_baremetal.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu22, Ubu
88
:sync: ubuntu-22-04
99
Download precompiled package (without python support):
1010
```{code} sh
11-
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu22.tar.gz
11+
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu22.tar.gz
1212
tar -xzvf ovms_ubuntu22.tar.gz
1313
```
1414
or precompiled package (with python and LLM support):
1515
```{code} sh
16-
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu22_python_on.tar.gz
16+
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu22_python_on.tar.gz
1717
tar -xzvf ovms_ubuntu22_python_on.tar.gz
1818
```
1919
Install required libraries:
@@ -36,12 +36,12 @@ pip3 install "Jinja2==3.1.6" "MarkupSafe==3.0.2"
3636
:sync: ubuntu-24-04
3737
Download precompiled package (without python support):
3838
```{code} sh
39-
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu24.tar.gz
39+
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24.tar.gz
4040
tar -xzvf ovms_ubuntu24.tar.gz
4141
```
4242
or precompiled package (with python and LLM support):
4343
```{code} sh
44-
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_ubuntu24_python_on.tar.gz
44+
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_ubuntu24_python_on.tar.gz
4545
tar -xzvf ovms_ubuntu24_python_on.tar.gz
4646
```
4747
Install required libraries:
@@ -64,12 +64,12 @@ pip3 install "Jinja2==3.1.6" "MarkupSafe==3.0.2"
6464
:sync: rhel-9.5
6565
Download precompiled package (without python support):
6666
```{code} sh
67-
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_redhat.tar.gz
67+
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat.tar.gz
6868
tar -xzvf ovms_redhat.tar.gz
6969
```
7070
or precompiled package (with python and LLM support):
7171
```{code} sh
72-
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_redhat_python_on.tar.gz
72+
wget https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_redhat_python_on.tar.gz
7373
tar -xzvf ovms_redhat_python_on.tar.gz
7474
```
7575
Install required libraries:
@@ -95,7 +95,7 @@ Make sure you have [Microsoft Visual C++ Redistributable](https://aka.ms/vs/17/r
9595
Download and unpack model server archive for Windows:
9696

9797
```bat
98-
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.0/ovms_windows.zip -o ovms.zip
98+
curl -L https://github.com/openvinotoolkit/model_server/releases/download/v2025.1/ovms_windows.zip -o ovms.zip
9999
tar -xf ovms.zip
100100
```
101101

0 commit comments

Comments
 (0)