Skip to content

Commit 36052c1

Browse files
committed
Update rust, transformers, optimum, onnxruntime, onnx, loguru, pytest
Rust 1.72.1 transformers 4.33.2 optimum 1.13.2 onnxruntime 1.16.0 onnx 1.14.1 loguru 0.7.2 pytest 7.4.2
1 parent ef953ab commit 36052c1

File tree

7 files changed

+108
-106
lines changed

7 files changed

+108
-106
lines changed

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
8888

8989
## Rust builder ################################################################
9090
# Specific debian version so that compatible glibc version is used
91-
FROM rust:1.72-bullseye as rust-builder
91+
FROM rust:1.72.1-bullseye as rust-builder
9292
ARG PROTOC_VERSION
9393

9494
ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -255,7 +255,7 @@ COPY proto proto
255255
COPY server server
256256
RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu]" --no-cache-dir
257257

258-
# Patch codegen model changes into transformers 4.31
258+
# Patch codegen model changes into transformers 4.33.2
259259
RUN cp server/transformers_patch/modeling_codegen.py \
260260
/opt/miniconda/lib/python3.*/site-packages/transformers/models/codegen/modeling_codegen.py
261261

integration_tests/poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

integration_tests/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ python = "^3.9"
1010
[tool.poetry.group.dev.dependencies]
1111
protobuf = "^4.24.3"
1212
grpcio-tools = "^1.58.0"
13-
pytest = "^7.4.0"
13+
pytest = "^7.4.2"
1414
pytest-asyncio = "^0.21.1"
1515
requests = "^2.31.0"
1616
pyyaml = "^6.0.1"

rust-toolchain.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[toolchain]
2-
channel = "1.72.0"
2+
channel = "1.72.1"
33
components = ["rustfmt", "clippy"]

server/poetry.lock

Lines changed: 91 additions & 90 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/pyproject.toml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@ python = ">=3.9.0,<3.13"
1212
protobuf = "^4.24.3"
1313
grpcio = "^1.58.0"
1414
grpcio-reflection = "^1.58.0"
15-
loguru = "^0.7.0"
15+
loguru = "^0.7.2"
1616
typer = "^0.9.0"
1717
accelerate = { version = "0.23.0", optional = true }
18-
bitsandbytes = { version = "^0.41.0", optional = true }
18+
bitsandbytes = { version = "^0.41.1", optional = true }
1919
scipy = { version = "^1.11.2", optional = true }
2020
safetensors = "^0.3.3"
2121
sentencepiece = "^0.1.99"
22-
transformers = "4.33.1"
23-
optimum = { version = "1.11.0", extras = ["onnxruntime-gpu"], optional = true }
24-
onnxruntime = { version = "1.15.1", optional = true }
25-
onnxruntime-gpu = { version = "1.15.1", optional = true }
26-
onnx = { version = "1.14.0", optional = true }
22+
transformers = "4.33.2"
23+
optimum = { version = "1.13.2", extras = ["onnxruntime-gpu"], optional = true }
24+
onnxruntime = { version = "1.16.0", optional = true }
25+
onnxruntime-gpu = { version = "1.16.0", optional = true }
26+
onnx = { version = "1.14.1", optional = true }
2727
einops = "^0.7.0rc2"
2828

2929
# Explicitly install some transitive dependencies to avoid CVEs
@@ -40,7 +40,7 @@ onnx-gpu = ["optimum", "onnxruntime-gpu", "onnx"]
4040

4141
[tool.poetry.group.dev.dependencies]
4242
grpcio-tools = "^1.58.0"
43-
pytest = "^7.4.0"
43+
pytest = "^7.4.2"
4444

4545
[build-system]
4646
requires = ["poetry-core>=1.0.0"]

server/transformers_patch/modeling_codegen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ def forward(
463463
if input_ids is not None and inputs_embeds is not None:
464464
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
465465
elif input_ids is not None:
466+
self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
466467
input_shape = input_ids.size()
467468
input_ids = input_ids.view(-1, input_shape[-1])
468469
batch_size = input_ids.shape[0]

0 commit comments

Comments
 (0)