Skip to content

Commit a8926f6

Browse files
committed
Revert TypicalLogitsWarper change for now
1 parent 536e6a0 commit a8926f6

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

integration_tests/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
 gen-client:
 	# Compile protos
-	pip install grpcio-tools==1.58.0 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
+	pip install grpcio-tools==1.59.0 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
 	mkdir text_generation_tests/pb || true
 	python -m grpc_tools.protoc -I../proto --python_out=text_generation_tests/pb \
 		--grpc_python_out=text_generation_tests/pb --mypy_out=text_generation_tests/pb ../proto/generation.proto

server/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ include Makefile-flash-att-v2
 
 gen-server:
 	# Compile protos
-	pip install grpcio-tools==1.58.0 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
+	pip install grpcio-tools==1.59.0 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
 	mkdir text_generation_server/pb || true
 	python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
 		--grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
@@ -15,7 +15,7 @@ TORCH_VERSION := 2.0.0+cu118
 
 install-torch:
	# Install specific version of torch
-	pip install ninja==1.11.1 torch==$(TORCH_VERSION) --extra-index-url $(TORCH_URL) --no-cache-dir
+	pip install ninja==1.11.1.1 torch==$(TORCH_VERSION) --extra-index-url $(TORCH_URL) --no-cache-dir
 
 install-deepspeed:
 	# Install specific version of deepspeed

server/text_generation_server/utils/logits_process.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,8 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to
         cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)
 
         # Remove tokens with cumulative mass above the threshold
-        last_ind = (cumulative_probs < self.mass).sum(dim=1) - 1
-        last_ind.clamp_(min=0)
+        last_ind = (cumulative_probs < self.mass).sum(dim=1)
+        last_ind.clamp_(max=sorted_scores.shape[-1] - 1)
         sorted_indices_to_remove = sorted_scores > sorted_scores.gather(1, last_ind.view(-1, 1))
         # Keep at least min_tokens_to_keep (set to min_tokens_to_keep-1 because we add the first one below)
         sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0

0 commit comments

Comments
 (0)