
Commit 396085f: add max context length
Parent: f302ecb

File tree

7 files changed (+91, -35 lines)
  • 11-embeddings-reranker-classification-tensorrt
    • BEI-allenai-llama-3.1-tulu-3-8b-reward-model-fp8
    • BEI-baseten-example-meta-llama-3-70b-instructforsequenceclassification-fp8
    • BEI-mixedbread-ai-mxbai-rerank-base-v2-reranker-fp8
    • BEI-mixedbread-ai-mxbai-rerank-large-v2-reranker-fp8
    • BEI-papluca-xlm-roberta-base-language-detection-classification
    • BEI-samlowe-roberta-base-go_emotions-classification
    • BEI-skywork-skywork-reward-llama-3.1-8b-v0.2-reward-model-fp8


11-embeddings-reranker-classification-tensorrt/BEI-allenai-llama-3.1-tulu-3-8b-reward-model-fp8/README.md

Lines changed: 13 additions & 5 deletions
@@ -64,7 +64,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
         "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -74,10 +74,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.
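
For reference, a minimal end-to-end sketch of the batched call introduced above. The model URL is the same placeholder used in the README; the API-key handling (a `BASETEN_API_KEY` environment variable sent as an `Api-Key` authorization header) is an assumption for illustration, not part of this commit.

```python
# Hedged sketch of the batched classification request shown in the diff above.
# Assumptions (not in the commit): the API key comes from the BASETEN_API_KEY
# environment variable and is sent as an "Api-Key" Authorization header.
import os

import requests

api_key = os.environ["BASETEN_API_KEY"]
url = "https://model-xxxxxx.api.baseten.co/environments/production/sync/predict"

resp = requests.post(
    url,
    headers={"Authorization": f"Api-Key {api_key}"},
    json={
        # one inner list per text; each entry is classified independently
        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
        "raw_scores": True,
        "truncate": True,
        "truncation_direction": "Right",
    },
)
resp.raise_for_status()
print(resp.json())  # one list of {"label": ..., "score": ...} objects per input
```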

11-embeddings-reranker-classification-tensorrt/BEI-baseten-example-meta-llama-3-70b-instructforsequenceclassification-fp8/README.md

Lines changed: 13 additions & 5 deletions
@@ -64,7 +64,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
         "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -74,10 +74,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.

11-embeddings-reranker-classification-tensorrt/BEI-mixedbread-ai-mxbai-rerank-base-v2-reranker-fp8/README.md

Lines changed: 13 additions & 5 deletions
@@ -64,7 +64,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
        "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -74,10 +74,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.

11-embeddings-reranker-classification-tensorrt/BEI-mixedbread-ai-mxbai-rerank-large-v2-reranker-fp8/README.md

Lines changed: 13 additions & 5 deletions
@@ -64,7 +64,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
         "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -74,10 +74,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.

11-embeddings-reranker-classification-tensorrt/BEI-papluca-xlm-roberta-base-language-detection-classification/README.md

Lines changed: 13 additions & 5 deletions
@@ -63,7 +63,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
         "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -73,10 +73,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.

11-embeddings-reranker-classification-tensorrt/BEI-samlowe-roberta-base-go_emotions-classification/README.md

Lines changed: 13 additions & 5 deletions
@@ -63,7 +63,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
         "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -73,10 +73,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.

11-embeddings-reranker-classification-tensorrt/BEI-skywork-skywork-reward-llama-3.1-8b-v0.2-reward-model-fp8/README.md

Lines changed: 13 additions & 5 deletions
@@ -64,7 +64,7 @@ requests.post(
     headers=headers,
     url="https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
     json={
-        "inputs": "Baseten is a fast inference provider",
+        "inputs": [["Baseten is a fast inference provider"], ["classify this separately."]],
         "raw_scores": True,
         "truncate": True,
         "truncation_direction": "Right"
@@ -74,10 +74,18 @@ requests.post(
 Returns:
 ```json
 [
-  {
-    "label": "excitement",
-    "score": 0.99
-  }
+  [
+    {
+      "label": "excitement",
+      "score": 0.99
+    }
+  ],
+  [
+    {
+      "label": "excitement",
+      "score": 0.01
+    }
+  ]
 ]
 ```
 Important: this is different from the `predict` route that you usually call (https://model-xxxxxx.api.baseten.co/environments/production/predict); it contains an additional `sync` segment before `predict`.
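
Since the batched request now returns one list of `label`/`score` objects per input (as in the updated `Returns` examples above), here is a small hedged sketch of selecting the top label per input. The `results` literal simply mirrors the example output in the READMEs; it is not produced by this commit.

```python
# Hedged sketch: pick the highest-scoring label for each input in the batched
# response format shown in the updated READMEs. `results` mirrors the example output.
results = [
    [{"label": "excitement", "score": 0.99}],
    [{"label": "excitement", "score": 0.01}],
]

for i, per_input in enumerate(results):
    best = max(per_input, key=lambda item: item["score"])
    print(f"input {i}: {best['label']} ({best['score']:.2f})")
```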
