Remove MAX_BATCH_WEIGHT and MAX_PREFILL_WEIGHT from configs

njhill · njhill · commit 3e64d5ebd12f · 2024-01-29T17:14:02.000-08:00
diff --git a/deployment/base/patches/limits/t5-xl-v100.yaml b/deployment/base/patches/limits/t5-xl-v100.yaml
@@ -8,9 +8,6 @@ spec:
       containers:
         - name: server
           env:
-            # These values are based on the batch_size*seq_len^2 scaling
-            # Whereas in recent experiments the scaling appears to mostly
-            # more like batch_size*seq_len, so this should be revisited
             - name: MAX_SEQUENCE_LENGTH
               value: "2048"
             - name: MAX_NEW_TOKENS
@@ -19,7 +16,3 @@ spec:
               value: "32"
             - name: MAX_CONCURRENT_REQUESTS
               value: "64"
-            - name: MAX_BATCH_WEIGHT
-              value: "12000000"
-            - name: MAX_PREFILL_WEIGHT
-              value: "300000"
diff --git a/deployment/base/patches/limits/t5-xxl.yaml b/deployment/base/patches/limits/t5-xxl.yaml
@@ -1,4 +1,3 @@
-# These must be set in conjunction with an appropriate MAX_BATCH_WEIGHT value
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/deployment/models/bloom/kustomization.yaml b/deployment/models/bloom/kustomization.yaml
@@ -30,9 +30,6 @@ patchesStrategicMerge:
              - name: DEPLOYMENT_FRAMEWORK
                value: hf_custom_tp
                
-             # Measurements showed incremental mem usage to actually be linear
-             # So these could be improved if we change the logic in the code to
-             # support that kind of relationship
              - name: MAX_BATCH_SIZE
                value: "16"
              - name: MAX_CONCURRENT_REQUESTS
@@ -41,8 +38,3 @@ patchesStrategicMerge:
                value: "4096"
              - name: MAX_NEW_TOKENS
                value: "1536"
-             - name: MAX_PREFILL_WEIGHT
-             #  value: "420000"
-               value: "320000"
-             - name: MAX_BATCH_WEIGHT
-               value: "80000000"
diff --git a/deployment/models/flan-t5/kustomization.yaml b/deployment/models/flan-t5/kustomization.yaml
@@ -26,5 +26,3 @@ patchesStrategicMerge:
              env:
              - name: MODEL_NAME
                value: google/flan-t5-xxl
-             - name: MAX_BATCH_WEIGHT
-               value: "47458400"
diff --git a/deployment/models/flan-ul2-tp/kustomization.yaml b/deployment/models/flan-ul2-tp/kustomization.yaml
@@ -29,5 +29,4 @@ patchesStrategicMerge:
                value: google/flan-ul2
              - name: DEPLOYMENT_FRAMEWORK
                value: hf_custom_tp
-             - name: MAX_BATCH_WEIGHT
-               value: "34543200"
+
diff --git a/deployment/models/flan-ul2/kustomization.yaml b/deployment/models/flan-ul2/kustomization.yaml
@@ -26,7 +26,3 @@ patchesStrategicMerge:
              env:
              - name: MODEL_NAME
                value: google/flan-ul2
-             - name: MAX_PREFILL_WEIGHT
-               value: "1500000"
-             - name: MAX_BATCH_WEIGHT
-               value: "34543200"
diff --git a/deployment/models/gpt-neox/kustomization.yaml b/deployment/models/gpt-neox/kustomization.yaml
@@ -32,11 +32,7 @@ patchesStrategicMerge:
                value: "256"
              - name: MAX_CONCURRENT_REQUESTS
                value: "320"
-             - name: MAX_BATCH_WEIGHT
-               value: "9200"
              - name: MAX_SEQUENCE_LENGTH
                value: "8192"
-             - name: MAX_PREFILL_WEIGHT
-               value: "8192"
              - name: MAX_NEW_TOKENS
                value: "1536"
diff --git a/deployment/models/mpt-7b/kustomization.yaml b/deployment/models/mpt-7b/kustomization.yaml
@@ -38,7 +38,4 @@ patchesStrategicMerge:
                value: "100"
              - name: MAX_NEW_TOKENS
                value: "1024"
-             - name: MAX_PREFILL_WEIGHT
-               value: "2000000"
-             - name: MAX_BATCH_WEIGHT
-               value: "200000000"
+
diff --git a/deployment/models/mt0/kustomization.yaml b/deployment/models/mt0/kustomization.yaml
@@ -26,5 +26,3 @@ patchesStrategicMerge:
              env:
              - name: MODEL_NAME
                value: bigscience/mt0-xxl
-             - name: MAX_BATCH_WEIGHT
-               value: "44752800"
diff --git a/deployment/models/ul2/kustomization.yaml b/deployment/models/ul2/kustomization.yaml
@@ -26,5 +26,3 @@ patchesStrategicMerge:
              env:
              - name: MODEL_NAME
                value: google/ul2
-             - name: MAX_BATCH_WEIGHT
-               value: "34543200"

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,3 @@` |
| `1` |  | `-# These must be set in conjunction with an appropriate MAX_BATCH_WEIGHT value` |
| `2` | `1` | `apiVersion: apps/v1` |
| `3` | `2` | `kind: Deployment` |
| `4` | `3` | `metadata:` |