Skip to content

Commit 3e64d5e

Browse files
committed
Remove MAX_BATCH_WEIGHT and MAX_PREFILL_WEIGHT from configs
1 parent c6b33ac commit 3e64d5e

File tree

10 files changed

+2
-36
lines changed

10 files changed

+2
-36
lines changed

deployment/base/patches/limits/t5-xl-v100.yaml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ spec:
88
containers:
99
- name: server
1010
env:
11-
# These values are based on the batch_size*seq_len^2 scaling
12-
# Whereas in recent experiments the scaling appears to be mostly
13-
# more like batch_size*seq_len, so this should be revisited
1411
- name: MAX_SEQUENCE_LENGTH
1512
value: "2048"
1613
- name: MAX_NEW_TOKENS
@@ -19,7 +16,3 @@ spec:
1916
value: "32"
2017
- name: MAX_CONCURRENT_REQUESTS
2118
value: "64"
22-
- name: MAX_BATCH_WEIGHT
23-
value: "12000000"
24-
- name: MAX_PREFILL_WEIGHT
25-
value: "300000"

deployment/base/patches/limits/t5-xxl.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# These must be set in conjunction with an appropriate MAX_BATCH_WEIGHT value
21
apiVersion: apps/v1
32
kind: Deployment
43
metadata:

deployment/models/bloom/kustomization.yaml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ patchesStrategicMerge:
3030
- name: DEPLOYMENT_FRAMEWORK
3131
value: hf_custom_tp
3232
33-
# Measurements showed incremental mem usage to actually be linear
34-
# So these could be improved if we change the logic in the code to
35-
# support that kind of relationship
3633
- name: MAX_BATCH_SIZE
3734
value: "16"
3835
- name: MAX_CONCURRENT_REQUESTS
@@ -41,8 +38,3 @@ patchesStrategicMerge:
4138
value: "4096"
4239
- name: MAX_NEW_TOKENS
4340
value: "1536"
44-
- name: MAX_PREFILL_WEIGHT
45-
# value: "420000"
46-
value: "320000"
47-
- name: MAX_BATCH_WEIGHT
48-
value: "80000000"

deployment/models/flan-t5/kustomization.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,3 @@ patchesStrategicMerge:
2626
env:
2727
- name: MODEL_NAME
2828
value: google/flan-t5-xxl
29-
- name: MAX_BATCH_WEIGHT
30-
value: "47458400"

deployment/models/flan-ul2-tp/kustomization.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,4 @@ patchesStrategicMerge:
2929
value: google/flan-ul2
3030
- name: DEPLOYMENT_FRAMEWORK
3131
value: hf_custom_tp
32-
- name: MAX_BATCH_WEIGHT
33-
value: "34543200"
32+

deployment/models/flan-ul2/kustomization.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,3 @@ patchesStrategicMerge:
2626
env:
2727
- name: MODEL_NAME
2828
value: google/flan-ul2
29-
- name: MAX_PREFILL_WEIGHT
30-
value: "1500000"
31-
- name: MAX_BATCH_WEIGHT
32-
value: "34543200"

deployment/models/gpt-neox/kustomization.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,7 @@ patchesStrategicMerge:
3232
value: "256"
3333
- name: MAX_CONCURRENT_REQUESTS
3434
value: "320"
35-
- name: MAX_BATCH_WEIGHT
36-
value: "9200"
3735
- name: MAX_SEQUENCE_LENGTH
3836
value: "8192"
39-
- name: MAX_PREFILL_WEIGHT
40-
value: "8192"
4137
- name: MAX_NEW_TOKENS
4238
value: "1536"

deployment/models/mpt-7b/kustomization.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,4 @@ patchesStrategicMerge:
3838
value: "100"
3939
- name: MAX_NEW_TOKENS
4040
value: "1024"
41-
- name: MAX_PREFILL_WEIGHT
42-
value: "2000000"
43-
- name: MAX_BATCH_WEIGHT
44-
value: "200000000"
41+

deployment/models/mt0/kustomization.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,3 @@ patchesStrategicMerge:
2626
env:
2727
- name: MODEL_NAME
2828
value: bigscience/mt0-xxl
29-
- name: MAX_BATCH_WEIGHT
30-
value: "44752800"

deployment/models/ul2/kustomization.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,3 @@ patchesStrategicMerge:
2626
env:
2727
- name: MODEL_NAME
2828
value: google/ul2
29-
- name: MAX_BATCH_WEIGHT
30-
value: "34543200"

0 commit comments

Comments
 (0)