Skip to content

Commit 62b91b6

Browse files
authored
bugfix: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits (#172)
* feature: unstructured支持简单pdf处理 (use unstructured to support simple PDF processing)
* feature: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits
1 parent 082aca1 commit 62b91b6

File tree

5 files changed

+62
-51
lines changed

5 files changed

+62
-51
lines changed

deployment/helm/datamate/charts/ray-cluster/values.yaml

Lines changed: 5 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,10 @@ head:
5858
# in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details.
5959
serviceAccountName: ""
6060
restartPolicy: ""
61-
rayStartParams:
62-
object-store-memory: '78643200'
61+
rayStartParams: {}
6362
# containerEnv specifies environment variables for the Ray container,
6463
# Follows standard K8s container env schema.
65-
containerEnv:
66-
- name: RAY_DEDUP_LOGS
67-
value: "0"
68-
- name: RAY_TQDM_PATCH_PRINT
69-
value: "0"
70-
- name: MYSQL_HOST
71-
value: "datamate-database"
72-
- name: MYSQL_PORT
73-
value: "3306"
74-
- name: MYSQL_USER
75-
value: "root"
76-
- name: MYSQL_PASSWORD
77-
value: "password"
78-
- name: MYSQL_DATABASE
79-
value: "datamate"
64+
containerEnv: []
8065
# - name: EXAMPLE_ENV
8166
# value: "1"
8267
envFrom: []
@@ -93,14 +78,7 @@ head:
9378
# It is usually best to set requests equal to limits.
9479
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
9580
# for further guidance.
96-
resources:
97-
limits:
98-
cpu: "2"
99-
# To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
100-
memory: "8G"
101-
requests:
102-
cpu: "1"
103-
memory: "2G"
81+
resources: {}
10482
annotations: {}
10583
nodeSelector: {}
10684
tolerations: []
@@ -156,21 +134,7 @@ worker:
156134
initContainers: []
157135
# containerEnv specifies environment variables for the Ray container,
158136
# Follows standard K8s container env schema.
159-
containerEnv:
160-
- name: RAY_DEDUP_LOGS
161-
value: "0"
162-
- name: RAY_TQDM_PATCH_PRINT
163-
value: "0"
164-
- name: MYSQL_HOST
165-
value: "datamate-database"
166-
- name: MYSQL_PORT
167-
value: "3306"
168-
- name: MYSQL_USER
169-
value: "root"
170-
- name: MYSQL_PASSWORD
171-
value: "password"
172-
- name: MYSQL_DATABASE
173-
value: "datamate"
137+
containerEnv: []
174138
# - name: EXAMPLE_ENV
175139
# value: "1"
176140
envFrom: []
@@ -187,13 +151,7 @@ worker:
187151
# It is usually best to set requests equal to limits.
188152
# See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
189153
# for further guidance.
190-
resources:
191-
limits:
192-
cpu: "4"
193-
memory: "8G"
194-
requests:
195-
cpu: "1"
196-
memory: "1G"
154+
resources: {}
197155
annotations: {}
198156
nodeSelector: {}
199157
tolerations: []

deployment/helm/datamate/values.yaml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ database:
7777
subPath: database
7878

7979
backend:
80+
securityContext:
81+
capabilities:
82+
add:
83+
- SYS_ADMIN
8084
env:
8185
- name: DB_PASSWORD
8286
value: *dbPass
@@ -170,6 +174,31 @@ runtime:
170174
ray-cluster:
171175
enabled: true
172176
head:
177+
rayStartParams:
178+
object-store-memory: '78643200'
179+
num-cpus: '0'
180+
containerEnv:
181+
- name: RAY_DEDUP_LOGS
182+
value: "0"
183+
- name: RAY_TQDM_PATCH_PRINT
184+
value: "0"
185+
- name: MYSQL_HOST
186+
value: "datamate-database"
187+
- name: MYSQL_PORT
188+
value: "3306"
189+
- name: MYSQL_USER
190+
value: "root"
191+
- name: MYSQL_PASSWORD
192+
value: *dbPass
193+
- name: MYSQL_DATABASE
194+
value: "datamate"
195+
resources:
196+
limits:
197+
cpu: "2"
198+
memory: "8G"
199+
requests:
200+
cpu: "1"
201+
memory: "2G"
173202
volumes:
174203
- *datasetVolume
175204
- *flowVolume
@@ -196,6 +225,28 @@ ray-cluster:
196225
- containerPort: 8081
197226
volumeMounts: *runtimeVolumeMounts
198227
worker:
228+
containerEnv:
229+
- name: RAY_DEDUP_LOGS
230+
value: "0"
231+
- name: RAY_TQDM_PATCH_PRINT
232+
value: "0"
233+
- name: MYSQL_HOST
234+
value: "datamate-database"
235+
- name: MYSQL_PORT
236+
value: "3306"
237+
- name: MYSQL_USER
238+
value: "root"
239+
- name: MYSQL_PASSWORD
240+
value: *dbPass
241+
- name: MYSQL_DATABASE
242+
value: "datamate"
243+
resources:
244+
limits:
245+
cpu: "8"
246+
memory: "64G"
247+
requests:
248+
cpu: "1"
249+
memory: "2G"
199250
volumes:
200251
- *datasetVolume
201252
- *flowVolume

runtime/python-executor/datamate/core/base_op.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,10 @@ def create_failure_sample(self, sample: Dict[str, Any], op_name, excp: BaseExcep
146146
def read_file(self, sample):
147147
filepath = sample[self.filepath_key]
148148
filetype = sample[self.filetype_key]
149-
if filetype in ["ppt", "pptx", "docx", "doc", "xlsx"]:
149+
if filetype in ["ppt", "pptx", "docx", "doc", "xlsx", "csv", "md", "pdf"]:
150150
elements = partition(filename=filepath)
151151
sample[self.text_key] = "\n\n".join([str(el) for el in elements])
152-
elif filetype in ["txt", "md", "markdown", "xml", "html", "csv", "json", "jsonl"]:
152+
elif filetype in ["txt", "md", "markdown", "xml", "html", "json", "jsonl"]:
153153
with open(filepath, 'rb') as f:
154154
content = f.read()
155155
sample[self.text_key] = content.decode("utf-8-sig").replace("\r\n", "\n")

runtime/python-executor/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies = [
2121
"loguru>=0.7.3",
2222
"opencv-python-headless>=4.12.0.88",
2323
"ray[data,default]==2.52.1",
24-
"unstructured[csv,docx,pptx,xlsx]==0.18.15",
24+
"unstructured[csv,docx,pptx,xlsx,pdf,md]==0.18.15",
2525
"uvicorn[standard]>=0.38.0",
2626
]
2727

scripts/images/runtime/Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ WORKDIR /opt/runtime
2121

2222
RUN --mount=type=cache,target=/root/.cache/uv \
2323
uv pip install -e . --system \
24-
&& uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
24+
&& UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
25+
&& uv pip uninstall torch torchvision triton --system \
26+
&& uv pip list | grep -E '^nvidia-' | awk '{print $1}' | xargs -r uv pip uninstall --system \
2527
&& python -m spacy download zh_core_web_sm
2628

2729
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \

0 commit comments

Comments
 (0)